├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── feature_request.md ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── increment-version.yml │ ├── integrationTests.yaml │ ├── pre-release.yaml │ ├── release.yaml │ ├── test-head.yaml │ └── tests.yaml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── ClickHouseKafkaConnectDashboard.json ├── Dockerfile ├── LICENSE ├── README.md ├── SECURITY.md ├── Support Scripts.md ├── VERSION ├── build.gradle.kts ├── config └── archive │ ├── assets │ └── logo.svg │ └── manifest.json ├── docs ├── DESIGN.md ├── architecture.png ├── cropping_batches.png ├── deduplication.png ├── full_state_machine.png ├── invalid_states.png └── start_state.png ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── jmx-export-connector.yml ├── settings.gradle.kts └── src ├── integrationTest ├── java │ └── com │ │ └── clickhouse │ │ └── kafka │ │ └── connect │ │ └── sink │ │ ├── ClickHouseCloudTest.java │ │ ├── ClickHouseSinkConnectorIntegrationTest.java │ │ ├── ExactlyOnceTest.java │ │ └── helper │ │ ├── ClickHouseAPI.java │ │ ├── ClickHouseTestHelpers.java │ │ ├── ConfluentPlatform.java │ │ ├── SchemaTestData.java │ │ └── SchemalessTestData.java └── resources │ ├── clickhouse_sink.json │ ├── clickhouse_sink_no_proxy.json │ ├── clickhouse_sink_no_proxy_schemaless.json │ ├── clickhouse_sink_schemaless.json │ ├── clickhouse_sink_with_jdbc_prop.json │ ├── log4j.properties │ ├── stock_gen.json │ └── stock_gen_json.json ├── main ├── java │ └── com │ │ └── clickhouse │ │ └── kafka │ │ └── connect │ │ ├── ClickHouseSinkConnector.java │ │ ├── sink │ │ ├── ClickHouseSinkConfig.java │ │ ├── ClickHouseSinkTask.java │ │ ├── ProxySinkTask.java │ │ ├── data │ │ │ ├── Data.java │ │ │ ├── Record.java │ │ │ ├── SchemaType.java │ │ │ ├── StructToJsonMap.java │ │ │ └── convert │ │ │ │ ├── EmptyRecordConvertor.java │ │ │ │ ├── RecordConvertor.java │ │ │ │ ├── SchemaRecordConvertor.java │ │ │ │ ├── SchemalessRecordConvertor.java │ │ │ │ └── StringRecordConvertor.java │ │ ├── db │ │ │ ├── ClickHouseWriter.java │ │ │ ├── DBWriter.java │ │ │ ├── InMemoryDBWriter.java │ │ │ ├── TableMappingRefresher.java │ │ │ ├── helper │ │ │ │ ├── ClickHouseFieldDescriptor.java │ │ │ │ └── ClickHouseHelperClient.java │ │ │ └── mapping │ │ │ │ ├── Column.java │ │ │ │ ├── Table.java │ │ │ │ └── Type.java │ │ ├── dedup │ │ │ ├── DeDup.java │ │ │ └── DeDupStrategy.java │ │ ├── dlq │ │ │ ├── DuplicateException.java │ │ │ └── ErrorReporter.java │ │ ├── kafka │ │ │ ├── OffsetContainer.java │ │ │ ├── RangeContainer.java │ │ │ ├── RangeState.java │ │ │ └── TopicPartitionContainer.java │ │ ├── processing │ │ │ └── Processing.java │ │ └── state │ │ │ ├── State.java │ │ │ ├── StateProvider.java │ │ │ ├── StateRecord.java │ │ │ └── provider │ │ │ ├── InMemoryState.java │ │ │ └── KeeperStateProvider.java │ │ ├── transforms │ │ ├── ExtractTopic.java │ │ ├── ExtractTopicConfig.java │ │ └── KeyToValue.java │ │ └── util │ │ ├── Mask.java │ │ ├── QueryIdentifier.java │ │ ├── Utils.java │ │ ├── jmx │ │ ├── ExecutionTimer.java │ │ ├── MBeanServerUtils.java │ │ ├── SinkTaskStatistics.java │ │ └── SinkTaskStatisticsMBean.java │ │ └── reactor │ │ └── function │ │ ├── Tuple2.java │ │ ├── Tuple3.java │ │ └── Tuples.java └── resources │ └── META-INF │ └── services │ ├── org.apache.kafka.connect.sink.SinkConnector │ └── org.apache.kafka.connect.transforms.Transformation └── test ├── java ├── com │ └── clickhouse │ │ └── 
kafka │ │ └── connect │ │ └── sink │ │ ├── ClickHouseBase.java │ │ ├── ClickHouseSinkJdbcPropertiesTest.java │ │ ├── ClickHouseSinkTaskMappingTest.java │ │ ├── ClickHouseSinkTaskSchemalessProxyTest.java │ │ ├── ClickHouseSinkTaskSchemalessTest.java │ │ ├── ClickHouseSinkTaskStringTest.java │ │ ├── ClickHouseSinkTaskTest.java │ │ ├── ClickHouseSinkTaskWithSchemaProxyTest.java │ │ ├── ClickHouseSinkTaskWithSchemaTest.java │ │ ├── db │ │ ├── ClickHouseWriterTest.java │ │ ├── helper │ │ │ └── ClickHouseHelperClientTest.java │ │ └── mapping │ │ │ ├── ColumnTest.java │ │ │ └── TableTest.java │ │ ├── dlq │ │ └── InMemoryDLQ.java │ │ ├── helper │ │ ├── ClickHouseTestHelpers.java │ │ ├── SchemaTestData.java │ │ └── SchemalessTestData.java │ │ ├── junit │ │ └── extension │ │ │ ├── FromVersionConditionExtension.java │ │ │ └── SinceClickHouseVersion.java │ │ ├── kafa │ │ └── RangeContainerTest.java │ │ ├── processing │ │ └── ProcessingTest.java │ │ ├── provider │ │ └── LocalProviderTest.java │ │ └── util │ │ ├── MaskTest.java │ │ └── UtilsTest.java └── transforms │ └── KeyToValueTest.java └── resources └── log4j.properties /.gitattributes: -------------------------------------------------------------------------------- 1 | # 2 | # https://help.github.com/articles/dealing-with-line-endings/ 3 | # 4 | # These are explicitly windows files and should use crlf 5 | *.bat text eol=crlf 6 | 7 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | ### Describe the bug 12 | 13 | ### Steps to reproduce 14 | 1. 15 | 2. 16 | 3. 17 | 18 | ### Expected behaviour 19 | 20 | ### Error log 21 | 22 | ### Configuration 23 | #### Environment 24 | * Kafka-Connect version: 25 | * Kafka Connect configuration: 26 | * Kafka version: 27 | * Kafka environment: 28 | * OS: 29 | 30 | #### ClickHouse server 31 | * ClickHouse Server version: 32 | * ClickHouse Server non-default settings, if any: 33 | * `CREATE TABLE` statements for tables involved: 34 | * Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Question 4 | url: https://github.com/ClickHouse/clickhouse-kafka-connect/discussions 5 | about: Please ask and answer questions here. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: What would you like to add to the project? 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 
15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "gradle" 9 | directory: "/" 10 | schedule: 11 | interval: "monthly" 12 | labels: 13 | - "dependencies" 14 | open-pull-requests-limit: 5 15 | 16 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | 4 | ## Checklist 5 | Delete items not relevant to your PR: 6 | - [ ] Unit and integration tests covering the common scenarios were added 7 | - [ ] A human-readable description of the changes was provided to include in CHANGELOG 8 | - [ ] For significant changes, documentation in https://github.com/ClickHouse/clickhouse-docs was updated with further explanations or tutorials 9 | -------------------------------------------------------------------------------- /.github/workflows/increment-version.yml: -------------------------------------------------------------------------------- 1 | name: Increment Version 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | version: 7 | type: choice 8 | description: 'Version - patch, minor, or major' 9 | required: true 10 | options: 11 | - patch 12 | - minor 13 | - major 14 | 15 | permissions: 16 | contents: write 17 | pull-requests: write 18 | 19 | env: 20 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | 22 | jobs: 23 | build: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - name: Check out code 27 | uses: actions/checkout@v3 28 | with: 29 | fetch-depth: 0 # this is required to see all branches 30 | - name: Configure Git 31 | run: | 32 | git config --global user.email "actions@github.com" 33 | git config --global user.name "GitHub Action" 34 | git config --global --add --bool push.autoSetupRemote true 35 | - name: Setup Node.js environment 36 | uses: actions/setup-node@v3 37 | with: 38 | node-version: '16' 39 | - name: Install dependencies 40 | run: | 41 | npm install semver 42 | npm install fs 43 | - name: Increment version 44 | run: | 45 | echo "Incrementing version..." 46 | VERSION=$(cat VERSION) 47 | NEW_VERSION=$(node -p "require('semver').inc('$VERSION', '${{ github.event.inputs.version }}')") 48 | echo "v$NEW_VERSION" > VERSION 49 | echo "NEW_VERSION=$NEW_VERSION" >> "$GITHUB_ENV" 50 | echo "New version is $NEW_VERSION" 51 | - name: Commit and push 52 | run: | 53 | git checkout -b update-version-${{ env.NEW_VERSION }} 54 | git add VERSION 55 | git commit -m "Increment version to ${{ env.NEW_VERSION }}" 56 | git push 57 | gh pr create --title "Automatic version increment ${{ env.NEW_VERSION }}" --body "This is an automated PR to increment the version." 
58 | -------------------------------------------------------------------------------- /.github/workflows/integrationTests.yaml: -------------------------------------------------------------------------------- 1 | name: Integration Tests 2 | 3 | on: [workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Mask secure properties 10 | run: | 11 | echo "::add-mask::${{ secrets.CLICKHOUSE_PASSWORD }}" 12 | echo "::add-mask::${{ secrets.CLICKHOUSE_CLOUD_SECRET }}" 13 | - uses: actions/checkout@v3 14 | - name: Set up JDK 17 15 | uses: actions/setup-java@v3 16 | with: 17 | java-version: '17' 18 | distribution: 'adopt' 19 | architecture: x64 20 | - name: Setup and execute Gradle 'integrationTest' task 21 | uses: gradle/gradle-build-action@v2 22 | with: 23 | arguments: integrationTest --info -D clickhouse.host=${{vars.CLICKHOUSE_HOST}} -D clickhouse.port=${{vars.CLICKHOUSE_PORT}} -D clickhouse.password=${{secrets.CLICKHOUSE_PASSWORD}} -D clickhouse.cloud.organization=${{vars.CLICKHOUSE_CLOUD_ORGANIZATION}} -D clickhouse.cloud.id=${{vars.CLICKHOUSE_CLOUD_ID}} -D clickhouse.cloud.secret=${{secrets.CLICKHOUSE_CLOUD_SECRET}} -D clickhouse.cloud.serviceId=${{vars.CLICKHOUSE_CLOUD_SERVICE_ID}} -D clickhouse.cloud.host=${{vars.CLICKHOUSE_CLOUD_API_HOST}} 24 | -------------------------------------------------------------------------------- /.github/workflows/pre-release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | permissions: 7 | contents: write 8 | 9 | jobs: 10 | build_release: 11 | name: build_release 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@v3 16 | - name: Set env 17 | run: echo "RELEASE_VERSION=$(cat VERSION)" >> $GITHUB_ENV 18 | - name: Test 19 | run: | 20 | echo $RELEASE_VERSION 21 | echo ${{ env.RELEASE_VERSION }} 22 | - name: Set up JDK 17 23 | uses: actions/setup-java@v3 24 | with: 25 | java-version: '17' 26 | distribution: 'adopt' 27 | architecture: x64 28 | - name: Setup and execute Gradle 'createConfluentArchive' task 29 | uses: gradle/gradle-build-action@v2 30 | with: 31 | arguments: createConfluentArchive 32 | gradle-version: '7.4.2' 33 | - name: release 34 | uses: actions/create-release@v1 35 | id: create_release 36 | with: 37 | draft: false 38 | prerelease: true 39 | release_name: ${{ env.RELEASE_VERSION }} 40 | tag_name: ${{ env.RELEASE_VERSION }} 41 | env: 42 | GITHUB_TOKEN: ${{ github.token }} 43 | - name: upload release artifact 44 | uses: actions/upload-release-asset@v1 45 | env: 46 | GITHUB_TOKEN: ${{ github.token }} 47 | with: 48 | upload_url: ${{ steps.create_release.outputs.upload_url }} 49 | asset_path: ./build/confluent/clickhouse-kafka-connect-${{ env.RELEASE_VERSION }}.zip 50 | asset_name: clickhouse-kafka-connect-${{ env.RELEASE_VERSION }}.zip 51 | asset_content_type: application/zip -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | permissions: 7 | contents: write 8 | 9 | jobs: 10 | build_release: 11 | name: build_release 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@v3 16 | - name: Set env 17 | run: echo "RELEASE_VERSION=$(cat VERSION)" >> $GITHUB_ENV 18 | - name: Test 19 | run: | 20 | echo $RELEASE_VERSION 21 | echo ${{ 
env.RELEASE_VERSION }} 22 | - name: Set up JDK 17 23 | uses: actions/setup-java@v3 24 | with: 25 | java-version: '17' 26 | distribution: 'adopt' 27 | architecture: x64 28 | - name: Setup and execute Gradle 'createConfluentArchive' task 29 | uses: gradle/gradle-build-action@v2 30 | with: 31 | arguments: createConfluentArchive 32 | gradle-version: '7.4.2' 33 | - name: release 34 | uses: actions/create-release@v1 35 | id: create_release 36 | with: 37 | draft: false 38 | prerelease: false 39 | release_name: ${{ env.RELEASE_VERSION }} 40 | tag_name: ${{ env.RELEASE_VERSION }} 41 | env: 42 | GITHUB_TOKEN: ${{ github.token }} 43 | - name: upload release artifact 44 | uses: actions/upload-release-asset@v1 45 | env: 46 | GITHUB_TOKEN: ${{ github.token }} 47 | with: 48 | upload_url: ${{ steps.create_release.outputs.upload_url }} 49 | asset_path: ./build/confluent/clickhouse-kafka-connect-${{ env.RELEASE_VERSION }}.zip 50 | asset_name: clickhouse-kafka-connect-${{ env.RELEASE_VERSION }}.zip 51 | asset_content_type: application/zip 52 | - name: Tag latest commit as release 53 | run: | 54 | git config --local user.email "action@github.com" 55 | git config --local user.name "GitHub Action" 56 | git tag ${{ env.RELEASE_VERSION }} 57 | git push origin ${{ env.RELEASE_VERSION }} 58 | -------------------------------------------------------------------------------- /.github/workflows/test-head.yaml: -------------------------------------------------------------------------------- 1 | name: Build Against ClickHouse Head 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0 * * *" 6 | workflow_dispatch: 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.number || github.sha }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | clickhouse: [ "head" ] 18 | name: ClickHouse ${{ matrix.clickhouse }} tests 19 | steps: 20 | - uses: actions/checkout@v3 21 | - name: Set up JDK 17 22 | uses: actions/setup-java@v3 23 | with: 24 | java-version: '17' 25 | distribution: 'adopt' 26 | architecture: x64 27 | - name: Setup and execute Gradle 'test' task 28 | uses: gradle/gradle-build-action@v2 29 | env: 30 | CLICKHOUSE_VERSION: ${{ matrix.clickhouse }} 31 | with: 32 | arguments: test 33 | 34 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: ClickHouse Kafka Connect Tests CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | fail-fast: false 10 | matrix: 11 | client: ["V1", "V2"] 12 | clickhouse: ["23.7", "24.3", "latest", "cloud"] 13 | name: ClickHouse ${{ matrix.clickhouse }} Client version ${{ matrix.client }} tests 14 | steps: 15 | - name: Check for Cloud Credentials 16 | id: check-cloud-credentials 17 | run: | 18 | if [[ "${{ matrix.clickhouse }}" == "cloud" && (-z "${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_HOST_SMT }}" || -z "${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_PASSWORD_SMT }}") ]]; then 19 | echo "SKIP_STEP=true" >> $GITHUB_ENV 20 | else 21 | echo "SKIP_STEP=false" >> $GITHUB_ENV 22 | fi 23 | shell: bash 24 | 25 | - uses: actions/checkout@v3 26 | if: env.SKIP_STEP != 'true' 27 | - name: Set up JDK 17 28 | if: env.SKIP_STEP != 'true' 29 | uses: actions/setup-java@v3 30 | with: 31 | java-version: '17' 32 | distribution: 'adopt' 33 | architecture: x64 34 | - name: Setup and execute Gradle 'test' task 35 | if: 
env.SKIP_STEP != 'true' 36 | uses: gradle/gradle-build-action@v2 37 | env: 38 | CLICKHOUSE_VERSION: ${{ matrix.clickhouse }} 39 | CLICKHOUSE_CLOUD_HOST: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_HOST_SMT }} 40 | CLICKHOUSE_CLOUD_PASSWORD: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_PASSWORD_SMT }} 41 | CLIENT_VERSION: ${{ matrix.client }} 42 | with: 43 | arguments: test 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.nar 17 | *.ear 18 | *.zip 19 | *.tar.gz 20 | *.rar 21 | 22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 23 | hs_err_pid* 24 | 25 | # Ignore Gradle project-specific cache directory 26 | .gradle 27 | 28 | # Ignore Gradle build output directory 29 | build 30 | 31 | # Ignore IntelliJ 32 | .idea 33 | 34 | # Mac Folder Details 35 | .DS_Store -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | So you want to get started developing with our Kafka Connect Sink Connector, eh? Well, welcome aboard! 3 | 4 | ## Prerequisites 5 | * [OpenJDK 17](https://aws.amazon.com/corretto/) or some other equivalent for compiling the connector code 6 | * [Docker Desktop](https://docs.docker.com/engine/install/) for running tests 7 | * [GitHub Desktop](https://desktop.github.com/) or some other git client to download the repo 8 | * A local copy of the repo, pulled from git 9 | 10 | ## Building and Running Unit Tests 11 | You should be able to compile and run the unit tests locally by going to the root project folder and running `./gradlew clean test`. Note that this doesn't produce a release artifact; you'll have to execute a later step for that. 12 | 13 | ## Generating the Build Artifact 14 | To create the actual artifact we need, run `./gradlew createConfluentArchive` from the project root. That should output a zip file into `/build/confluent/` that you can use to run the connector locally (or upload to a cloud service). 15 | 16 | ## Using Docker for Local Development 17 | Docker is a great tool for local development! To make things easier, below is a Docker Compose file (referred to as docker-compose.yml in later sections) that can be tweaked as needed. We've split off the environment variables into a separate .env file, and we supply the connector details by making a simple REST call - the overall workflow is sketched just below, followed by the Compose file itself. 
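As a rough map of that loop (a sketch, not an official script - the `~/DockerShare` folder matches the volume mounted in the Compose file below, and `connector.json` is just a placeholder name for the REST payload shown at the end of this section):

```bash
# Build the connector archive (ends up in build/confluent/)
./gradlew createConfluentArchive

# Unpack it into the folder the Compose file mounts onto the Connect plugin path
unzip -o build/confluent/clickhouse-kafka-connect-*.zip -d ~/DockerShare

# Start the Kafka Connect worker
docker compose up -d

# Register the connector (save the sample JSON payload below as connector.json first)
curl -X POST -H "Content-Type: application/json" \
     --data @connector.json \
     http://localhost:8083/connectors
```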
18 | 19 | 20 | ```yaml 21 | --- 22 | version: '2' 23 | name: 'confluent-connect' 24 | services: 25 | connect: 26 | image: confluentinc/cp-kafka-connect:latest 27 | hostname: connect 28 | container_name: kafka-connect 29 | volumes: 30 | - "~/DockerShare:/usr/share/dockershare/:ro" 31 | ports: 32 | - "8083:8083" 33 | - "7778:7778" 34 | - "8020:8020" 35 | environment: 36 | CONNECT_BOOTSTRAP_SERVERS: ${BOOTSTRAP_SERVERS} 37 | CONNECT_SECURITY_PROTOCOL: ${SECURITY_PROTOCOL} 38 | CONNECT_CONSUMER_SECURITY_PROTOCOL: ${SECURITY_PROTOCOL} 39 | CONNECT_PRODUCER_SECURITY_PROTOCOL: ${SECURITY_PROTOCOL} 40 | CONNECT_SASL_MECHANISM: ${SASL_MECHANISM} 41 | CONNECT_CONSUMER_SASL_MECHANISM: ${SASL_MECHANISM} 42 | CONNECT_PRODUCER_SASL_MECHANISM: ${SASL_MECHANISM} 43 | CONNECT_SASL_JAAS_CONFIG: ${SASL_JAAS_CONFIG} 44 | CONNECT_CONSUMER_SASL_JAAS_CONFIG: ${SASL_JAAS_CONFIG} 45 | CONNECT_PRODUCER_SASL_JAAS_CONFIG: ${SASL_JAAS_CONFIG} 46 | CONNECT_SCHEMA_REGISTRY_URL: ${SCHEMA_REGISTRY_URL} 47 | CONNECT_CONSUMER_SCHEMA_REGISTRY_URL: ${SCHEMA_REGISTRY_URL} 48 | CONNECT_PRODUCER_SCHEMA_REGISTRY_URL: ${SCHEMA_REGISTRY_URL} 49 | CONNECT_BASIC_AUTH_CREDENTIALS_SOURCE: ${SCHEMA_BASIC_AUTH_CREDENTIALS_SOURCE} 50 | CONNECT_CONSUMER_BASIC_AUTH_CREDENTIALS_SOURCE: ${SCHEMA_BASIC_AUTH_CREDENTIALS_SOURCE} 51 | CONNECT_PRODUCER_BASIC_AUTH_CREDENTIALS_SOURCE: ${SCHEMA_BASIC_AUTH_CREDENTIALS_SOURCE} 52 | CONNECT_BASIC_AUTH_USER_INFO: ${SCHEMA_BASIC_AUTH_USER_INFO} 53 | CONNECT_CONSUMER_BASIC_AUTH_USER_INFO: ${SCHEMA_BASIC_AUTH_USER_INFO} 54 | CONNECT_PRODUCER_BASIC_AUTH_USER_INFO: ${SCHEMA_BASIC_AUTH_USER_INFO} 55 | CONNECT_CONSUMER_MAX_POLL_RECORDS: 1 56 | CONNECT_REST_ADVERTISED_HOST_NAME: connect 57 | CONNECT_GROUP_ID: local-connect-group 58 | CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs 59 | CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1 60 | CONNECT_OFFSET_FLUSH_INTERVAL_MS: 10000 61 | CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets 62 | CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1 63 | CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status 64 | CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1 65 | CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter 66 | CONNECT_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter 67 | # CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: 68 | # CLASSPATH required due to CC-2422 69 | CLASSPATH: /usr/share/java/monitoring-interceptors/monitoring-interceptors-7.3.0.jar 70 | # CONNECT_PRODUCER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringProducerInterceptor" 71 | # CONNECT_CONSUMER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor" 72 | CONNECT_PLUGIN_PATH: "/usr/share/java,/usr/share/confluent-hub-components,/usr/share/dockershare" 73 | CONNECT_LOG4J_LOGGERS: org.apache.zookeeper=ERROR,org.I0Itec.zkclient=ERROR,org.reflections=ERROR,com.clickhouse=DEBUG 74 | # KAFKA_JMX_HOSTNAME: localhost 75 | # KAFKA_JMX_PORT: 7778 76 | # KAFKA_JMX_OPTS: -javaagent:/usr/share/java/cp-base-new/jmx_prometheus_javaagent-0.18.0.jar=8020:/usr/share/dockershare/jmx-export.yml -Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false 77 | 78 | ``` 79 | 80 | A sample .env file (should be located in the same directory as the docker-compose.yml file): 81 | ``` 82 | BOOTSTRAP_SERVERS=[HOST_NAME:PORT_NUMBER] 83 | SECURITY_PROTOCOL=SASL_SSL 84 | SASL_MECHANISM=PLAIN 85 | SASL_JAAS_CONFIG="org.apache.kafka.common.security.plain.PlainLoginModule 
required username='[KAFKA_USERNAME]' password='[KAFKA_PASSWORD]';" 86 | SCHEMA_REGISTRY_URL=[https://HOST_NAME OR http://HOST_NAME] 87 | SCHEMA_BASIC_AUTH_CREDENTIALS_SOURCE=USER_INFO 88 | SCHEMA_BASIC_AUTH_USER_INFO="[SCHEMA_USERNAME]:[SCHEMA_PASSWORD]" 89 | ``` 90 | 91 | Here's a sample REST call you could use to create the connector (POST to `localhost:8083/connectors`). NOTE: This includes Postman environment variables; just replace any of the {{...}} variables with your values. 92 | ``` 93 | { 94 | "name": "clickhouse-connect", 95 | "config": { 96 | "connector.class": "com.clickhouse.kafka.connect.ClickHouseSinkConnector", 97 | "tasks.max": "1", 98 | "database": "{{ClickHouse_database}}", 99 | "hostname": "{{ClickHouse_hostname}}", 100 | "password": "{{ClickHouse_password}}", 101 | "port": "{{ClickHouse_port}}", 102 | "errors.retry.timeout": "60", 103 | "exactlyOnce": "false", 104 | "ssl": "true", 105 | "topics": "sample-topic", 106 | "errors.tolerance": "none", 107 | "username": "{{ClickHouse_username}}", 108 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 109 | "value.converter.schemas.enable": "false", 110 | "errors.log.enable": "true", 111 | "clickhouseSettings": "", 112 | "topic2TableMap": "", 113 | "consumer.override.max.poll.records": "50", 114 | "transforms": "Metadata", 115 | "transforms.Metadata.type": "org.apache.kafka.connect.transforms.InsertField$Value", 116 | "transforms.Metadata.offset.field": "offset", 117 | "transforms.Metadata.partition.field": "part", 118 | "transforms.Metadata.timestamp.field": "kafkaField", 119 | "transforms.Metadata.topic.field": "topic" 120 | } 121 | } 122 | ``` 123 | 124 | 125 | ## Proposing code changes 126 | This is a relatively straightforward process: 127 | * Ensure there's unit test coverage for the changes (and that prior tests still pass, of course). 128 | * Update VERSION to the next logical version number 129 | * Add changes to CHANGELOG in a human-readable way 130 | * Submit a PR 131 | 132 | ## Releasing a new version 133 | There's an action for that! -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CONFLUENT_VERSION 2 | FROM confluentinc/cp-server-connect-base:${CONFLUENT_VERSION} 3 | 4 | ARG CONNECTOR_VERSION 5 | RUN confluent-hub install --no-prompt clickhouse/clickhouse-kafka-connect:${CONNECTOR_VERSION} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ClickHouse Kafka Connect Sink 2 | 3 | ## About 4 | clickhouse-kafka-connect is the official Kafka Connect sink connector for [ClickHouse](https://clickhouse.com/). 5 | 6 | The Kafka connector delivers data from a Kafka topic to a ClickHouse table. 7 | ## Documentation 8 | 9 | See the [ClickHouse website](https://clickhouse.com/docs/en/integrations/kafka/clickhouse-kafka-connect-sink) for the full documentation entry. 10 | 11 | ## Design 12 | For a full overview of the design and how exactly-once delivery semantics are achieved, see the [design document](./docs/DESIGN.md). 13 | 14 | ## Help 15 | For additional help, please [file an issue in the repository](https://github.com/ClickHouse/clickhouse-kafka-connect/issues) or raise a question in [ClickHouse public Slack](https://clickhouse.com/slack). 
16 | 17 | ## KeyToValue Transformation 18 | We've created a transformation that allows you to convert a Kafka message key into a value. 19 | This is useful when you want to store the key in a separate column in ClickHouse - by default, the column is `_key` and the type is String. 20 | 21 | ```sql 22 | CREATE TABLE your_table_name 23 | ( 24 | `your_column_name` String, 25 | ... 26 | ... 27 | ... 28 | `_key` String 29 | ) ENGINE = MergeTree() 30 | ``` 31 | 32 | Simply add the transformation to your connector configuration: 33 | 34 | ```properties 35 | transforms=keyToValue 36 | transforms.keyToValue.type=com.clickhouse.kafka.connect.transforms.KeyToValue 37 | transforms.keyToValue.field=_key 38 | ``` -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Security Announcements 4 | Security fixes will be announced by posting them in the [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/). 5 | 6 | ## Scope and Supported Versions 7 | 8 | Generally the latest release contains the most recent security updates - we increment version numbers based on all changes, including security fixes. 9 | 10 | Where applicable we might backport, but generally the latest is the most secure. 11 | 12 | ## Reporting a Vulnerability 13 | 14 | We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers. 15 | 16 | To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement. 17 | 18 | ### When Should I Report a Vulnerability? 19 | 20 | - You think you discovered a potential security vulnerability in ClickHouse 21 | - You are unsure how a vulnerability affects ClickHouse 22 | 23 | ### When Should I NOT Report a Vulnerability? 24 | 25 | - You need help tuning ClickHouse components for security 26 | - You need help applying security related updates 27 | - Your issue is not security related 28 | 29 | ## Security Vulnerability Response 30 | 31 | Each report is acknowledged and analyzed by ClickHouse maintainers within 5 working days. 32 | As the security issue moves from triage, to identified fix, to release planning we will keep the reporter updated. 33 | 34 | ## Public Disclosure Timing 35 | 36 | A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days. 
37 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | v1.3.1 2 | -------------------------------------------------------------------------------- /config/archive/assets/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /config/archive/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "name" : "clickhouse-kafka-connect", 3 | "version" : "${project.version}", 4 | "title" : "ClickHouse Connector for Apache Kafka", 5 | "description" : "the official Kafka Connect Sink connector for ClickHouse.", 6 | "documentation_url": "https://clickhouse.com/docs/en/integrations/kafka/clickhouse-kafka-connect-sink", 7 | 8 | "owner" : { 9 | "username" : "clickhouse", 10 | "name" : "ClickHouse Inc.", 11 | "url" : "https://clickhouse.com/", 12 | "logo" : "assets/logo.svg" 13 | }, 14 | 15 | "support" : { 16 | "logo" : "assets/logo.svg", 17 | "summary" : "Officially supported by ClickHouse Inc.", 18 | "url" : "https://github.com/ClickHouse/clickhouse-kafka-connect/issues", 19 | "provider_name" : "ClickHouse Inc." 20 | }, 21 | 22 | "tags" : [ 23 | "clickhouse", 24 | "olap", 25 | "analytics", 26 | "sql", 27 | "json" 28 | ], 29 | 30 | "features" : { 31 | "supported_encodings" : [ "any" ], 32 | "confluent_control_center_integration" : true, 33 | "delivery_guarantee": ["exactly_once"], 34 | "single_message_transforms" : true, 35 | "kafka_connect_api" : true 36 | }, 37 | 38 | "logo" : "assets/logo.svg", 39 | "source_url" : "https://github.com/ClickHouse/clickhouse-kafka-connect", 40 | "docker_image" : { }, 41 | "component_types" : ["sink"], 42 | "requirements": ["ClickHouse v22.5 or later."], 43 | "release_date": "${project.releaseDate}", 44 | "license": [{ 45 | "name": "Apache License, Version 2.0", 46 | "url": "http://www.apache.org/licenses/LICENSE-2.0" 47 | }] 48 | } 49 | -------------------------------------------------------------------------------- /docs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-kafka-connect/e640775592af3a4a80f7c5fbc09520e5a0005da4/docs/architecture.png -------------------------------------------------------------------------------- /docs/cropping_batches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-kafka-connect/e640775592af3a4a80f7c5fbc09520e5a0005da4/docs/cropping_batches.png -------------------------------------------------------------------------------- /docs/deduplication.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-kafka-connect/e640775592af3a4a80f7c5fbc09520e5a0005da4/docs/deduplication.png -------------------------------------------------------------------------------- /docs/full_state_machine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-kafka-connect/e640775592af3a4a80f7c5fbc09520e5a0005da4/docs/full_state_machine.png -------------------------------------------------------------------------------- /docs/invalid_states.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-kafka-connect/e640775592af3a4a80f7c5fbc09520e5a0005da4/docs/invalid_states.png -------------------------------------------------------------------------------- /docs/start_state.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-kafka-connect/e640775592af3a4a80f7c5fbc09520e5a0005da4/docs/start_state.png -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/clickhouse-kafka-connect/e640775592af3a4a80f7c5fbc09520e5a0005da4/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.4.2-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | # 21 | # Gradle start up script for POSIX generated by Gradle. 22 | # 23 | # Important for running: 24 | # 25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 26 | # noncompliant, but you have some other compliant shell such as ksh or 27 | # bash, then to run this script, type that shell name before the whole 28 | # command line, like: 29 | # 30 | # ksh Gradle 31 | # 32 | # Busybox and similar reduced shells will NOT work, because this script 33 | # requires all of these POSIX shell features: 34 | # * functions; 35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 37 | # * compound commands having a testable exit status, especially «case»; 38 | # * various built-in commands including «command», «set», and «ulimit». 39 | # 40 | # Important for patching: 41 | # 42 | # (2) This script targets any POSIX shell, so it avoids extensions provided 43 | # by Bash, Ksh, etc; in particular arrays are avoided. 
44 | # 45 | # The "traditional" practice of packing multiple parameters into a 46 | # space-separated string is a well documented source of bugs and security 47 | # problems, so this is (mostly) avoided, by progressively accumulating 48 | # options in "$@", and eventually passing that to Java. 49 | # 50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 52 | # see the in-line comments for details. 53 | # 54 | # There are tweaks for specific operating systems such as AIX, CygWin, 55 | # Darwin, MinGW, and NonStop. 56 | # 57 | # (3) This script is generated from the Groovy template 58 | # https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 59 | # within the Gradle project. 60 | # 61 | # You can find Gradle at https://github.com/gradle/gradle/. 62 | # 63 | ############################################################################## 64 | 65 | # Attempt to set APP_HOME 66 | 67 | # Resolve links: $0 may be a link 68 | app_path=$0 69 | 70 | # Need this for daisy-chained symlinks. 71 | while 72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 73 | [ -h "$app_path" ] 74 | do 75 | ls=$( ls -ld "$app_path" ) 76 | link=${ls#*' -> '} 77 | case $link in #( 78 | /*) app_path=$link ;; #( 79 | *) app_path=$APP_HOME$link ;; 80 | esac 81 | done 82 | 83 | APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit 84 | 85 | APP_NAME="Gradle" 86 | APP_BASE_NAME=${0##*/} 87 | 88 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 89 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 118 | 119 | 120 | # Determine the Java command to use to start the JVM. 121 | if [ -n "$JAVA_HOME" ] ; then 122 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 123 | # IBM's JDK on AIX uses strange locations for the executables 124 | JAVACMD=$JAVA_HOME/jre/sh/java 125 | else 126 | JAVACMD=$JAVA_HOME/bin/java 127 | fi 128 | if [ ! -x "$JAVACMD" ] ; then 129 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 130 | 131 | Please set the JAVA_HOME variable in your environment to match the 132 | location of your Java installation." 133 | fi 134 | else 135 | JAVACMD=java 136 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 137 | 138 | Please set the JAVA_HOME variable in your environment to match the 139 | location of your Java installation." 140 | fi 141 | 142 | # Increase the maximum file descriptors if we can. 143 | if ! "$cygwin" && ! "$darwin" && ! 
"$nonstop" ; then 144 | case $MAX_FD in #( 145 | max*) 146 | MAX_FD=$( ulimit -H -n ) || 147 | warn "Could not query maximum file descriptor limit" 148 | esac 149 | case $MAX_FD in #( 150 | '' | soft) :;; #( 151 | *) 152 | ulimit -n "$MAX_FD" || 153 | warn "Could not set maximum file descriptor limit to $MAX_FD" 154 | esac 155 | fi 156 | 157 | # Collect all arguments for the java command, stacking in reverse order: 158 | # * args from the command line 159 | # * the main class name 160 | # * -classpath 161 | # * -D...appname settings 162 | # * --module-path (only if needed) 163 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 164 | 165 | # For Cygwin or MSYS, switch paths to Windows format before running java 166 | if "$cygwin" || "$msys" ; then 167 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 168 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 169 | 170 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 171 | 172 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 173 | for arg do 174 | if 175 | case $arg in #( 176 | -*) false ;; # don't mess with options #( 177 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 178 | [ -e "$t" ] ;; #( 179 | *) false ;; 180 | esac 181 | then 182 | arg=$( cygpath --path --ignore --mixed "$arg" ) 183 | fi 184 | # Roll the args list around exactly as many times as the number of 185 | # args, so each arg winds up back in the position where it started, but 186 | # possibly modified. 187 | # 188 | # NB: a `for` loop captures its iteration list before it begins, so 189 | # changing the positional parameters here affects neither the number of 190 | # iterations, nor the values presented in `arg`. 191 | shift # remove old arg 192 | set -- "$@" "$arg" # push replacement arg 193 | done 194 | fi 195 | 196 | # Collect all arguments for the java command; 197 | # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of 198 | # shell script including quotes and variable substitutions, so put them in 199 | # double quotes to make sure that they get re-expanded; and 200 | # * put everything else in single quotes, so that it's not re-expanded. 201 | 202 | set -- \ 203 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 204 | -classpath "$CLASSPATH" \ 205 | org.gradle.wrapper.GradleWrapperMain \ 206 | "$@" 207 | 208 | # Use "xargs" to parse quoted args. 209 | # 210 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 211 | # 212 | # In Bash we could simply go: 213 | # 214 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 215 | # set -- "${ARGS[@]}" "$@" 216 | # 217 | # but POSIX shell has neither arrays nor command substitution, so instead we 218 | # post-process each arg (as a line of input to sed) to backslash-escape any 219 | # character that might be a shell metacharacter, then use eval to reverse 220 | # that process (while maintaining the separation between arguments), and wrap 221 | # the whole thing up as a single "set" statement. 222 | # 223 | # This will of course break if any of these variables contains a newline or 224 | # an unmatched quote. 
225 | # 226 | 227 | eval "set -- $( 228 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 229 | xargs -n1 | 230 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 231 | tr '\n' ' ' 232 | )" '"$@"' 233 | 234 | exec "$JAVACMD" "$@" 235 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto execute 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto execute 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 64 | 65 | goto fail 66 | 67 | :execute 68 | @rem Setup the command line 69 | 70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 71 | 72 | 73 | @rem Execute Gradle 74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 75 | 76 | :end 77 | @rem End local scope for the variables with windows NT shell 78 | if "%ERRORLEVEL%"=="0" goto mainEnd 79 | 80 | :fail 81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 82 | rem the _cmd.exe /c_ return code! 
83 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 84 | exit /b 1 85 | 86 | :mainEnd 87 | if "%OS%"=="Windows_NT" endlocal 88 | 89 | :omega 90 | -------------------------------------------------------------------------------- /jmx-export-connector.yml: -------------------------------------------------------------------------------- 1 | --- 2 | lowercaseOutputName: true 3 | lowercaseOutputLabelNames: true 4 | rules: 5 | - pattern: "com.clickhouse<>(.*): (.*)" 6 | name: clickhouse_kafka_connect 7 | labels: 8 | sinktask: "$1" 9 | version: "$2" 10 | attribute: "$3" 11 | help: "ClickHouseKafkaConnector metrics version $2" 12 | type: COUNTER 13 | - pattern: 'kafka.(.+)<>start-time-ms' 14 | name: kafka_$1_start_time_seconds 15 | labels: 16 | clientId: "$2" 17 | help: "Kafka $1 JMX metric start time seconds" 18 | type: GAUGE 19 | valueFactor: 0.001 20 | - pattern: 'kafka.(.+)<>(commit-id|version): (.+)' 21 | name: kafka_$1_$3_info 22 | value: 1 23 | labels: 24 | clientId: "$2" 25 | $3: "$4" 26 | help: "Kafka $1 JMX metric info version and commit-id" 27 | type: GAUGE 28 | 29 | #kafka.producer:type=producer-topic-metrics,client-id="{clientid}",topic="{topic}"", partition="{partition}" 30 | #kafka.consumer:type=consumer-fetch-manager-metrics,client-id="{clientid}",topic="{topic}"", partition="{partition}" 31 | - pattern: kafka.(.+)<>(.+-total|compression-rate|.+-avg|.+-replica|.+-lag|.+-lead) 32 | name: kafka_$2_$6 33 | labels: 34 | clientId: "$3" 35 | topic: "$4" 36 | partition: "$5" 37 | help: "Kafka $1 JMX metric type $2" 38 | type: GAUGE 39 | 40 | #kafka.producer:type=producer-topic-metrics,client-id="{clientid}",topic="{topic}" 41 | #kafka.consumer:type=consumer-fetch-manager-metrics,client-id="{clientid}",topic="{topic}"", partition="{partition}" 42 | - pattern: kafka.(.+)<>(.+-total|compression-rate|.+-avg) 43 | name: kafka_$2_$5 44 | labels: 45 | clientId: "$3" 46 | topic: "$4" 47 | help: "Kafka $1 JMX metric type $2" 48 | type: GAUGE 49 | 50 | #kafka.connect:type=connect-node-metrics,client-id="{clientid}",node-id="{nodeid}" 51 | #kafka.consumer:type=consumer-node-metrics,client-id=consumer-1,node-id="{nodeid}" 52 | - pattern: kafka.(.+)<>(.+-total|.+-avg) 53 | name: kafka_$2_$5 54 | labels: 55 | clientId: "$3" 56 | nodeId: "$4" 57 | help: "Kafka $1 JMX metric type $2" 58 | type: UNTYPED 59 | 60 | #kafka.connect:type=kafka-metrics-count,client-id="{clientid}" 61 | #kafka.consumer:type=consumer-fetch-manager-metrics,client-id="{clientid}" 62 | #kafka.consumer:type=consumer-coordinator-metrics,client-id="{clientid}" 63 | #kafka.consumer:type=consumer-metrics,client-id="{clientid}" 64 | - pattern: kafka.(.+)<>(.+-total|.+-avg|.+-bytes|.+-count|.+-ratio|.+-age|.+-flight|.+-threads|.+-connectors|.+-tasks|.+-ago) 65 | name: kafka_$2_$4 66 | labels: 67 | clientId: "$3" 68 | help: "Kafka $1 JMX metric type $2" 69 | type: GAUGE 70 | 71 | #kafka.connect:type=connector-task-metrics,connector="{connector}",task="{task}<> status" 72 | - pattern: 'kafka.connect<>status: ([a-z-]+)' 73 | name: kafka_connect_connector_status 74 | value: 1 75 | labels: 76 | connector: "$1" 77 | task: "$2" 78 | status: "$3" 79 | help: "Kafka Connect JMX Connector status" 80 | type: GAUGE 81 | 82 | #kafka.connect:type=task-error-metrics,connector="{connector}",task="{task}" 83 | #kafka.connect:type=source-task-metrics,connector="{connector}",task="{task}" 84 | #kafka.connect:type=sink-task-metrics,connector="{connector}",task="{task}" 85 | #kafka.connect:type=connector-task-metrics,connector="{connector}",task="{task}" 86 | - 
pattern: kafka.connect<>(.+-total|.+-count|.+-ms|.+-ratio|.+-avg|.+-failures|.+-requests|.+-timestamp|.+-logged|.+-errors|.+-retries|.+-skipped) 87 | name: kafka_connect_$1_$4 88 | labels: 89 | connector: "$2" 90 | task: "$3" 91 | help: "Kafka Connect JMX metric type $1" 92 | type: GAUGE 93 | 94 | #kafka.connect:type=connector-metrics,connector="{connector}" 95 | #kafka.connect:type=connect-worker-metrics,connector="{connector}" 96 | - pattern: kafka.connect<>([a-z-]+) 97 | name: kafka_connect_worker_$2 98 | labels: 99 | connector: "$1" 100 | help: "Kafka Connect JMX metric $1" 101 | type: GAUGE 102 | 103 | #kafka.connect:type=connect-worker-metrics 104 | - pattern: kafka.connect<>([a-z-]+) 105 | name: kafka_connect_worker_$1 106 | help: "Kafka Connect JMX metric worker" 107 | type: GAUGE 108 | 109 | #kafka.connect:type=connect-worker-rebalance-metrics 110 | - pattern: kafka.connect<>([a-z-]+) 111 | name: kafka_connect_worker_rebalance_$1 112 | help: "Kafka Connect JMX metric rebalance information" 113 | type: GAUGE -------------------------------------------------------------------------------- /settings.gradle.kts: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | * 4 | * The settings file is used to specify which projects to include in your build. 5 | * 6 | * Detailed information about configuring a multi-project build in Gradle can be found 7 | * in the user manual at https://docs.gradle.org/7.4.2/userguide/multi_project_builds.html 8 | * This project uses @Incubating APIs which are subject to change. 9 | */ 10 | 11 | rootProject.name = "clickhouse-kafka-connect" 12 | -------------------------------------------------------------------------------- /src/integrationTest/java/com/clickhouse/kafka/connect/sink/ClickHouseCloudTest.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink; 2 | 3 | import com.clickhouse.client.ClickHouseProtocol; 4 | import com.clickhouse.kafka.connect.ClickHouseSinkConnector; 5 | import com.clickhouse.kafka.connect.sink.db.helper.ClickHouseHelperClient; 6 | import com.clickhouse.kafka.connect.sink.helper.ClickHouseTestHelpers; 7 | import com.clickhouse.kafka.connect.sink.helper.SchemalessTestData; 8 | import org.apache.kafka.connect.sink.SinkRecord; 9 | import org.junit.jupiter.api.Test; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | import java.util.ArrayList; 14 | import java.util.Collection; 15 | import java.util.HashMap; 16 | import java.util.Map; 17 | import java.util.Properties; 18 | 19 | import static org.junit.Assert.assertTrue; 20 | 21 | public class ClickHouseCloudTest { 22 | private static final Logger LOGGER = LoggerFactory.getLogger(ClickHouseCloudTest.class); 23 | private static final Properties properties = System.getProperties(); 24 | 25 | private ClickHouseHelperClient createClient(Map props) { 26 | ClickHouseSinkConfig csc = new ClickHouseSinkConfig(props); 27 | 28 | String hostname = csc.getHostname(); 29 | int port = csc.getPort(); 30 | String database = csc.getDatabase(); 31 | String username = csc.getUsername(); 32 | String password = csc.getPassword(); 33 | boolean sslEnabled = csc.isSslEnabled(); 34 | int timeout = csc.getTimeout(); 35 | 36 | 37 | return new ClickHouseHelperClient.ClickHouseClientBuilder(hostname, port, csc.getProxyType(), csc.getProxyHost(), csc.getProxyPort()) 38 | .setDatabase(database) 39 | .setUsername(username) 
40 | .setPassword(password) 41 | .sslEnable(sslEnabled) 42 | .setTimeout(timeout) 43 | .setRetry(csc.getRetry()) 44 | .build(); 45 | } 46 | 47 | 48 | private Map getTestProperties() { 49 | Map props = new HashMap<>(); 50 | props.put(ClickHouseSinkConnector.HOSTNAME, String.valueOf(properties.getOrDefault("clickhouse.host", "clickhouse"))); 51 | props.put(ClickHouseSinkConnector.PORT, String.valueOf(properties.getOrDefault("clickhouse.port", ClickHouseProtocol.HTTP.getDefaultPort()))); 52 | props.put(ClickHouseSinkConnector.DATABASE, String.valueOf(properties.getOrDefault("clickhouse.database", "default"))); 53 | props.put(ClickHouseSinkConnector.USERNAME, String.valueOf(properties.getOrDefault("clickhouse.username", "default"))); 54 | props.put(ClickHouseSinkConnector.PASSWORD, String.valueOf(properties.getOrDefault("clickhouse.password", ""))); 55 | props.put(ClickHouseSinkConnector.SSL_ENABLED, "true"); 56 | return props; 57 | } 58 | 59 | 60 | 61 | @Test 62 | public void overlappingDataTest() { 63 | Map props = getTestProperties(); 64 | ClickHouseHelperClient chc = createClient(props); 65 | String topic = "schemaless_overlap_table_test"; 66 | ClickHouseTestHelpers.dropTable(chc, topic); 67 | ClickHouseTestHelpers.createTable(chc, topic, "CREATE TABLE %s ( `indexCount` Int64, `off16` Int16, `str` String, `p_int8` Int8, `p_int16` Int16, `p_int32` Int32, " + 68 | "`p_int64` Int64, `p_float32` Float32, `p_float64` Float64, `p_bool` Bool) Engine = ReplicatedMergeTree ORDER BY off16"); 69 | Collection sr = SchemalessTestData.createPrimitiveTypes(topic, 1); 70 | Collection firstBatch = new ArrayList<>(); 71 | Collection secondBatch = new ArrayList<>(); 72 | Collection thirdBatch = new ArrayList<>(); 73 | 74 | //For the sake of the comments, assume size = 100 75 | int firstBatchEndIndex = sr.size() / 2; // 0 - 50 76 | int secondBatchStartIndex = firstBatchEndIndex - sr.size() / 4; // 25 77 | int secondBatchEndIndex = firstBatchEndIndex + sr.size() / 4; // 75 78 | 79 | for (SinkRecord record : sr) { 80 | if (record.kafkaOffset() <= firstBatchEndIndex) { 81 | firstBatch.add(record); 82 | } 83 | 84 | if (record.kafkaOffset() >= secondBatchStartIndex && record.kafkaOffset() <= secondBatchEndIndex) { 85 | secondBatch.add(record); 86 | } 87 | 88 | if (record.kafkaOffset() >= secondBatchStartIndex) { 89 | thirdBatch.add(record); 90 | } 91 | } 92 | 93 | ClickHouseSinkTask chst = new ClickHouseSinkTask(); 94 | chst.start(props); 95 | chst.put(firstBatch); 96 | chst.stop(); 97 | chst.start(props); 98 | chst.put(secondBatch); 99 | chst.stop(); 100 | chst.start(props); 101 | chst.put(thirdBatch); 102 | chst.stop(); 103 | LOGGER.info("Total Records: {}", sr.size()); 104 | LOGGER.info("Row Count: {}", ClickHouseTestHelpers.countRows(chc, topic)); 105 | assertTrue(ClickHouseTestHelpers.countRows(chc, topic) >= sr.size()); 106 | assertTrue(ClickHouseTestHelpers.checkSequentialRows(chc, topic, sr.size())); 107 | ClickHouseTestHelpers.dropTable(chc, topic); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/integrationTest/java/com/clickhouse/kafka/connect/sink/ExactlyOnceTest.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink; 2 | 3 | import com.clickhouse.client.ClickHouseProtocol; 4 | import com.clickhouse.client.api.query.Records; 5 | import com.clickhouse.client.config.ClickHouseProxyType; 6 | import com.clickhouse.kafka.connect.sink.db.helper.ClickHouseHelperClient; 7 | 
import com.clickhouse.kafka.connect.sink.helper.ClickHouseAPI; 8 | import com.clickhouse.kafka.connect.sink.helper.ConfluentPlatform; 9 | import org.junit.jupiter.api.AfterAll; 10 | import org.junit.jupiter.api.BeforeAll; 11 | import org.junit.jupiter.api.BeforeEach; 12 | import org.junit.jupiter.api.Test; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | import org.testcontainers.containers.Network; 16 | 17 | import java.io.File; 18 | import java.io.IOException; 19 | import java.net.URISyntaxException; 20 | import java.nio.file.Files; 21 | import java.nio.file.Paths; 22 | import java.util.LinkedList; 23 | import java.util.List; 24 | import java.util.Properties; 25 | 26 | import static com.clickhouse.kafka.connect.sink.helper.ClickHouseAPI.createReplicatedMergeTreeTable; 27 | import static com.clickhouse.kafka.connect.sink.helper.ClickHouseAPI.dropTable; 28 | import static org.junit.jupiter.api.Assertions.assertTrue; 29 | 30 | 31 | public class ExactlyOnceTest { 32 | private static final Logger LOGGER = LoggerFactory.getLogger(ExactlyOnceTest.class); 33 | public static ConfluentPlatform confluentPlatform; 34 | private static ClickHouseAPI clickhouseAPI; 35 | private static ClickHouseHelperClient chcNoProxy; 36 | private static final Properties properties = System.getProperties(); 37 | private static final String SINK_CONNECTOR_NAME = "ClickHouseSinkConnector"; 38 | 39 | @BeforeAll 40 | public static void setUp() { 41 | chcNoProxy = new ClickHouseHelperClient.ClickHouseClientBuilder(properties.getProperty("clickhouse.host"), Integer.parseInt(properties.getProperty("clickhouse.port")), 42 | ClickHouseProxyType.IGNORE, null, -1) 43 | .setUsername((String) properties.getOrDefault("clickhouse.username", "default")) 44 | .setPassword(properties.getProperty("clickhouse.password")) 45 | .sslEnable(true) 46 | .build(); 47 | clickhouseAPI = new ClickHouseAPI(properties); 48 | 49 | 50 | Network network = Network.newNetwork(); 51 | List connectorPath = new LinkedList<>(); 52 | String confluentArchive = new File(Paths.get("build/confluentArchive").toString()).getAbsolutePath(); 53 | connectorPath.add(confluentArchive); 54 | confluentPlatform = new ConfluentPlatform(network, connectorPath); 55 | } 56 | 57 | @AfterAll 58 | public static void tearDown() { 59 | confluentPlatform.close(); 60 | } 61 | 62 | 63 | 64 | 65 | 66 | 67 | private static void setupSchemaConnector(String topicName, int taskCount) throws IOException, InterruptedException { 68 | LOGGER.info("Setting up connector..."); 69 | setupConnector("src/integrationTest/resources/clickhouse_sink_no_proxy.json", topicName, taskCount); 70 | Thread.sleep(5 * 1000); 71 | } 72 | private static void setupSchemalessConnector(String topicName, int taskCount) throws IOException, InterruptedException { 73 | LOGGER.info("Setting schemaless up connector..."); 74 | setupConnector("src/integrationTest/resources/clickhouse_sink_no_proxy_schemaless.json", topicName, taskCount); 75 | Thread.sleep(5 * 1000); 76 | } 77 | private static void setupConnector(String fileName, String topicName, int taskCount) throws IOException { 78 | System.out.println("Setting up connector..."); 79 | dropTable(chcNoProxy, topicName); 80 | createReplicatedMergeTreeTable(chcNoProxy, topicName); 81 | 82 | String payloadClickHouseSink = String.join("", Files.readAllLines(Paths.get(fileName))); 83 | String jsonString = String.format(payloadClickHouseSink, SINK_CONNECTOR_NAME, SINK_CONNECTOR_NAME, taskCount, topicName, 84 | properties.getOrDefault("clickhouse.host", 
"clickhouse"), 85 | properties.getOrDefault("clickhouse.port", ClickHouseProtocol.HTTP.getDefaultPort()), 86 | properties.getOrDefault("clickhouse.database", "default"), 87 | properties.getOrDefault("clickhouse.username", "default"), 88 | properties.getOrDefault("clickhouse.password", ""), 89 | true); 90 | 91 | confluentPlatform.createConnect(jsonString); 92 | } 93 | 94 | 95 | 96 | private int generateData(String topicName, int numberOfPartitions, int numberOfRecords) throws IOException, InterruptedException { 97 | return confluentPlatform.generateData("src/integrationTest/resources/stock_gen.json", topicName, numberOfPartitions, numberOfRecords); 98 | } 99 | private int generateSchemalessData(String topicName, int numberOfPartitions, int numberOfRecords) throws IOException, InterruptedException { 100 | return confluentPlatform.generateData("src/integrationTest/resources/stock_gen_json.json", topicName, numberOfPartitions, numberOfRecords); 101 | } 102 | 103 | 104 | 105 | 106 | 107 | @BeforeEach 108 | public void beforeEach() throws IOException { 109 | confluentPlatform.deleteConnectors(SINK_CONNECTOR_NAME); 110 | } 111 | 112 | 113 | private boolean compareSchemalessCounts(String topicName, int partitions) throws InterruptedException, IOException { 114 | createReplicatedMergeTreeTable(chcNoProxy, topicName); 115 | ClickHouseAPI.clearTable(chcNoProxy, topicName); 116 | confluentPlatform.createTopic(topicName, partitions); 117 | int count = generateSchemalessData(topicName, partitions, 250); 118 | LOGGER.info("Expected Total: {}", count); 119 | setupSchemalessConnector(topicName, partitions); 120 | ClickHouseAPI.waitWhileCounting(chcNoProxy, topicName, 5); 121 | 122 | int[] databaseCounts = ClickHouseAPI.getCounts(chcNoProxy, topicName);//Essentially the final count 123 | ClickHouseAPI.dropTable(chcNoProxy, topicName); 124 | return databaseCounts[2] == 0 && databaseCounts[1] == count; 125 | } 126 | 127 | @Test 128 | public void checkTotalsEqual() throws InterruptedException, IOException { 129 | assertTrue(compareSchemalessCounts("singlePartitionTopic", 1)); 130 | } 131 | 132 | @Test 133 | public void checkTotalsEqualMulti() throws InterruptedException, IOException { 134 | assertTrue(compareSchemalessCounts("multiPartitionTopic", 3)); 135 | } 136 | 137 | 138 | private void checkSpottyNetworkSchemaless(String topicName, int numberOfPartitions) throws InterruptedException, IOException, URISyntaxException { 139 | boolean allSuccess = true; 140 | int runCount = 1; 141 | do { 142 | LOGGER.info("Run: {}", runCount); 143 | confluentPlatform.createTopic(topicName, numberOfPartitions); 144 | createReplicatedMergeTreeTable(chcNoProxy, topicName); 145 | ClickHouseAPI.clearTable(chcNoProxy, topicName); 146 | 147 | int count = generateSchemalessData(topicName, numberOfPartitions, 1500); 148 | setupSchemalessConnector(topicName, numberOfPartitions); 149 | 150 | clickhouseAPI.restartService(); 151 | confluentPlatform.restartConnector(SINK_CONNECTOR_NAME); 152 | 153 | LOGGER.info("Expected Total: {}", count); 154 | ClickHouseAPI.waitWhileCounting(chcNoProxy, topicName, 7); 155 | 156 | int[] databaseCounts = ClickHouseAPI.getCounts(chcNoProxy, topicName);//Essentially the final count 157 | if (databaseCounts[2] != 0 || databaseCounts[1] != count) { 158 | allSuccess = false; 159 | LOGGER.error("Duplicates: {}", databaseCounts[2]); 160 | Records records = ClickHouseAPI.selectDuplicates(chcNoProxy, topicName); 161 | records.forEach(record -> LOGGER.error("Duplicate: {}", record)); 162 | } 163 | 164 | 
confluentPlatform.deleteConnectors(SINK_CONNECTOR_NAME); 165 | confluentPlatform.deleteTopic(topicName); 166 | ClickHouseAPI.dropTable(chcNoProxy, topicName); 167 | runCount++; 168 | } while (runCount < 3 && allSuccess); 169 | 170 | assertTrue(allSuccess); 171 | } 172 | 173 | @Test 174 | public void checkSpottyNetwork() throws InterruptedException, IOException, URISyntaxException { 175 | checkSpottyNetworkSchemaless("checkSpottyNetworkSinglePartition", 1); 176 | } 177 | 178 | @Test 179 | public void checkSpottyNetworkMulti() throws InterruptedException, IOException, URISyntaxException { 180 | checkSpottyNetworkSchemaless("checkSpottyNetworkMultiPartitions", 3); 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /src/integrationTest/java/com/clickhouse/kafka/connect/sink/helper/ClickHouseTestHelpers.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.helper; 2 | 3 | import com.clickhouse.client.ClickHouseClient; 4 | import com.clickhouse.client.ClickHouseException; 5 | import com.clickhouse.client.ClickHouseNodeSelector; 6 | import com.clickhouse.client.ClickHouseProtocol; 7 | import com.clickhouse.client.ClickHouseResponse; 8 | import com.clickhouse.data.ClickHouseRecord; 9 | import com.clickhouse.kafka.connect.sink.db.helper.ClickHouseHelperClient; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | public class ClickHouseTestHelpers { 14 | private static final Logger LOGGER = LoggerFactory.getLogger(ClickHouseTestHelpers.class); 15 | public static final String CLICKHOUSE_DOCKER_IMAGE = "clickhouse/clickhouse-server:23.8"; 16 | public static void dropTable(ClickHouseHelperClient chc, String tableName) { 17 | String dropTable = String.format("DROP TABLE IF EXISTS `%s`", tableName); 18 | try (ClickHouseClient client = ClickHouseClient.builder() 19 | .options(chc.getDefaultClientOptions()) 20 | .nodeSelector(ClickHouseNodeSelector.of(ClickHouseProtocol.HTTP)) 21 | .build(); 22 | ClickHouseResponse response = client.read(chc.getServer()) 23 | .query(dropTable) 24 | .executeAndWait()) { 25 | return; 26 | } catch (ClickHouseException e) { 27 | throw new RuntimeException(e); 28 | } 29 | } 30 | 31 | public static void createTable(ClickHouseHelperClient chc, String tableName, String createTableQuery) { 32 | String createTableQueryTmp = String.format(createTableQuery, tableName); 33 | 34 | try (ClickHouseClient client = ClickHouseClient.builder() 35 | .options(chc.getDefaultClientOptions()) 36 | .nodeSelector(ClickHouseNodeSelector.of(ClickHouseProtocol.HTTP)) 37 | .build(); 38 | ClickHouseResponse response = client.read(chc.getServer()) 39 | .query(createTableQueryTmp) 40 | .executeAndWait()) { 41 | return; 42 | } catch (ClickHouseException e) { 43 | throw new RuntimeException(e); 44 | } 45 | } 46 | 47 | public static int countRows(ClickHouseHelperClient chc, String tableName) { 48 | String queryCount = String.format("SELECT COUNT(*) FROM `%s`", tableName); 49 | return runQuery(chc, queryCount); 50 | } 51 | 52 | public static int sumRows(ClickHouseHelperClient chc, String tableName, String column) { 53 | String queryCount = String.format("SELECT SUM(`%s`) FROM `%s`", column, tableName); 54 | return runQuery(chc, queryCount); 55 | } 56 | 57 | public static int countRowsWithEmojis(ClickHouseHelperClient chc, String tableName) { 58 | String queryCount = "SELECT COUNT(*) FROM `" + tableName + "` WHERE str LIKE '%\uD83D\uDE00%'"; 59 | return 
runQuery(chc, queryCount); 60 | } 61 | 62 | private static int runQuery(ClickHouseHelperClient chc, String query) { 63 | try (ClickHouseClient client = ClickHouseClient.builder() 64 | .options(chc.getDefaultClientOptions()) 65 | .nodeSelector(ClickHouseNodeSelector.of(ClickHouseProtocol.HTTP)) 66 | .build(); 67 | ClickHouseResponse response = client.read(chc.getServer()) 68 | .query(query) 69 | .executeAndWait()) { 70 | return response.firstRecord().getValue(0).asInteger(); 71 | } catch (ClickHouseException e) { 72 | throw new RuntimeException(e); 73 | } 74 | } 75 | 76 | public static boolean checkSequentialRows(ClickHouseHelperClient chc, String tableName, int totalRecords) { 77 | String queryCount = String.format("SELECT DISTINCT `indexCount` FROM `%s` ORDER BY `indexCount` ASC", tableName); 78 | try (ClickHouseClient client = ClickHouseClient.builder() 79 | .options(chc.getDefaultClientOptions()) 80 | .nodeSelector(ClickHouseNodeSelector.of(ClickHouseProtocol.HTTP)) 81 | .build(); 82 | ClickHouseResponse response = client.read(chc.getServer()) 83 | .query(queryCount) 84 | .executeAndWait()) { 85 | 86 | int expectedIndexCount = 0; 87 | for (ClickHouseRecord record : response.records()) { 88 | int currentIndexCount = record.getValue(0).asInteger(); 89 | if (currentIndexCount != expectedIndexCount) { 90 | LOGGER.error("currentIndexCount: {}, expectedIndexCount: {}", currentIndexCount, expectedIndexCount); 91 | return false; 92 | } 93 | expectedIndexCount++; 94 | } 95 | 96 | LOGGER.info("Total Records: {}, expectedIndexCount: {}", totalRecords, expectedIndexCount); 97 | return totalRecords == expectedIndexCount; 98 | } catch (ClickHouseException e) { 99 | throw new RuntimeException(e); 100 | } 101 | } 102 | 103 | public static void waitWhileCounting(ClickHouseHelperClient chc, String tableName, int sleepInSeconds) { 104 | int databaseCount = countRows(chc, tableName); 105 | int lastCount = 0; 106 | int loopCount = 0; 107 | 108 | while(databaseCount != lastCount || loopCount < 6) { 109 | try { 110 | Thread.sleep(sleepInSeconds * 1000L); 111 | } catch (InterruptedException e) { 112 | throw new RuntimeException(e); 113 | } 114 | databaseCount = countRows(chc, tableName); 115 | if (lastCount == databaseCount) { 116 | loopCount++; 117 | } else { 118 | loopCount = 0; 119 | } 120 | 121 | lastCount = databaseCount; 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/integrationTest/resources/clickhouse_sink.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "config": { 4 | "name": "%s", 5 | "connector.class": "com.clickhouse.kafka.connect.ClickHouseSinkConnector", 6 | "tasks.max": "%d", 7 | "topics": "%s", 8 | "hostname": "%s", 9 | "port": "%s", 10 | "database": "default", 11 | "username": "default", 12 | "password": "%s", 13 | "ssl": "false", 14 | "exactlyOnce" : "false", 15 | "proxyType": "http", 16 | "proxyHost": "%s", 17 | "proxyPort": "%s" 18 | } 19 | } -------------------------------------------------------------------------------- /src/integrationTest/resources/clickhouse_sink_no_proxy.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "config": { 4 | "name": "%s", 5 | "connector.class": "com.clickhouse.kafka.connect.ClickHouseSinkConnector", 6 | "tasks.max": "%d", 7 | "topics": "%s", 8 | "hostname": "%s", 9 | "port": "%s", 10 | "database": "%s", 11 | "username": "%s", 12 | "password": "%s", 13 | "ssl": 
"true", 14 | "exactlyOnce" : "%b" 15 | } 16 | } -------------------------------------------------------------------------------- /src/integrationTest/resources/clickhouse_sink_no_proxy_schemaless.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "config": { 4 | "value.converter.schemas.enable": "false", 5 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 6 | "name": "%s", 7 | "connector.class": "com.clickhouse.kafka.connect.ClickHouseSinkConnector", 8 | "tasks.max": "%d", 9 | "topics": "%s", 10 | "hostname": "%s", 11 | "port": "%s", 12 | "database": "%s", 13 | "username": "%s", 14 | "password": "%s", 15 | "ssl": "true", 16 | "exactlyOnce" : "%b" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/integrationTest/resources/clickhouse_sink_schemaless.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "config": { 4 | "value.converter.schemas.enable": "false", 5 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 6 | "name": "%s", 7 | "connector.class": "com.clickhouse.kafka.connect.ClickHouseSinkConnector", 8 | "tasks.max": "%d", 9 | "topics": "%s", 10 | "hostname": "%s", 11 | "port": "%s", 12 | "database": "default", 13 | "username": "default", 14 | "password": "%s", 15 | "ssl": "false", 16 | "exactlyOnce" : "false", 17 | "proxyType": "http", 18 | "proxyHost": "%s", 19 | "proxyPort": "%s" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/integrationTest/resources/clickhouse_sink_with_jdbc_prop.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "config": { 4 | "name": "%s", 5 | "connector.class": "com.clickhouse.kafka.connect.ClickHouseSinkConnector", 6 | "tasks.max": "%d", 7 | "topics": "%s", 8 | "hostname": "%s", 9 | "port": "%s", 10 | "database": "default", 11 | "username": "default", 12 | "password": "%s", 13 | "ssl": "false", 14 | "exactlyOnce" : "false", 15 | "proxyType": "http", 16 | "proxyHost": "%s", 17 | "proxyPort": "%s", 18 | "jdbcConnectionProperties": "?load_balancing_policy=random&health_check_interval=5000&failover=2" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/integrationTest/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Define the root logger with appender X 2 | log4j.rootLogger=INFO, console 3 | #log4j.logger.org.testcontainers=WARN 4 | #log4j.logger.com.clickhouse=DEBUG 5 | #log4j.logger.com.clickhouse.kafka.connect=DEBUG 6 | 7 | log4j.appender.console= org.apache.log4j.ConsoleAppender 8 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.console.layout.conversionPattern=[%d] %p %C %m%n -------------------------------------------------------------------------------- /src/integrationTest/resources/stock_gen.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "config": { 4 | "name": "%s", 5 | "connector.class": "io.confluent.kafka.connect.datagen.DatagenConnector", 6 | "tasks.max": "%d", 7 | "kafka.topic": "%s", 8 | "max.interval": "100", 9 | "iterations": "%d", 10 | "quickstart": "Stock_Trades" 11 | } 12 | } 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- 
/src/integrationTest/resources/stock_gen_json.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "config": { 4 | "name": "%s", 5 | "value.converter.schemas.enable": "false", 6 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 7 | "connector.class": "io.confluent.kafka.connect.datagen.DatagenConnector", 8 | "tasks.max": "%d", 9 | "kafka.topic": "%s", 10 | "max.interval": "100", 11 | "iterations": "%d", 12 | "quickstart": "Stock_Trades" 13 | } 14 | } 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/ClickHouseSinkConnector.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect; 2 | 3 | import com.clickhouse.client.config.ClickHouseClientOption; 4 | import com.clickhouse.kafka.connect.sink.ClickHouseSinkConfig; 5 | import com.clickhouse.kafka.connect.sink.ClickHouseSinkTask; 6 | import org.apache.kafka.common.config.Config; 7 | import org.apache.kafka.common.config.ConfigDef; 8 | import org.apache.kafka.connect.connector.Task; 9 | import org.apache.kafka.connect.sink.SinkConnector; 10 | import org.apache.kafka.connect.sink.SinkConnectorContext; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | import java.util.ArrayList; 15 | import java.util.List; 16 | import java.util.Map; 17 | import java.util.stream.Collectors; 18 | 19 | public class ClickHouseSinkConnector extends SinkConnector { 20 | 21 | private static final Logger LOGGER = LoggerFactory.getLogger(ClickHouseSinkConnector.class); 22 | 23 | private String hostname; 24 | private String port; 25 | private String database; 26 | 27 | private String username; 28 | 29 | private String password; 30 | 31 | private String sslEnabled; 32 | 33 | private int timeout; 34 | public static final String HOSTNAME = "hostname"; 35 | public static final String PORT = "port"; 36 | public static final String DATABASE = "database"; 37 | public static final String USERNAME = "username"; 38 | public static final String PASSWORD = "password"; 39 | public static final String SSL_ENABLED = "ssl"; 40 | public static final String CLIENT_VERSION = "client_version"; 41 | private static final ConfigDef CONFIG_DEF = ClickHouseSinkConfig.CONFIG; 42 | 43 | 44 | private Map settings; 45 | 46 | private String convertWithStream(Map map) { 47 | String mapAsString = map.keySet().stream() 48 | .map(key -> key + "=" + map.get(key)) 49 | .collect(Collectors.joining(", ", "{", "}")); 50 | return mapAsString; 51 | } 52 | 53 | @Override 54 | public void start(Map props) { 55 | LOGGER.info("Starting SinkConnect..."); 56 | LOGGER.info("Version: " + ClickHouseClientOption.class.getPackage().getImplementationVersion()); 57 | settings = props; 58 | } 59 | 60 | @Override 61 | public Class taskClass() { 62 | return ClickHouseSinkTask.class; 63 | } 64 | 65 | @Override 66 | public List> taskConfigs(int maxTasks) { 67 | ArrayList> configs = new ArrayList<>(); 68 | for (int i = 0; i < maxTasks; i++) { 69 | configs.add(settings); 70 | } 71 | return configs; 72 | } 73 | 74 | @Override 75 | public void stop() { 76 | LOGGER.info("stop SinkConnect"); 77 | } 78 | 79 | @Override 80 | protected SinkConnectorContext context() { 81 | return super.context(); 82 | } 83 | 84 | @Override 85 | public ConfigDef config() { 86 | return CONFIG_DEF; 87 | } 88 | 89 | @Override 90 | public String version() { 91 | return 
ClickHouseClientOption.class.getPackage().getImplementationVersion(); 92 | } 93 | 94 | @Override 95 | public Config validate(Map connectorConfigs) { 96 | Config config = super.validate(connectorConfigs); 97 | ClickHouseSinkConfig sinkConfig; 98 | try { 99 | sinkConfig = new ClickHouseSinkConfig(connectorConfigs); 100 | } catch (Exception e) { 101 | return config; 102 | } 103 | return config; 104 | } 105 | } -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/ClickHouseSinkTask.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink; 2 | 3 | import com.clickhouse.kafka.connect.sink.dlq.ErrorReporter; 4 | import com.clickhouse.kafka.connect.util.Utils; 5 | import org.apache.kafka.clients.consumer.OffsetAndMetadata; 6 | import org.apache.kafka.common.TopicPartition; 7 | import org.apache.kafka.connect.errors.ConnectException; 8 | import org.apache.kafka.connect.sink.ErrantRecordReporter; 9 | import org.apache.kafka.connect.sink.SinkRecord; 10 | import org.apache.kafka.connect.sink.SinkTask; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | import java.util.Collection; 15 | import java.util.Map; 16 | 17 | public class ClickHouseSinkTask extends SinkTask { 18 | 19 | private static final Logger LOGGER = LoggerFactory.getLogger(ClickHouseSinkTask.class); 20 | 21 | private ProxySinkTask proxySinkTask; 22 | private ClickHouseSinkConfig clickHouseSinkConfig; 23 | private ErrorReporter errorReporter; 24 | 25 | @Override 26 | public String version() { 27 | return "0.0.1"; 28 | } 29 | 30 | @Override 31 | public void start(Map props) { 32 | LOGGER.info("Start SinkTask: "); 33 | try { 34 | clickHouseSinkConfig = new ClickHouseSinkConfig(props); 35 | errorReporter = createErrorReporter(); 36 | } catch (Exception e) { 37 | throw new ConnectException("Failed to start new task" , e); 38 | } 39 | 40 | this.proxySinkTask = new ProxySinkTask(clickHouseSinkConfig, errorReporter); 41 | } 42 | 43 | 44 | @Override 45 | public void put(Collection records) { 46 | try { 47 | long putStat = System.currentTimeMillis(); 48 | this.proxySinkTask.put(records); 49 | long putEnd = System.currentTimeMillis(); 50 | if (!records.isEmpty()) { 51 | LOGGER.info("Put records: {} in {} ms", records.size(), putEnd - putStat); 52 | } 53 | } catch (Exception e) { 54 | LOGGER.trace("Passing the exception to the exception handler."); 55 | boolean errorTolerance = clickHouseSinkConfig != null && clickHouseSinkConfig.isErrorsTolerance(); 56 | Utils.handleException(e, errorTolerance, records); 57 | if (errorTolerance && errorReporter != null) { 58 | LOGGER.warn("Sending [{}] records to DLQ for exception: {}", records.size(), e.getLocalizedMessage()); 59 | records.forEach(r -> Utils.sendTODlq(errorReporter, r, e)); 60 | } 61 | } 62 | } 63 | 64 | 65 | // TODO: can be removed ss 66 | @Override 67 | public void flush(Map offsets) { 68 | LOGGER.trace("Test"); 69 | } 70 | 71 | @Override 72 | public void stop() { 73 | if (this.proxySinkTask != null) { 74 | this.proxySinkTask.stop(); 75 | } 76 | } 77 | 78 | public void setErrorReporter(ErrorReporter errorReporter) { 79 | this.errorReporter = errorReporter; 80 | } 81 | 82 | 83 | private ErrorReporter createErrorReporter() { 84 | ErrorReporter result = devNullErrorReporter(); 85 | if (context != null) { 86 | try { 87 | ErrantRecordReporter errantRecordReporter = context.errantRecordReporter(); 88 | if (errantRecordReporter 
!= null) { 89 | result = errantRecordReporter::report; 90 | } else { 91 | LOGGER.info("Errant record reporter not configured."); 92 | } 93 | } catch (NoClassDefFoundError | NoSuchMethodError e) { 94 | // Will occur in Connect runtimes earlier than 2.6 95 | LOGGER.info("Kafka versions prior to 2.6 do not support the errant record reporter."); 96 | } 97 | } 98 | return result; 99 | } 100 | 101 | static ErrorReporter devNullErrorReporter() { 102 | return (record, e) -> { 103 | }; 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/ProxySinkTask.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink; 2 | 3 | import com.clickhouse.client.config.ClickHouseClientOption; 4 | import com.clickhouse.kafka.connect.sink.data.Record; 5 | import com.clickhouse.kafka.connect.sink.db.ClickHouseWriter; 6 | import com.clickhouse.kafka.connect.sink.db.DBWriter; 7 | import com.clickhouse.kafka.connect.sink.db.TableMappingRefresher; 8 | import com.clickhouse.kafka.connect.sink.dlq.ErrorReporter; 9 | import com.clickhouse.kafka.connect.sink.processing.Processing; 10 | import com.clickhouse.kafka.connect.sink.state.StateProvider; 11 | import com.clickhouse.kafka.connect.sink.state.provider.InMemoryState; 12 | import com.clickhouse.kafka.connect.sink.state.provider.KeeperStateProvider; 13 | import com.clickhouse.kafka.connect.util.jmx.ExecutionTimer; 14 | import com.clickhouse.kafka.connect.util.jmx.MBeanServerUtils; 15 | import com.clickhouse.kafka.connect.util.jmx.SinkTaskStatistics; 16 | import org.apache.kafka.connect.sink.SinkRecord; 17 | import org.slf4j.Logger; 18 | import org.slf4j.LoggerFactory; 19 | 20 | import java.io.IOException; 21 | import java.util.Collection; 22 | import java.util.List; 23 | import java.util.Map; 24 | import java.util.Timer; 25 | import java.util.concurrent.ExecutionException; 26 | import java.util.concurrent.atomic.AtomicInteger; 27 | import java.util.stream.Collectors; 28 | 29 | public class ProxySinkTask { 30 | 31 | private static final Logger LOGGER = LoggerFactory.getLogger(ProxySinkTask.class); 32 | private static final AtomicInteger NEXT_ID = new AtomicInteger(); 33 | private Processing processing = null; 34 | private StateProvider stateProvider = null; 35 | private DBWriter dbWriter = null; 36 | private ClickHouseSinkConfig clickHouseSinkConfig = null; 37 | 38 | 39 | private final SinkTaskStatistics statistics; 40 | private int id = NEXT_ID.getAndAdd(1); 41 | 42 | public ProxySinkTask(final ClickHouseSinkConfig clickHouseSinkConfig, final ErrorReporter errorReporter) { 43 | this.clickHouseSinkConfig = clickHouseSinkConfig; 44 | LOGGER.info("Enable ExactlyOnce? 
{}", clickHouseSinkConfig.isExactlyOnce()); 45 | if ( clickHouseSinkConfig.isExactlyOnce() ) { 46 | this.stateProvider = new KeeperStateProvider(clickHouseSinkConfig); 47 | } else { 48 | this.stateProvider = new InMemoryState(); 49 | } 50 | 51 | ClickHouseWriter chWriter = new ClickHouseWriter(); 52 | this.dbWriter = chWriter; 53 | 54 | // Add table mapping refresher 55 | if (clickHouseSinkConfig.getTableRefreshInterval() > 0) { 56 | TableMappingRefresher tableMappingRefresher = new TableMappingRefresher(clickHouseSinkConfig.getDatabase(), chWriter); 57 | Timer tableRefreshTimer = new Timer(); 58 | tableRefreshTimer.schedule(tableMappingRefresher, clickHouseSinkConfig.getTableRefreshInterval(), clickHouseSinkConfig.getTableRefreshInterval()); 59 | } 60 | 61 | // Add dead letter queue 62 | boolean isStarted = dbWriter.start(clickHouseSinkConfig); 63 | if (!isStarted) 64 | throw new RuntimeException("Connection to ClickHouse is not active."); 65 | processing = new Processing(stateProvider, dbWriter, errorReporter, clickHouseSinkConfig); 66 | 67 | this.statistics = MBeanServerUtils.registerMBean(new SinkTaskStatistics(), getMBeanNAme()); 68 | } 69 | 70 | private String getMBeanNAme() { 71 | return String.format("com.clickhouse:type=ClickHouseKafkaConnector,name=SinkTask%d,version=%s", id, ClickHouseClientOption.class.getPackage().getImplementationVersion()); 72 | } 73 | 74 | public void stop() { 75 | MBeanServerUtils.unregisterMBean(getMBeanNAme()); 76 | } 77 | 78 | public void put(final Collection records) throws IOException, ExecutionException, InterruptedException { 79 | if (records.isEmpty()) { 80 | LOGGER.trace("No records sent to SinkTask"); 81 | return; 82 | } 83 | // Group by topic & partition 84 | ExecutionTimer taskTime = ExecutionTimer.start(); 85 | statistics.receivedRecords(records.size()); 86 | LOGGER.trace(String.format("Got %d records from put API.", records.size())); 87 | ExecutionTimer processingTime = ExecutionTimer.start(); 88 | 89 | Map> dataRecords = records.stream() 90 | .map(v -> Record.convert(v, 91 | clickHouseSinkConfig.isEnableDbTopicSplit(), 92 | clickHouseSinkConfig.getDbTopicSplitChar(), 93 | clickHouseSinkConfig.getDatabase() )) 94 | .collect(Collectors.groupingBy(!clickHouseSinkConfig.isExactlyOnce() && clickHouseSinkConfig.isIgnorePartitionsWhenBatching() 95 | ? Record::getTopic : Record::getTopicAndPartition)); 96 | 97 | statistics.recordProcessingTime(processingTime); 98 | // TODO - Multi process??? 
99 | for (String topicAndPartition : dataRecords.keySet()) { 100 | // Running on etch topic & partition 101 | List rec = dataRecords.get(topicAndPartition); 102 | processing.doLogic(rec); 103 | } 104 | statistics.taskProcessingTime(taskTime); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/data/Data.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.data; 2 | 3 | import org.apache.kafka.connect.data.Field; 4 | import org.apache.kafka.connect.data.Schema; 5 | 6 | import java.util.List; 7 | 8 | public class Data { 9 | private Schema schema; 10 | private Object object; 11 | 12 | public Data(Schema schema, Object object) { 13 | this.schema = schema; 14 | this.object = object; 15 | } 16 | 17 | public List getFields() { 18 | return schema.fields(); 19 | } 20 | 21 | public Schema.Type getFieldType() { 22 | return schema.type(); 23 | } 24 | 25 | public Schema getMapKeySchema() { 26 | return schema.keySchema(); 27 | } 28 | 29 | public Schema getNestedValueSchema() { 30 | return schema.valueSchema(); 31 | } 32 | 33 | public Object getObject() { 34 | return object; 35 | } 36 | 37 | @Override 38 | public String toString() { 39 | if (object == null) { 40 | return null; 41 | } 42 | return object.toString(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/data/Record.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.data; 2 | 3 | import com.clickhouse.kafka.connect.sink.data.convert.EmptyRecordConvertor; 4 | import com.clickhouse.kafka.connect.sink.data.convert.RecordConvertor; 5 | import com.clickhouse.kafka.connect.sink.data.convert.SchemaRecordConvertor; 6 | import com.clickhouse.kafka.connect.sink.data.convert.SchemalessRecordConvertor; 7 | import com.clickhouse.kafka.connect.sink.data.convert.StringRecordConvertor; 8 | import com.clickhouse.kafka.connect.sink.kafka.OffsetContainer; 9 | import lombok.Getter; 10 | import org.apache.kafka.connect.data.Field; 11 | import org.apache.kafka.connect.data.Schema; 12 | import org.apache.kafka.connect.data.Struct; 13 | import org.apache.kafka.connect.errors.DataException; 14 | import org.apache.kafka.connect.sink.SinkRecord; 15 | 16 | import java.util.List; 17 | import java.util.Map; 18 | 19 | public class Record { 20 | @Getter 21 | private OffsetContainer recordOffsetContainer = null; 22 | private Object value; 23 | @Getter 24 | private Map jsonMap = null; 25 | @Getter 26 | private List fields = null; 27 | @Getter 28 | private SchemaType schemaType; 29 | @Getter 30 | private SinkRecord sinkRecord = null; 31 | @Getter 32 | private String database = null; 33 | 34 | public Record(SchemaType schemaType, OffsetContainer recordOffsetContainer, List fields, Map jsonMap, String database, SinkRecord sinkRecord) { 35 | this.recordOffsetContainer = recordOffsetContainer; 36 | this.fields = fields; 37 | this.jsonMap = jsonMap; 38 | this.sinkRecord = sinkRecord; 39 | this.schemaType = schemaType; 40 | this.database = database; 41 | } 42 | 43 | public String getTopicAndPartition() { 44 | return recordOffsetContainer.getTopicAndPartitionKey(); 45 | } 46 | 47 | public String getTopic() { 48 | return recordOffsetContainer.getTopic(); 49 | } 50 | 51 | private static final RecordConvertor schemaRecordConvertor = 
new SchemaRecordConvertor(); 52 | private static final RecordConvertor schemalessRecordConvertor = new SchemalessRecordConvertor(); 53 | private static final RecordConvertor emptyRecordConvertor = new EmptyRecordConvertor(); 54 | private static final RecordConvertor stringRecordConvertor = new StringRecordConvertor(); 55 | private static RecordConvertor getConvertor(Schema schema, Object data) { 56 | if (data == null ) { 57 | return emptyRecordConvertor; 58 | } 59 | if (schema != null && data instanceof Struct) { 60 | return schemaRecordConvertor; 61 | } 62 | if (data instanceof Map) { 63 | return schemalessRecordConvertor; 64 | } 65 | if (data instanceof String) { 66 | return stringRecordConvertor; 67 | } 68 | throw new DataException(String.format("No converter was found due to unexpected object type %s", data.getClass().getName())); 69 | } 70 | 71 | public static Record convert(SinkRecord sinkRecord, boolean splitDBTopic, String dbTopicSeparatorChar,String database) { 72 | RecordConvertor recordConvertor = getConvertor(sinkRecord.valueSchema(), sinkRecord.value()); 73 | return recordConvertor.convert(sinkRecord, splitDBTopic, dbTopicSeparatorChar, database); 74 | } 75 | 76 | public static Record newRecord(SchemaType schemaType, String topic, int partition, long offset, List fields, Map jsonMap, String database, SinkRecord sinkRecord) { 77 | return new Record(schemaType, new OffsetContainer(topic, partition, offset), fields, jsonMap, database, sinkRecord); 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/data/SchemaType.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.data; 2 | 3 | public enum SchemaType { 4 | SCHEMA, 5 | SCHEMA_LESS, 6 | STRING_SCHEMA 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/data/StructToJsonMap.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.data; 2 | 3 | import org.apache.kafka.connect.data.Date; 4 | import org.apache.kafka.connect.data.Decimal; 5 | import org.apache.kafka.connect.data.Field; 6 | import org.apache.kafka.connect.data.Schema; 7 | import org.apache.kafka.connect.data.Struct; 8 | import org.apache.kafka.connect.data.Time; 9 | import org.apache.kafka.connect.data.Timestamp; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | import java.math.BigDecimal; 14 | import java.util.ArrayList; 15 | import java.util.HashMap; 16 | import java.util.List; 17 | import java.util.Map; 18 | 19 | public class StructToJsonMap { 20 | 21 | private static final Logger LOGGER = LoggerFactory.getLogger(StructToJsonMap.class); 22 | public static Map toJsonMap(Struct struct) { 23 | if (struct == null) { 24 | return null; 25 | } 26 | Map jsonMap = new HashMap(0); 27 | List fields = struct.schema().fields(); 28 | for (Field field : fields) { 29 | String fieldName = field.name(); 30 | Schema.Type fieldType = field.schema().type(); 31 | String schemaName = field.schema().name(); 32 | LOGGER.debug(String.format("fieldName [%s] fieldType [%s] schemaName [%s]" , fieldName, fieldType.name(), schemaName)); 33 | switch (fieldType) { 34 | case STRING: 35 | jsonMap.put(fieldName, new Data(field.schema(), struct.getString(fieldName))); 36 | break; 37 | case BYTES: 38 | if 
(Decimal.LOGICAL_NAME.equals(schemaName)) { 39 | jsonMap.put(fieldName, new Data(field.schema(), (BigDecimal) struct.get(fieldName))); 40 | } else { 41 | jsonMap.put(fieldName, new Data(field.schema(), struct.getBytes(fieldName))); 42 | } 43 | break; 44 | case INT32: 45 | if (Date.LOGICAL_NAME.equals(schemaName) || Time.LOGICAL_NAME.equals(schemaName)) { 46 | jsonMap.put(fieldName, new Data(field.schema(), (java.util.Date) struct.get(fieldName))); 47 | } else { 48 | jsonMap.put(fieldName, new Data(field.schema(), struct.getInt32(fieldName))); 49 | } 50 | break; 51 | case INT16: 52 | jsonMap.put(fieldName, new Data(field.schema(), struct.getInt16(fieldName))); 53 | break; 54 | case INT64: 55 | if (Timestamp.LOGICAL_NAME.equals(schemaName)) { 56 | jsonMap.put(fieldName, new Data(field.schema(), (java.util.Date) struct.get(fieldName))); 57 | } else { 58 | jsonMap.put(fieldName, new Data(field.schema(), struct.getInt64(fieldName))); 59 | } 60 | break; 61 | case FLOAT32: 62 | jsonMap.put(fieldName, new Data(field.schema(), struct.getFloat32(fieldName))); 63 | break; 64 | case FLOAT64: 65 | jsonMap.put(fieldName, new Data(field.schema(), struct.getFloat64(fieldName))); 66 | break; 67 | case BOOLEAN: 68 | jsonMap.put(fieldName, new Data(field.schema(), struct.getBoolean(fieldName))); 69 | break; 70 | case ARRAY: 71 | List fieldArray = struct.getArray(fieldName); 72 | if (fieldArray != null && !fieldArray.isEmpty() && fieldArray.get(0) instanceof Struct) { 73 | // If Array contains list of Structs 74 | List jsonArray = new ArrayList<>(); 75 | fieldArray.forEach(item -> { 76 | jsonArray.add(toJsonMap((Struct) item)); 77 | }); 78 | jsonMap.put(fieldName, new Data(field.schema(), jsonArray)); 79 | } else { 80 | jsonMap.put(fieldName, new Data(field.schema(), fieldArray)); 81 | } 82 | break; 83 | case STRUCT: 84 | jsonMap.put(fieldName, new Data(field.schema(), toJsonMap(struct.getStruct(fieldName)))); 85 | break; 86 | case MAP: 87 | Map fieldMap = new HashMap<>(struct.getMap(fieldName)); 88 | if (!fieldMap.isEmpty() && fieldMap.values().iterator().next() instanceof Struct) { 89 | // Map values are `Struct` 90 | 91 | for (Map.Entry entry : fieldMap.entrySet()) { 92 | entry.setValue(toJsonMap((Struct) entry.getValue())); 93 | } 94 | } 95 | jsonMap.put(fieldName, new Data(field.schema(), fieldMap)); 96 | break; 97 | default: 98 | jsonMap.put(fieldName, new Data(field.schema(), struct.get(fieldName))); 99 | break; 100 | } 101 | } 102 | return jsonMap; 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/data/convert/EmptyRecordConvertor.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.data.convert; 2 | 3 | import com.clickhouse.kafka.connect.sink.data.Record; 4 | import com.clickhouse.kafka.connect.sink.data.SchemaType; 5 | import com.clickhouse.kafka.connect.sink.kafka.OffsetContainer; 6 | import org.apache.kafka.connect.data.Field; 7 | import org.apache.kafka.connect.sink.SinkRecord; 8 | 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | 12 | public class EmptyRecordConvertor extends RecordConvertor { 13 | @Override 14 | public Record doConvert(SinkRecord sinkRecord, String topic, String configurationDatabase) { 15 | String database = configurationDatabase; 16 | int partition = sinkRecord.kafkaPartition().intValue(); 17 | long offset = sinkRecord.kafkaOffset(); 18 | List fields = new ArrayList<>(); 19 | 
return new Record(SchemaType.SCHEMA_LESS, new OffsetContainer(topic, partition, offset), fields, null, database, sinkRecord); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/data/convert/RecordConvertor.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.data.convert; 2 | 3 | import com.clickhouse.kafka.connect.sink.data.Record; 4 | import org.apache.kafka.connect.sink.SinkRecord; 5 | 6 | import java.util.regex.Pattern; 7 | 8 | public abstract class RecordConvertor { 9 | public Record convert(SinkRecord sinkRecord, boolean splitDBTopic, String dbTopicSeparatorChar, String configurationDatabase) { 10 | String database = configurationDatabase; 11 | String topic = sinkRecord.topic(); 12 | if (splitDBTopic) { 13 | String[] parts = topic.split(Pattern.quote(dbTopicSeparatorChar)); 14 | if (parts.length == 2) { 15 | database = parts[0]; 16 | topic = parts[1]; 17 | } 18 | } 19 | return doConvert(sinkRecord, topic, database); 20 | } 21 | public abstract Record doConvert(SinkRecord sinkRecord, String topic,String database); 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/data/convert/SchemaRecordConvertor.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.data.convert; 2 | 3 | import com.clickhouse.kafka.connect.sink.data.Data; 4 | import com.clickhouse.kafka.connect.sink.data.Record; 5 | import com.clickhouse.kafka.connect.sink.data.SchemaType; 6 | import com.clickhouse.kafka.connect.sink.data.StructToJsonMap; 7 | import com.clickhouse.kafka.connect.sink.kafka.OffsetContainer; 8 | import org.apache.kafka.connect.data.Struct; 9 | import org.apache.kafka.connect.sink.SinkRecord; 10 | 11 | import java.util.Map; 12 | 13 | public class SchemaRecordConvertor extends RecordConvertor{ 14 | 15 | @Override 16 | public Record doConvert(SinkRecord sinkRecord, String topic,String configurationDatabase) { 17 | String database = configurationDatabase; 18 | int partition = sinkRecord.kafkaPartition().intValue(); 19 | long offset = sinkRecord.kafkaOffset(); 20 | Struct struct = (Struct) sinkRecord.value(); 21 | Map data = StructToJsonMap.toJsonMap((Struct) sinkRecord.value()); 22 | return new Record(SchemaType.SCHEMA, new OffsetContainer(topic, partition, offset), struct.schema().fields(), data, database, sinkRecord); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/data/convert/SchemalessRecordConvertor.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.data.convert; 2 | 3 | import com.clickhouse.kafka.connect.sink.data.Data; 4 | import com.clickhouse.kafka.connect.sink.data.Record; 5 | import com.clickhouse.kafka.connect.sink.data.SchemaType; 6 | import com.clickhouse.kafka.connect.sink.kafka.OffsetContainer; 7 | import org.apache.kafka.connect.data.Field; 8 | import org.apache.kafka.connect.data.Schema; 9 | import org.apache.kafka.connect.sink.SinkRecord; 10 | 11 | import java.util.ArrayList; 12 | import java.util.HashMap; 13 | import java.util.List; 14 | import java.util.Map; 15 | 16 | public class SchemalessRecordConvertor extends RecordConvertor { 17 | 18 | @Override 19 | public 
Record doConvert(SinkRecord sinkRecord, String topic,String configurationDatabase) { 20 | String database = configurationDatabase; 21 | int partition = sinkRecord.kafkaPartition().intValue(); 22 | long offset = sinkRecord.kafkaOffset(); 23 | List fields = new ArrayList<>(); 24 | Map map = (Map) sinkRecord.value(); 25 | Map data = new HashMap<>(); 26 | int index = 0; 27 | map.forEach((key,val) -> { 28 | fields.add(new Field(key.toString(), index, Schema.STRING_SCHEMA)); 29 | data.put(key.toString(), new Data(Schema.STRING_SCHEMA, val == null ? null : val.toString())); 30 | }); 31 | return new Record(SchemaType.SCHEMA_LESS, new OffsetContainer(topic, partition, offset), fields, data, database, sinkRecord); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/data/convert/StringRecordConvertor.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.data.convert; 2 | 3 | import com.clickhouse.kafka.connect.sink.data.Record; 4 | import com.clickhouse.kafka.connect.sink.data.SchemaType; 5 | import com.clickhouse.kafka.connect.sink.kafka.OffsetContainer; 6 | import org.apache.kafka.connect.errors.DataException; 7 | import org.apache.kafka.connect.sink.SinkRecord; 8 | 9 | public class StringRecordConvertor extends RecordConvertor { 10 | @Override 11 | public Record doConvert(SinkRecord sinkRecord, String topic,String configurationDatabase) { 12 | String database = configurationDatabase; 13 | if (sinkRecord.value() == null) { 14 | throw new DataException("Value was null for JSON conversion"); 15 | } 16 | int partition = sinkRecord.kafkaPartition().intValue(); 17 | long offset = sinkRecord.kafkaOffset(); 18 | return new Record(SchemaType.STRING_SCHEMA, new OffsetContainer(topic, partition, offset), null, null, database, sinkRecord); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/db/DBWriter.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.db; 2 | 3 | import com.clickhouse.kafka.connect.sink.ClickHouseSinkConfig; 4 | import com.clickhouse.kafka.connect.sink.data.Record; 5 | import com.clickhouse.kafka.connect.sink.dlq.ErrorReporter; 6 | import com.clickhouse.kafka.connect.util.QueryIdentifier; 7 | 8 | import java.io.IOException; 9 | import java.util.List; 10 | import java.util.concurrent.ExecutionException; 11 | 12 | public interface DBWriter { 13 | 14 | public boolean start(ClickHouseSinkConfig csc); 15 | public void stop(); 16 | public void doInsert(List records, QueryIdentifier queryId) throws IOException, ExecutionException, InterruptedException; 17 | public void doInsert(List records, QueryIdentifier queryId, ErrorReporter errorReporter) throws IOException, ExecutionException, InterruptedException; 18 | public long recordsInserted(); 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/db/InMemoryDBWriter.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.db; 2 | 3 | import com.clickhouse.kafka.connect.sink.ClickHouseSinkConfig; 4 | import com.clickhouse.kafka.connect.sink.data.Record; 5 | import com.clickhouse.kafka.connect.sink.dlq.ErrorReporter; 6 | import 
com.clickhouse.kafka.connect.util.QueryIdentifier; 7 | 8 | import java.util.HashMap; 9 | import java.util.List; 10 | import java.util.Map; 11 | 12 | public class InMemoryDBWriter implements DBWriter { 13 | 14 | 15 | 16 | private Map recordMap = null; 17 | 18 | 19 | public InMemoryDBWriter() { 20 | this.recordMap = new HashMap<>(); 21 | } 22 | @Override 23 | public boolean start(ClickHouseSinkConfig csc) { 24 | return true; 25 | } 26 | 27 | @Override 28 | public void stop() { 29 | 30 | } 31 | 32 | @Override 33 | public void doInsert(List records, QueryIdentifier queryId) { 34 | records.stream().forEach( r -> this.recordMap.put(r.getRecordOffsetContainer().getOffset(), r) ); 35 | } 36 | 37 | @Override 38 | public void doInsert(List records, QueryIdentifier queryId, ErrorReporter errorReporter) { 39 | doInsert(records, queryId); 40 | } 41 | 42 | @Override 43 | public long recordsInserted() { 44 | return this.recordMap.size(); 45 | } 46 | 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/db/TableMappingRefresher.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.db; 2 | 3 | import org.slf4j.Logger; 4 | import org.slf4j.LoggerFactory; 5 | 6 | import java.util.TimerTask; 7 | 8 | public class TableMappingRefresher extends TimerTask { 9 | private static final Logger LOGGER = LoggerFactory.getLogger(TableMappingRefresher.class); 10 | private ClickHouseWriter chWriter = null; 11 | private String database = null; 12 | 13 | public TableMappingRefresher(String database, final ClickHouseWriter chWriter) { 14 | this.chWriter = chWriter; 15 | this.database = database; 16 | } 17 | 18 | @Override 19 | public void run() { 20 | try { 21 | chWriter.updateMapping(database); 22 | } catch (Exception e) { 23 | LOGGER.error("Update mapping Error: {}", e.getMessage()); 24 | } 25 | 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/db/helper/ClickHouseFieldDescriptor.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.db.helper; 2 | 3 | import com.fasterxml.jackson.core.JsonProcessingException; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import com.fasterxml.jackson.databind.PropertyNamingStrategies; 6 | import com.fasterxml.jackson.databind.annotation.JsonNaming; 7 | import lombok.Builder; 8 | import lombok.Data; 9 | import lombok.extern.jackson.Jacksonized; 10 | 11 | /** 12 | * Java object representation of one DESCRIBE TABLE result row. 13 | *
 * <p>
14 | * We use Jackson to instantiate it from JSON. 15 | */ 16 | @Data 17 | @Builder 18 | @Jacksonized 19 | @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) 20 | public class ClickHouseFieldDescriptor { 21 | private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); 22 | private String name; 23 | private String type; 24 | private String defaultType; 25 | private String defaultExpression; 26 | private String comment; 27 | private String codecExpression; 28 | private String ttlExpression; 29 | private boolean isSubcolumn; 30 | 31 | public boolean isAlias() { 32 | return "ALIAS".equals(defaultType); 33 | } 34 | 35 | public boolean isMaterialized() { 36 | return "MATERIALIZED".equals(defaultType); 37 | } 38 | 39 | public boolean isEphemeral() { 40 | return "EPHEMERAL".equals(defaultType); 41 | } 42 | 43 | public boolean hasDefault() { 44 | return "DEFAULT".equals(defaultType); 45 | } 46 | 47 | public static ClickHouseFieldDescriptor fromJsonRow(String json) throws JsonProcessingException { 48 | return OBJECT_MAPPER.readValue(json.replace("\\", "\\\\").replace("\n", "\\n"), ClickHouseFieldDescriptor.class); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/db/mapping/Table.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.db.mapping; 2 | 3 | import com.clickhouse.kafka.connect.util.Utils; 4 | import lombok.Getter; 5 | import lombok.Setter; 6 | import lombok.experimental.Accessors; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import java.util.ArrayList; 11 | import java.util.HashMap; 12 | import java.util.List; 13 | import java.util.Map; 14 | import java.util.function.Predicate; 15 | import java.util.regex.Matcher; 16 | import java.util.regex.Pattern; 17 | 18 | @Getter 19 | public class Table { 20 | private static final Logger LOGGER = LoggerFactory.getLogger(Table.class); 21 | private static final Predicate SIZE_FIELD_MATCHER = Pattern.compile(".+\\.size[0-9]+$").asMatchPredicate(); 22 | private static final Pattern MULTIPLE_MAP_VALUES_PATTERN = Pattern.compile("(\\.values)(?=((\\.values)+$))"); 23 | 24 | private final String name; 25 | private final String database; 26 | 27 | private final List rootColumnsList; 28 | private final Map rootColumnsMap; 29 | private final List allColumnsList; 30 | private final Map allColumnsMap; 31 | 32 | @Setter 33 | @Accessors(fluent = true) 34 | private boolean hasDefaults; 35 | 36 | @Setter 37 | @Getter 38 | private int numColumns = 0; 39 | 40 | public Table(String database, String name) { 41 | this.database = database; 42 | this.name = name; 43 | this.rootColumnsList = new ArrayList<>(); 44 | this.rootColumnsMap = new HashMap<>(); 45 | 46 | this.allColumnsList = new ArrayList<>(); 47 | this.allColumnsMap = new HashMap<>(); 48 | } 49 | 50 | public Table(String database, String name, int numColumns) { 51 | this(database, name); 52 | this.numColumns = numColumns; 53 | } 54 | 55 | public String getCleanName() { 56 | return name; 57 | } 58 | public String getName() { 59 | return Utils.escapeName(name); 60 | } 61 | 62 | public String getFullName() { 63 | return Utils.escapeTableName(database, name); 64 | } 65 | 66 | private void registerValidColumn(Column column) { 67 | allColumnsMap.put(column.getName(), column); 68 | allColumnsList.add(column); 69 | } 70 | 71 | public void addColumn(Column column) { 72 | registerValidColumn(column); 
73 | 74 | if (column.isSubColumn()) handleNonRoot(column); 75 | else { 76 | rootColumnsList.add(column); 77 | rootColumnsMap.put(column.getName(), column); 78 | } 79 | } 80 | 81 | private void handleNonRoot(Column column) { 82 | String parentName = column.getName().substring(0, column.getName().lastIndexOf(".")); 83 | Column parent = allColumnsMap.getOrDefault(parentName, null); 84 | if (parent == null) { 85 | LOGGER.error("Got non-root column, but its parent was not found to be updated. {}", column); 86 | return; 87 | } 88 | 89 | updateParent(parent, column); 90 | } 91 | 92 | private void updateParent(Column parent, Column child) { 93 | switch (parent.getType()) { 94 | case VARIANT: 95 | // Variants are handled fully in the Column class because its types are always primitive. Let's ignore them here. 96 | return; 97 | case ARRAY: 98 | if (SIZE_FIELD_MATCHER.test(child.getName())) 99 | return; 100 | 101 | Column parentArrayType = parent.getArrayType(); 102 | switch (parentArrayType.getType()) { 103 | case MAP: 104 | case TUPLE: 105 | updateParent(parent.getArrayType(), child.getArrayType()); 106 | return; 107 | case ARRAY: 108 | do { 109 | child = child.getArrayType(); 110 | parent = parent.getArrayType(); 111 | } while (child.getType() == Type.ARRAY && parent.getType() == Type.ARRAY); 112 | updateParent(parent, child); 113 | return; 114 | case VARIANT: 115 | return; 116 | default: 117 | LOGGER.error("Unhandled complex type '{}' as a child of an array", parentArrayType.getType()); 118 | return; 119 | } 120 | case MAP: 121 | // Keys are parsed fully in the Column class as its type is always primitive. 122 | if (child.getName().endsWith(".keys") || SIZE_FIELD_MATCHER.test(child.getName())) 123 | return; 124 | 125 | if (child.getType() == Type.ARRAY && child.getName().endsWith(".values")) { 126 | int depth = 1; 127 | 128 | Matcher matcher = MULTIPLE_MAP_VALUES_PATTERN.matcher(child.getName()); 129 | while (matcher.find()) depth += 1; 130 | 131 | int remainingDepth = depth; 132 | 133 | // ClickHouse outputs nested maps values as nested array types 134 | while (remainingDepth-- > 0) { 135 | child = child.getArrayType(); 136 | } 137 | 138 | child.setParent(parent); 139 | 140 | parent.setMapDepth(depth); 141 | parent.setMapValueType(child); 142 | registerValidColumn(child); 143 | } 144 | return; 145 | case TUPLE: 146 | Column parentOfParent = parent.getParent(); 147 | 148 | if (parentOfParent != null) { 149 | boolean anyTransitionalParentIsMap = parentOfParent.getType() == Type.MAP; 150 | 151 | if (!anyTransitionalParentIsMap && parentOfParent.getType() == Type.ARRAY) { 152 | Column currentParent = parentOfParent.getParent(); 153 | 154 | while (currentParent != null) { 155 | anyTransitionalParentIsMap = currentParent.getType() == Type.MAP; 156 | 157 | if (anyTransitionalParentIsMap) 158 | break; 159 | 160 | currentParent = currentParent.getParent(); 161 | } 162 | } 163 | 164 | if (anyTransitionalParentIsMap) { 165 | int remainingDepth = getRemainingDepth(parent, parentOfParent); 166 | 167 | while (remainingDepth-- > 0) { 168 | child = child.getArrayType(); 169 | } 170 | } 171 | } 172 | parent.getTupleFields().add(child); 173 | return; 174 | default: 175 | if (child.getName().endsWith(".null")) { 176 | LOGGER.debug("Ignoring complex column: {}", child); 177 | } else { 178 | LOGGER.warn("Unsupported complex parent type: {}", parent.getType()); 179 | } 180 | } 181 | } 182 | 183 | private static int getRemainingDepth(Column parent, Column parentOfParent) { 184 | int compensationDepth = 0; 185 | 186 | 
// I don't really know why the ClickHouse describe table result wraps the type in an additional 187 | // array only when the parent is a map which is under array. But we have to deal with it. 188 | Matcher matcher = MULTIPLE_MAP_VALUES_PATTERN.matcher(parent.getName()); 189 | while (matcher.find()) compensationDepth += 1; 190 | 191 | return parentOfParent.getMapDepth() + parentOfParent.getArrayDepth() - compensationDepth; 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/db/mapping/Type.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.db.mapping; 2 | 3 | public enum Type { 4 | UNKNOWN, 5 | INT8, 6 | INT16, 7 | INT32, 8 | INT64, 9 | INT128, 10 | INT256, 11 | STRING, 12 | FLOAT32, 13 | FLOAT64, 14 | BOOLEAN, 15 | ARRAY, 16 | MAP, 17 | TUPLE, 18 | VARIANT, 19 | Date, 20 | Date32, 21 | DateTime, 22 | DateTime64, 23 | UUID, 24 | UINT8, 25 | UINT16, 26 | UINT32, 27 | UINT64, 28 | UINT128, 29 | UINT256, 30 | Decimal, 31 | FIXED_STRING, 32 | Enum8, 33 | Enum16, 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/dedup/DeDup.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.dedup; 2 | 3 | import com.clickhouse.kafka.connect.sink.kafka.OffsetContainer; 4 | 5 | public class DeDup { 6 | 7 | private OffsetContainer currentOffset; 8 | private OffsetContainer previousOffset; 9 | private DeDupStrategy deDupStrategy; 10 | 11 | public DeDup(DeDupStrategy deDupStrategy, OffsetContainer currentOffset) { 12 | this.currentOffset = currentOffset; 13 | this.deDupStrategy = deDupStrategy; 14 | previousOffset = null; 15 | } 16 | 17 | public boolean isNew(int recordOffset) { 18 | return true; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/dedup/DeDupStrategy.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.dedup; 2 | 3 | public enum DeDupStrategy { 4 | 5 | OFF, 6 | PRIMARY_KEY, 7 | ALL_DATA, 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/dlq/DuplicateException.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.dlq; 2 | 3 | public class DuplicateException extends Exception { 4 | public DuplicateException() { 5 | 6 | } 7 | 8 | public DuplicateException(String message) { 9 | super(message); 10 | } 11 | 12 | public DuplicateException(String message, Throwable cause) { 13 | super(message, cause); 14 | } 15 | 16 | public DuplicateException(Throwable cause) { 17 | super(cause); 18 | } 19 | 20 | public DuplicateException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { 21 | super(message, cause, enableSuppression, writableStackTrace); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/dlq/ErrorReporter.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.dlq; 2 | 3 | import 
org.apache.kafka.connect.sink.SinkRecord; 4 | 5 | public interface ErrorReporter { 6 | void report(SinkRecord record, Exception e); 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/kafka/OffsetContainer.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.kafka; 2 | 3 | public class OffsetContainer extends TopicPartitionContainer { 4 | private long offset; 5 | 6 | 7 | public OffsetContainer(String topic, int partition, long offset) { 8 | super(topic, partition); 9 | this.offset = offset; 10 | } 11 | 12 | public long getOffset() { 13 | return offset; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/kafka/RangeContainer.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.kafka; 2 | 3 | public class RangeContainer extends TopicPartitionContainer { 4 | 5 | private long maxOffset; 6 | private long minOffset; 7 | 8 | public RangeContainer(String topic, int partition) { 9 | super(topic, partition); 10 | this.maxOffset = -1; 11 | this.minOffset = Long.MAX_VALUE; 12 | } 13 | 14 | public RangeContainer(String topic, int partition, long maxOffset, long minOffset) { 15 | super(topic, partition); 16 | this.maxOffset = maxOffset; 17 | this.minOffset = minOffset; 18 | } 19 | 20 | 21 | /** 22 | * This method will set min/max values for offsets 23 | * 24 | * @param offset 25 | */ 26 | public void defineInRange(long offset) { 27 | maxOffset = Long.max(maxOffset, offset); 28 | minOffset = Long.min(minOffset, offset); 29 | } 30 | 31 | public long getMaxOffset() { 32 | return maxOffset; 33 | } 34 | 35 | public long getMinOffset() { 36 | return minOffset; 37 | } 38 | 39 | public boolean isInRange(long offset) { 40 | if (offset >= minOffset && offset <= maxOffset) 41 | return true; 42 | return false; 43 | } 44 | 45 | /** 46 | * This compares the stored state with the actual state 47 | * @param rangeContainer A container with the actual state 48 | * @return The state of the comparison 49 | */ 50 | public RangeState getOverLappingState(RangeContainer rangeContainer) { 51 | long actualMinOffset = rangeContainer.getMinOffset(); 52 | long actualMaxOffset = rangeContainer.getMaxOffset(); 53 | 54 | // SAME State [0, 10] Actual [0, 10] 55 | if (actualMaxOffset == maxOffset && actualMinOffset <= minOffset) 56 | return RangeState.SAME; 57 | // NEW State [0, 10] Actual [11, 20] 58 | if (actualMinOffset > maxOffset) 59 | return RangeState.NEW; 60 | // CONTAINS [0, 10] Actual [1, 10] 61 | if (actualMaxOffset <= maxOffset && actualMinOffset >= minOffset) 62 | return RangeState.CONTAINS; 63 | // OVER_LAPPING 64 | if (actualMaxOffset > maxOffset) 65 | return RangeState.OVER_LAPPING; 66 | // ZEROED [10, 20] Actual [0, 10] 67 | if (actualMinOffset == 0) 68 | return RangeState.ZERO; 69 | // PREVIOUS [10, 20] Actual [5, 8] 70 | if (actualMaxOffset < minOffset) 71 | return RangeState.PREVIOUS; 72 | // ERROR [10, 20] Actual [8, 19] 73 | return RangeState.ERROR; 74 | } 75 | 76 | 77 | public RangeContainer getRangeContainer() { 78 | return this; 79 | } 80 | 81 | public String toString() { 82 | return "Topic: " + getTopic() + " Partition: " + getPartition() + " MinOffset: " + minOffset + " MaxOffset: " + maxOffset; 83 | } 84 | } 85 | 
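A minimal, self-contained sketch (not part of the repository) of how the overlap states computed by RangeContainer.getOverLappingState above could drive an insert/skip decision. The class name RangeDecisionSketch, the method shouldInsert, and the chosen mapping of states to decisions are illustrative assumptions; the connector's actual processing logic may also crop partially overlapping batches rather than re-insert them wholesale. RangeState referenced here is the enum defined in the next file.

import com.clickhouse.kafka.connect.sink.kafka.RangeContainer;
import com.clickhouse.kafka.connect.sink.kafka.RangeState;

public class RangeDecisionSketch {
    // Decide whether an incoming batch ("actual") still needs to be inserted,
    // given the range already recorded for this topic/partition ("stored").
    static boolean shouldInsert(RangeContainer stored, RangeContainer actual) {
        RangeState state = stored.getOverLappingState(actual);
        switch (state) {
            case NEW:          // batch lies entirely after the stored range
            case ZERO:         // offsets restarted from 0, topic likely recreated
            case OVER_LAPPING: // batch extends past the stored max offset
                return true;
            case SAME:
            case CONTAINS:
            case PREVIOUS:     // already covered by earlier, completed work
                return false;
            default:           // ERROR or anything unexpected
                throw new IllegalStateException("Unexpected range state: " + state);
        }
    }

    public static void main(String[] args) {
        RangeContainer stored = new RangeContainer("events", 0, 10, 0); // stored range [0, 10]
        RangeContainer actual = new RangeContainer("events", 0);
        actual.defineInRange(11);
        actual.defineInRange(20);                                       // actual range [11, 20]
        System.out.println(shouldInsert(stored, actual));               // prints true (NEW)
    }
}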
-------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/kafka/RangeState.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.kafka; 2 | 3 | public enum RangeState { 4 | ZERO(0), //This is for when it seems like the topic has been deleted/recreated 5 | SAME(1), 6 | PREFIX(2), 7 | SUFFIX(3), 8 | CONTAINS(4), 9 | OVER_LAPPING(5), 10 | NEW(6), 11 | ERROR(7), 12 | PREVIOUS(8); 13 | 14 | 15 | private int rangeState; 16 | 17 | RangeState(int rangeState) { 18 | this.rangeState = rangeState; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/kafka/TopicPartitionContainer.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.kafka; 2 | 3 | public class TopicPartitionContainer { 4 | 5 | protected String topic; 6 | protected int partition; 7 | 8 | public TopicPartitionContainer(String topic, int partition) { 9 | this.topic = topic; 10 | this.partition = partition; 11 | } 12 | 13 | public String getTopic() { 14 | return topic; 15 | } 16 | 17 | public int getPartition() { 18 | return partition; 19 | } 20 | 21 | public String getTopicAndPartitionKey() { 22 | return String.format("%s-%d", topic, partition); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/state/State.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.state; 2 | 3 | public enum State { 4 | NONE(1), 5 | BEFORE_PROCESSING(2), 6 | IN_PROCESSING(3), 7 | AFTER_PROCESSING(4); 8 | 9 | 10 | private int state; 11 | 12 | State(int state) { 13 | this.state = state; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/state/StateProvider.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.state; 2 | 3 | public interface StateProvider { 4 | 5 | public StateRecord getStateRecord(String topic, int partition ); 6 | 7 | 8 | public void setStateRecord(StateRecord stateRecord); 9 | 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/state/StateRecord.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.state; 2 | 3 | import com.clickhouse.kafka.connect.sink.kafka.RangeContainer; 4 | 5 | import java.util.Objects; 6 | 7 | public class StateRecord extends RangeContainer { 8 | private State state; 9 | 10 | public StateRecord(String topic, int partition , long maxOffset, long minOffset, State state) { 11 | super(topic, partition, maxOffset, minOffset); 12 | this.state = state; 13 | } 14 | 15 | public State getState() { 16 | return state; 17 | } 18 | 19 | public void setState(State state) { 20 | this.state = state; 21 | } 22 | 23 | public boolean equals(Object o) { 24 | if (this == o) return true; 25 | if (!(o instanceof StateRecord)) return false; 26 | //if (!super.equals(o)) return false; //If we overrode it there 27 | 28 | StateRecord that = (StateRecord) o; 29 | 30 | return Objects.equals(this.topic, that.topic) 31 
| && this.partition == that.partition 32 | && this.state == that.state 33 | && this.getMinOffset() == that.getMinOffset() 34 | && this.getMaxOffset() == that.getMaxOffset(); 35 | } 36 | 37 | public String toString() { 38 | return "StateRecord{" + 39 | "topic='" + topic + "'" + 40 | ", partition=" + partition + 41 | ", state='" + state + "'" + 42 | ", minOffset=" + getMinOffset() + 43 | ", maxOffset=" + getMaxOffset() + 44 | '}'; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/state/provider/InMemoryState.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.state.provider; 2 | 3 | import com.clickhouse.kafka.connect.sink.state.State; 4 | import com.clickhouse.kafka.connect.sink.state.StateProvider; 5 | import com.clickhouse.kafka.connect.sink.state.StateRecord; 6 | 7 | import java.util.HashMap; 8 | import java.util.Map; 9 | 10 | public class InMemoryState implements StateProvider { 11 | 12 | private Map<String, StateRecord> stateDB = null; 13 | public InMemoryState() { 14 | this.stateDB = new HashMap<>(10); 15 | } 16 | 17 | private String genKey(String topic, int partition) { 18 | return String.format("%s-%d", topic, partition); 19 | } 20 | @Override 21 | public StateRecord getStateRecord(String topic, int partition) { 22 | String key = genKey(topic, partition); 23 | if ( !stateDB.containsKey(key)) 24 | return new StateRecord(topic, partition, -1 , -1, State.NONE); 25 | return stateDB.get(key); 26 | } 27 | 28 | @Override 29 | public void setStateRecord(StateRecord stateRecord) { 30 | String key = genKey(stateRecord.getTopic(), stateRecord.getPartition()); 31 | stateDB.put(key, stateRecord); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/sink/state/provider/KeeperStateProvider.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.state.provider; 2 | 3 | import com.clickhouse.client.ClickHouseClient; 4 | import com.clickhouse.client.ClickHouseException; 5 | import com.clickhouse.client.ClickHouseNode; 6 | import com.clickhouse.client.ClickHouseNodeSelector; 7 | import com.clickhouse.client.ClickHouseProtocol; 8 | import com.clickhouse.client.ClickHouseResponse; 9 | import com.clickhouse.client.api.query.Records; 10 | import com.clickhouse.data.ClickHouseFormat; 11 | import com.clickhouse.data.ClickHouseRecord; 12 | import com.clickhouse.kafka.connect.sink.ClickHouseSinkConfig; 13 | import com.clickhouse.kafka.connect.sink.db.helper.ClickHouseHelperClient; 14 | import com.clickhouse.kafka.connect.sink.state.State; 15 | import com.clickhouse.kafka.connect.sink.state.StateProvider; 16 | import com.clickhouse.kafka.connect.sink.state.StateRecord; 17 | import com.clickhouse.kafka.connect.util.Mask; 18 | import org.slf4j.Logger; 19 | import org.slf4j.LoggerFactory; 20 | 21 | import java.util.Map; 22 | import java.util.concurrent.ConcurrentHashMap; 23 | 24 | public class KeeperStateProvider implements StateProvider { 25 | 26 | private static final Logger LOGGER = LoggerFactory.getLogger(KeeperStateProvider.class); 27 | private ClickHouseNode server = null; 28 | private int pingTimeOut = 100; 29 | 30 | 31 | private ClickHouseHelperClient chc = null; 32 | private ClickHouseSinkConfig csc = null; 33 | 34 | private Map<String, StateRecord> stateMap = null; 35 | 36 | public 
KeeperStateProvider(ClickHouseSinkConfig csc) { 37 | this.csc = csc; 38 | this.stateMap = new ConcurrentHashMap<>(); 39 | 40 | String hostname = csc.getHostname(); 41 | int port = csc.getPort(); 42 | String database = csc.getDatabase(); 43 | String username = csc.getUsername(); 44 | String password = csc.getPassword(); 45 | boolean sslEnabled = csc.isSslEnabled(); 46 | String jdbcConnectionProperties = csc.getJdbcConnectionProperties(); 47 | int timeout = csc.getTimeout(); 48 | String clientVersion = csc.getClientVersion(); 49 | boolean useClientV2 = clientVersion.equals("V1") ? false : true; 50 | LOGGER.info(String.format("hostname: [%s] port [%d] database [%s] username [%s] password [%s] sslEnabled [%s] timeout [%d]", hostname, port, database, username, Mask.passwordMask(password), sslEnabled, timeout)); 51 | 52 | chc = new ClickHouseHelperClient.ClickHouseClientBuilder(hostname, port, csc.getProxyType(), csc.getProxyHost(), csc.getProxyPort()) 53 | .setDatabase(database) 54 | .setUsername(username) 55 | .setPassword(password) 56 | .sslEnable(sslEnabled) 57 | .setJdbcConnectionProperties(jdbcConnectionProperties) 58 | .setTimeout(timeout) 59 | .setRetry(csc.getRetry()) 60 | .useClientV2(useClientV2) 61 | .build(); 62 | 63 | if (!chc.ping()) { 64 | LOGGER.error("Unable to ping Clickhouse server."); 65 | // TODO: exit 66 | } 67 | LOGGER.info("Ping is successful."); 68 | init(); 69 | } 70 | 71 | public KeeperStateProvider(ClickHouseHelperClient chc) { 72 | if (!chc.ping()) 73 | throw new RuntimeException("ping"); 74 | this.chc = chc; 75 | init(); 76 | } 77 | 78 | private void init() { 79 | String createTable = String.format("CREATE TABLE IF NOT EXISTS `%s`%s " + 80 | "(`key` String, minOffset BIGINT, maxOffset BIGINT, state String)" + 81 | " ENGINE=KeeperMap('%s') PRIMARY KEY `key`;", 82 | csc.getZkDatabase(), 83 | csc.getKeeperOnCluster().isEmpty() ? 
"" : " ON CLUSTER " + csc.getKeeperOnCluster(), 84 | csc.getZkPath()); 85 | // TODO: exec instead of query 86 | if (chc.isUseClientV2()) { 87 | chc.queryV2(createTable); 88 | } else { 89 | ClickHouseResponse r = chc.queryV1(createTable); 90 | r.close(); 91 | } 92 | } 93 | 94 | @Override 95 | public StateRecord getStateRecord(String topic, int partition) { 96 | String key = String.format("%s-%d", topic, partition); 97 | String selectStr = String.format("SELECT * FROM `%s` WHERE `key`= '%s'", csc.getZkDatabase(), key); 98 | try (ClickHouseClient client = ClickHouseClient.builder() 99 | .options(chc.getDefaultClientOptions()) 100 | .nodeSelector(ClickHouseNodeSelector.of(ClickHouseProtocol.HTTP)) 101 | .build(); 102 | ClickHouseResponse response = client.read(chc.getServer()) // or client.connect(endpoints) 103 | .format(ClickHouseFormat.RowBinaryWithNamesAndTypes) 104 | .query(selectStr) 105 | .executeAndWait()) { 106 | LOGGER.debug("return size: {}", response.getSummary().getReadRows()); 107 | long totalResultsFound = response.getSummary().getResultRows(); 108 | if ( totalResultsFound == 0) { 109 | LOGGER.info("Read state record: topic {} partition {} with NONE state", topic, partition); 110 | return new StateRecord(topic, partition, 0, 0, State.NONE); 111 | } else if(totalResultsFound > 1){ 112 | LOGGER.warn("There was more than 1 state records for query: {} ({} found)", selectStr, totalResultsFound); 113 | } 114 | 115 | ClickHouseRecord r = response.firstRecord(); 116 | long minOffset = r.getValue(1).asLong(); 117 | long maxOffset = r.getValue(2).asLong(); 118 | State state = State.valueOf(r.getValue(3).asString()); 119 | LOGGER.debug("read state record: topic {} partition {} with {} state max {} min {}", topic, partition, state, maxOffset, minOffset); 120 | 121 | StateRecord stateRecord = new StateRecord(topic, partition, maxOffset, minOffset, state); 122 | StateRecord storedRecord = stateMap.get(csc.getZkDatabase() + "-" + key); 123 | if (storedRecord != null && !stateRecord.equals(storedRecord)) { 124 | LOGGER.warn("State record is changed: {} -> {}", storedRecord, stateRecord); 125 | } else { 126 | LOGGER.debug("State record stored: {}", storedRecord); 127 | } 128 | return stateRecord; 129 | } catch (ClickHouseException e) { 130 | throw new RuntimeException(e); 131 | } 132 | } 133 | 134 | @Override 135 | public void setStateRecord(StateRecord stateRecord) { 136 | long minOffset = stateRecord.getMinOffset(); 137 | long maxOffset = stateRecord.getMaxOffset(); 138 | String key = stateRecord.getTopicAndPartitionKey(); 139 | String state = stateRecord.getState().toString(); 140 | String insertStr = String.format("INSERT INTO `%s` SETTINGS wait_for_async_insert=1 VALUES ('%s', %d, %d, '%s');", csc.getZkDatabase(), key, minOffset, maxOffset, state); 141 | LOGGER.info("Write state record: {}", stateRecord); 142 | if (chc.isUseClientV2()) { 143 | try (Records records = this.chc.queryV2(insertStr)) { 144 | LOGGER.debug("Number of written rows (V2) [{}]", records.getWrittenRows()); 145 | } catch (Exception e) { 146 | LOGGER.error("Failed to write state record: {}", stateRecord, e); 147 | throw new RuntimeException(e); 148 | } 149 | } else { 150 | ClickHouseResponse response = this.chc.queryV1(insertStr); 151 | LOGGER.debug("Number of written rows (V1) [{}]", response.getSummary().getWrittenRows()); 152 | response.close(); 153 | } 154 | 155 | stateMap.put(csc.getZkDatabase() + "-" + key, stateRecord); 156 | } 157 | } 158 | 
-------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/transforms/ExtractTopicConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.clickhouse.kafka.connect.transforms; 18 | 19 | import org.apache.kafka.common.config.AbstractConfig; 20 | import org.apache.kafka.common.config.ConfigDef; 21 | 22 | import java.util.Map; 23 | import java.util.Optional; 24 | 25 | class ExtractTopicConfig extends AbstractConfig { 26 | public static final String FIELD_NAME_CONFIG = "field.name"; 27 | private static final String FIELD_NAME_DOC = 28 | "The name of the field which should be used as the topic name. " 29 | + "If null or empty, the entire key or value is used (and assumed to be a string)."; 30 | 31 | public static final String SKIP_MISSING_OR_NULL_CONFIG = "skip.missing.or.null"; 32 | private static final String SKIP_MISSING_OR_NULL_DOC = 33 | "In case the source of the new topic name is null or missing, " 34 | + "should a record be silently passed without transformation."; 35 | 36 | ExtractTopicConfig(final Map<?, ?> originals) { 37 | super(config(), originals); 38 | } 39 | 40 | static ConfigDef config() { 41 | return new ConfigDef() 42 | .define( 43 | FIELD_NAME_CONFIG, 44 | ConfigDef.Type.STRING, 45 | null, 46 | ConfigDef.Importance.HIGH, 47 | FIELD_NAME_DOC) 48 | .define( 49 | SKIP_MISSING_OR_NULL_CONFIG, 50 | ConfigDef.Type.BOOLEAN, 51 | false, 52 | ConfigDef.Importance.LOW, 53 | SKIP_MISSING_OR_NULL_DOC); 54 | } 55 | 56 | Optional<String> fieldName() { 57 | final String rawFieldName = getString(FIELD_NAME_CONFIG); 58 | if (null == rawFieldName || "".equals(rawFieldName)) { 59 | return Optional.empty(); 60 | } 61 | return Optional.of(rawFieldName); 62 | } 63 | 64 | boolean skipMissingOrNull() { 65 | return getBoolean(SKIP_MISSING_OR_NULL_CONFIG); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/transforms/KeyToValue.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.transforms; 2 | 3 | import org.apache.kafka.common.config.AbstractConfig; 4 | import org.apache.kafka.common.config.ConfigDef; 5 | import org.apache.kafka.connect.connector.ConnectRecord; 6 | import org.apache.kafka.connect.data.Schema; 7 | import org.apache.kafka.connect.data.SchemaBuilder; 8 | import org.apache.kafka.connect.data.Struct; 9 | import org.apache.kafka.connect.transforms.Transformation; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | import java.util.Map; 14 | 15 | public class KeyToValue<R extends ConnectRecord<R>> implements Transformation<R> { 16 | private static final Logger LOGGER = LoggerFactory.getLogger(KeyToValue.class.getName()); 17 | public static final ConfigDef CONFIG_DEF = new 
ConfigDef().define("field", ConfigDef.Type.STRING, "_key", ConfigDef.Importance.LOW, 18 | "Field name on the record value to extract the record key into."); 19 | 20 | private String keyFieldName; 21 | private Schema valueSchema; 22 | 23 | @Override 24 | public void configure(Map<String, ?> configs) { 25 | final SimpleConfig config = new SimpleConfig(CONFIG_DEF, configs); 26 | keyFieldName = config.getString("field"); 27 | } 28 | 29 | @Override 30 | public R apply(R record) { 31 | LOGGER.debug("Old Key: {}, Old Value: {}", record.key(), record.value()); 32 | if (record.valueSchema() == null) { 33 | return applySchemaless(record); 34 | } else { 35 | return applyWithSchema(record); 36 | } 37 | } 38 | 39 | private R applySchemaless(R record) { 40 | if (!(record.value() instanceof Map)) { 41 | throw new IllegalArgumentException("Schemaless record value must be a Map - make sure you're using the JSON Converter for value."); 42 | } 43 | 44 | final Map<String, Object> value = (Map<String, Object>) record.value(); 45 | value.put(keyFieldName, record.key()); 46 | LOGGER.debug("New schemaless value: {}", value); 47 | return record.newRecord(record.topic(), record.kafkaPartition(), record.keySchema(), record.key(), record.valueSchema(), value, record.timestamp()); 48 | } 49 | 50 | private R applyWithSchema(R record) { 51 | final Struct oldValue = (Struct) record.value(); 52 | 53 | if (valueSchema == null) { 54 | final SchemaBuilder builder = SchemaBuilder.struct(); 55 | builder.name(oldValue.schema().name()); 56 | builder.version(oldValue.schema().version()); 57 | builder.doc(oldValue.schema().doc()); 58 | oldValue.schema().fields().forEach(f -> { 59 | builder.field(f.name(), f.schema()); 60 | }); 61 | builder.field(keyFieldName, record.keySchema() == null ? Schema.OPTIONAL_STRING_SCHEMA : record.keySchema()); 62 | valueSchema = builder.build(); 63 | valueSchema.schema().fields().forEach(f -> LOGGER.debug("Field: {}", f)); 64 | } 65 | 66 | Struct newValue = new Struct(valueSchema); 67 | valueSchema.fields().forEach(f -> { 68 | if (f.name().equals(keyFieldName)) { 69 | newValue.put(f, record.key()); 70 | } else { 71 | newValue.put(f, oldValue.get(f)); 72 | } 73 | }); 74 | LOGGER.debug("New schema value: {}", newValue); 75 | return record.newRecord(record.topic(), record.kafkaPartition(), record.keySchema(), record.key(), valueSchema, newValue, record.timestamp()); 76 | } 77 | 78 | @Override 79 | public ConfigDef config() { 80 | return CONFIG_DEF; 81 | } 82 | 83 | @Override 84 | public void close() { 85 | valueSchema = null; 86 | } 87 | 88 | public static class SimpleConfig extends AbstractConfig { 89 | public SimpleConfig(ConfigDef configDef, Map<?, ?> originals) { 90 | super(configDef, originals, false); 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/util/Mask.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.util; 2 | 3 | public class Mask { 4 | 5 | public static String passwordMask(String password) { 6 | if (password.length() <= 6) { 7 | return "*".repeat(password.length()); 8 | } 9 | String tmpPassword = "***" + password.substring(3, password.length() - 3) + "***"; 10 | return tmpPassword; 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/util/QueryIdentifier.java: -------------------------------------------------------------------------------- 1 | package 
com.clickhouse.kafka.connect.util; 2 | 3 | import org.slf4j.Logger; 4 | import org.slf4j.LoggerFactory; 5 | 6 | public class QueryIdentifier { 7 | private static final Logger LOGGER = LoggerFactory.getLogger(QueryIdentifier.class); 8 | private final String topic; 9 | private final int partition; 10 | private final long minOffset; 11 | private final long maxOffset; 12 | private final String queryId; 13 | 14 | public QueryIdentifier(String topic, String queryId) { 15 | this.topic = topic; 16 | this.queryId = queryId; 17 | 18 | int INVALID = -1; 19 | this.partition = INVALID; 20 | this.minOffset = INVALID; 21 | this.maxOffset = INVALID; 22 | } 23 | public QueryIdentifier(String topic, int partition, long minOffset, long maxOffset, String queryId) { 24 | this.topic = topic; 25 | this.partition = partition; 26 | this.minOffset = minOffset; 27 | this.maxOffset = maxOffset; 28 | this.queryId = queryId; 29 | } 30 | 31 | public String toString() { 32 | if (partition == -1) { 33 | return String.format("Topic: [%s], (QueryId: [%s])", topic, queryId); 34 | } 35 | 36 | return String.format("Topic: [%s], Partition: [%s], MinOffset: [%s], MaxOffset: [%s], (QueryId: [%s])", 37 | topic, partition, minOffset, maxOffset, queryId); 38 | } 39 | 40 | public String getQueryId() { 41 | return queryId; 42 | } 43 | public String getTopic() { 44 | return topic; 45 | } 46 | public int getPartition() { 47 | return partition; 48 | } 49 | public long getMinOffset() { 50 | return minOffset; 51 | } 52 | public long getMaxOffset() { 53 | return maxOffset; 54 | } 55 | 56 | public String getDeduplicationToken() { 57 | if (partition == -1) { 58 | return null; 59 | } 60 | return String.format("%s-%s-%s-%s", topic, partition, minOffset, maxOffset); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/util/Utils.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.util; 2 | 3 | import com.clickhouse.client.ClickHouseException; 4 | import com.clickhouse.kafka.connect.sink.data.Record; 5 | import com.clickhouse.kafka.connect.sink.dlq.ErrorReporter; 6 | import org.apache.kafka.connect.errors.DataException; 7 | import org.apache.kafka.connect.errors.RetriableException; 8 | import org.apache.kafka.connect.sink.SinkRecord; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.io.IOException; 13 | import java.net.SocketTimeoutException; 14 | import java.net.UnknownHostException; 15 | import java.util.ArrayList; 16 | import java.util.Collection; 17 | import java.util.List; 18 | import java.util.Map; 19 | 20 | public class Utils { 21 | 22 | public static String escapeName(String topic) { 23 | String cleanTopic = topic.replace("`", ""); 24 | return String.format("`%s`", cleanTopic); 25 | } 26 | 27 | public static String escapeTableName(String database, String topicName) { 28 | return escapeName(database) + "." 
+ escapeName(topicName); 29 | } 30 | 31 | private static final Logger LOGGER = LoggerFactory.getLogger(Utils.class); 32 | 33 | public static Exception getRootCause(Exception e) { 34 | return getRootCause(e, false); 35 | } 36 | 37 | /** 38 | * This will drill down to the first ClickHouseException in the exception chain 39 | * 40 | * @param e Exception to drill down 41 | * @return ClickHouseException or null if none found 42 | */ 43 | public static Exception getRootCause(Exception e, Boolean prioritizeClickHouseException) { 44 | if (e == null) 45 | return null; 46 | 47 | Throwable runningException = e;//We have to use Throwable because of the getCause() signature 48 | while (runningException.getCause() != null && 49 | (!prioritizeClickHouseException || !(runningException instanceof ClickHouseException))) { 50 | LOGGER.trace("Found exception: {}", runningException.getLocalizedMessage()); 51 | runningException = runningException.getCause(); 52 | } 53 | 54 | return runningException instanceof Exception ? (Exception) runningException : null; 55 | } 56 | 57 | 58 | /** 59 | * This method checks to see if we should retry, otherwise it just throws the exception again 60 | * 61 | * @param e Exception to check 62 | */ 63 | 64 | public static void handleException(Exception e, boolean errorsTolerance, Collection records) { 65 | LOGGER.warn("Deciding how to handle exception: {}", e.getLocalizedMessage()); 66 | 67 | //Let's check if we have a ClickHouseException to reference the error code 68 | //https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ErrorCodes.cpp 69 | Exception rootCause = Utils.getRootCause(e, true); 70 | if (rootCause instanceof ClickHouseException) { 71 | ClickHouseException clickHouseException = (ClickHouseException) rootCause; 72 | LOGGER.warn("ClickHouseException code: {}", clickHouseException.getErrorCode()); 73 | switch (clickHouseException.getErrorCode()) { 74 | case 3: // UNEXPECTED_END_OF_FILE 75 | case 107: // FILE_DOESNT_EXIST 76 | case 159: // TIMEOUT_EXCEEDED 77 | case 164: // READONLY 78 | case 202: // TOO_MANY_SIMULTANEOUS_QUERIES 79 | case 203: // NO_FREE_CONNECTION 80 | case 209: // SOCKET_TIMEOUT 81 | case 210: // NETWORK_ERROR 82 | case 241: // MEMORY_LIMIT_EXCEEDED 83 | case 242: // TABLE_IS_READ_ONLY 84 | case 252: // TOO_MANY_PARTS 85 | case 285: // TOO_FEW_LIVE_REPLICAS 86 | case 319: // UNKNOWN_STATUS_OF_INSERT 87 | case 425: // SYSTEM_ERROR 88 | case 999: // KEEPER_EXCEPTION 89 | throw new RetriableException(e); 90 | default: 91 | LOGGER.error("Error code [{}] wasn't in the acceptable list.", clickHouseException.getErrorCode()); 92 | break; 93 | } 94 | } 95 | 96 | //High-Level Explicit Exception Checking 97 | if (e instanceof DataException && !errorsTolerance) { 98 | LOGGER.warn("DataException thrown, wrapping exception: {}", e.getLocalizedMessage()); 99 | throw (DataException) e; 100 | } 101 | 102 | //Otherwise use Root-Cause Exception Checking 103 | if (rootCause instanceof SocketTimeoutException) { 104 | LOGGER.warn("SocketTimeoutException thrown, wrapping exception: {}", e.getLocalizedMessage()); 105 | throw new RetriableException(e); 106 | } else if (rootCause instanceof UnknownHostException) { 107 | LOGGER.warn("UnknownHostException thrown, wrapping exception: {}", e.getLocalizedMessage()); 108 | throw new RetriableException(e); 109 | } else if (rootCause instanceof IOException) { 110 | final String msg = rootCause.getMessage(); 111 | if (msg.indexOf(CLICKHOUSE_CLIENT_ERROR_READ_TIMEOUT_MSG) == 0 || 
msg.indexOf(CLICKHOUSE_CLIENT_ERROR_WRITE_TIMEOUT_MSG) == 0) { 112 | LOGGER.warn("IOException thrown, wrapping exception: {}", e.getLocalizedMessage()); 113 | throw new RetriableException(e); 114 | } 115 | } 116 | 117 | if (errorsTolerance) {//Right now this is all exceptions - should we restrict to just ClickHouseExceptions? 118 | LOGGER.warn("Errors tolerance is enabled, ignoring exception: {}", e.getLocalizedMessage()); 119 | } else { 120 | LOGGER.error("Errors tolerance is disabled, wrapping exception: {}", e.getLocalizedMessage()); 121 | if (records != null) { 122 | throw new RuntimeException(String.format("Number of records: %d", records.size()), e); 123 | } else { 124 | throw new RuntimeException("Records was null", e); 125 | } 126 | 127 | } 128 | } 129 | 130 | private static final String CLICKHOUSE_CLIENT_ERROR_READ_TIMEOUT_MSG = "Read timed out after"; 131 | private static final String CLICKHOUSE_CLIENT_ERROR_WRITE_TIMEOUT_MSG = "Write timed out after"; 132 | 133 | public static void sendTODlq(ErrorReporter errorReporter, Record record, Exception exception) { 134 | sendTODlq(errorReporter, record.getSinkRecord(), exception); 135 | } 136 | 137 | public static void sendTODlq(ErrorReporter errorReporter, SinkRecord record, Exception exception) { 138 | if (errorReporter != null && record != null) { 139 | errorReporter.report(record, exception); 140 | } 141 | } 142 | 143 | public static String getTableName(String database, String topicName, Map topicToTableMap) { 144 | String tableName = topicToTableMap.get(topicName); 145 | LOGGER.debug("Topic name: {}, Table Name: {}", topicName, tableName); 146 | if (tableName == null) { 147 | tableName = topicName; 148 | } 149 | 150 | return escapeTableName(database, tableName); 151 | } 152 | 153 | 154 | public static String getOffsets(Collection records) { 155 | long minOffset = Long.MAX_VALUE; 156 | long maxOffset = -1; 157 | 158 | for (SinkRecord record : records) { 159 | if (record.kafkaOffset() > maxOffset) { 160 | maxOffset = record.kafkaOffset(); 161 | } 162 | if (record.kafkaOffset() < minOffset) { 163 | minOffset = record.kafkaOffset(); 164 | } 165 | } 166 | 167 | return String.format("minOffset: %d, maxOffset: %d", minOffset, maxOffset); 168 | } 169 | 170 | public static List splitIgnoringQuotes(String input, char separator) { 171 | List result = new ArrayList<>(); 172 | StringBuilder sb = new StringBuilder(); 173 | boolean inSingleQuotes = false; 174 | boolean inDoubleQuotes = false; 175 | 176 | for (char c : input.toCharArray()) { 177 | if (c == '\'' && !inDoubleQuotes) { 178 | inSingleQuotes = !inSingleQuotes; 179 | sb.append(c); 180 | } else if (c == '"' && !inSingleQuotes) { 181 | inDoubleQuotes = !inDoubleQuotes; 182 | sb.append(c); 183 | } else if (c == separator && !inSingleQuotes && !inDoubleQuotes) { 184 | result.add(sb.toString().trim()); 185 | sb.setLength(0); 186 | } else { 187 | sb.append(c); 188 | } 189 | } 190 | result.add(sb.toString().trim()); 191 | 192 | return result; 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/util/jmx/ExecutionTimer.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.util.jmx; 2 | 3 | public class ExecutionTimer { 4 | private final long startTime; 5 | 6 | private ExecutionTimer() { 7 | this.startTime = System.nanoTime(); 8 | } 9 | 10 | public static ExecutionTimer start() { 11 | return new ExecutionTimer(); 12 | } 13 | 14 | 
public long nanosElapsed() { 15 | return System.nanoTime() - this.startTime; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/util/jmx/MBeanServerUtils.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.util.jmx; 2 | 3 | import org.slf4j.Logger; 4 | import org.slf4j.LoggerFactory; 5 | 6 | import javax.management.InstanceAlreadyExistsException; 7 | import javax.management.MBeanServer; 8 | import javax.management.ObjectName; 9 | import java.lang.management.ManagementFactory; 10 | 11 | public final class MBeanServerUtils { 12 | private static final Logger LOGGER = LoggerFactory.getLogger(MBeanServerUtils.class); 13 | 14 | private MBeanServerUtils() { 15 | 16 | } 17 | 18 | public static T registerMBean(final T mBean, final String mBeanName) { 19 | MBeanServer server = ManagementFactory.getPlatformMBeanServer(); 20 | try { 21 | server.registerMBean(mBean, new ObjectName(mBeanName)); 22 | return mBean; 23 | } catch (InstanceAlreadyExistsException e) { 24 | throw new RuntimeException(e); 25 | } catch (Exception e) { 26 | // JMX might not be available 27 | LOGGER.warn("Unable to register MBean " + mBeanName, e); 28 | return mBean; 29 | } 30 | } 31 | public static void unregisterMBean(final String mBeanName) { 32 | MBeanServer server = ManagementFactory.getPlatformMBeanServer(); 33 | try { 34 | ObjectName objectName = new ObjectName(mBeanName); 35 | if (server.isRegistered(objectName)) { 36 | server.unregisterMBean(objectName); 37 | } 38 | } catch (Exception e) { 39 | // JMX might not be available 40 | LOGGER.warn("Unable to unregister MBean " + mBeanName, e); 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/util/jmx/SinkTaskStatistics.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.util.jmx; 2 | 3 | public class SinkTaskStatistics implements SinkTaskStatisticsMBean { 4 | private volatile long receivedRecords; 5 | private volatile long recordProcessingTime; 6 | private volatile long taskProcessingTime; 7 | @Override 8 | public long getReceivedRecords() { 9 | return receivedRecords; 10 | } 11 | 12 | @Override 13 | public long getRecordProcessingTime() { 14 | return recordProcessingTime; 15 | } 16 | 17 | @Override 18 | public long getTaskProcessingTime() { 19 | return taskProcessingTime; 20 | } 21 | 22 | public void receivedRecords(final int n ) { 23 | this.receivedRecords += n; 24 | } 25 | 26 | public void recordProcessingTime(ExecutionTimer timer) { 27 | this.recordProcessingTime += timer.nanosElapsed(); 28 | } 29 | 30 | public void taskProcessingTime(ExecutionTimer timer) { 31 | this.taskProcessingTime += timer.nanosElapsed(); 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/util/jmx/SinkTaskStatisticsMBean.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.util.jmx; 2 | 3 | public interface SinkTaskStatisticsMBean { 4 | 5 | long getReceivedRecords(); 6 | 7 | long getRecordProcessingTime(); 8 | 9 | long getTaskProcessingTime(); 10 | 11 | } 12 | -------------------------------------------------------------------------------- 
/src/main/java/com/clickhouse/kafka/connect/util/reactor/function/Tuple2.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.util.reactor.function; 2 | 3 | import java.io.Serializable; 4 | import java.util.Arrays; 5 | import java.util.Collections; 6 | import java.util.Iterator; 7 | import java.util.List; 8 | import java.util.Objects; 9 | import java.util.function.Function; 10 | 11 | /** 12 | * A tuple that holds two non-null values. 13 | * 14 | * @param The type of the first non-null value held by this tuple 15 | * @param The type of the second non-null value held by this tuple 16 | * @author Jon Brisbin 17 | * @author Stephane Maldini 18 | */ 19 | @SuppressWarnings("rawtypes") 20 | public class Tuple2 implements Iterable, Serializable { 21 | 22 | private static final long serialVersionUID = -3518082018884860684L; 23 | 24 | final T1 t1; 25 | final T2 t2; 26 | 27 | Tuple2(T1 t1, T2 t2) { 28 | this.t1 = Objects.requireNonNull(t1, "t1"); 29 | this.t2 = Objects.requireNonNull(t2, "t2"); 30 | } 31 | 32 | /** 33 | * Type-safe way to get the first object of this {@link Tuples}. 34 | * 35 | * @return The first object 36 | */ 37 | public T1 getT1() { 38 | return t1; 39 | } 40 | 41 | /** 42 | * Type-safe way to get the second object of this {@link Tuples}. 43 | * 44 | * @return The second object 45 | */ 46 | public T2 getT2() { 47 | return t2; 48 | } 49 | 50 | /** 51 | * Map the left-hand part (T1) of this {@link reactor.util.function.Tuple2} into a different value and type, 52 | * keeping the right-hand part (T2). 53 | * 54 | * @param mapper the mapping {@link Function} for the left-hand part 55 | * @param the new type for the left-hand part 56 | * @return a new {@link reactor.util.function.Tuple2} with a different left (T1) value 57 | */ 58 | public com.clickhouse.kafka.connect.util.reactor.function.Tuple2 mapT1(Function mapper) { 59 | return new com.clickhouse.kafka.connect.util.reactor.function.Tuple2<>(mapper.apply(t1), t2); 60 | } 61 | 62 | /** 63 | * Map the right-hand part (T2) of this {@link reactor.util.function.Tuple2} into a different value and type, 64 | * keeping the left-hand part (T1). 65 | * 66 | * @param mapper the mapping {@link Function} for the right-hand part 67 | * @param the new type for the right-hand part 68 | * @return a new {@link reactor.util.function.Tuple2} with a different right (T2) value 69 | */ 70 | public com.clickhouse.kafka.connect.util.reactor.function.Tuple2 mapT2(Function mapper) { 71 | return new com.clickhouse.kafka.connect.util.reactor.function.Tuple2<>(t1, mapper.apply(t2)); 72 | } 73 | 74 | /** 75 | * Get the object at the given index. 76 | * 77 | * @param index The index of the object to retrieve. Starts at 0. 78 | * @return The object or {@literal null} if out of bounds. 79 | */ 80 | public Object get(int index) { 81 | switch (index) { 82 | case 0: 83 | return t1; 84 | case 1: 85 | return t2; 86 | default: 87 | return null; 88 | } 89 | } 90 | 91 | /** 92 | * Turn this {@code Tuple} into a {@link List List<Object>}. 93 | * The list isn't tied to this Tuple but is a copy with limited 94 | * mutability ({@code add} and {@code remove} are not supported, but {@code set} is). 95 | * 96 | * @return A copy of the tuple as a new {@link List List<Object>}. 97 | */ 98 | public List toList() { 99 | return Arrays.asList(toArray()); 100 | } 101 | 102 | /** 103 | * Turn this {@code Tuple} into a plain {@code Object[]}. 104 | * The array isn't tied to this Tuple but is a copy. 
105 | * 106 | * @return A copy of the tuple as a new {@link Object Object[]}. 107 | */ 108 | public Object[] toArray() { 109 | return new Object[]{t1, t2}; 110 | } 111 | 112 | /** 113 | * Return an immutable {@link Iterator Iterator<Object>} around 114 | * the content of this {@code Tuple}. 115 | * 116 | * @implNote As an {@link Iterator} is always tied to its {@link Iterable} source by 117 | * definition, the iterator cannot be mutable without the iterable also being mutable. 118 | * Since {@link Tuples} are immutable, so is the {@link Iterator} 119 | * returned by this method. 120 | * 121 | * @return An unmodifiable {@link Iterator} over the elements in this Tuple. 122 | */ 123 | @Override 124 | public Iterator iterator() { 125 | return Collections.unmodifiableList(toList()).iterator(); 126 | } 127 | 128 | @Override 129 | public boolean equals(Object o) { 130 | if (this == o) { 131 | return true; 132 | } 133 | if (o == null || getClass() != o.getClass()) { 134 | return false; 135 | } 136 | 137 | com.clickhouse.kafka.connect.util.reactor.function.Tuple2 tuple2 = (com.clickhouse.kafka.connect.util.reactor.function.Tuple2) o; 138 | 139 | return t1.equals(tuple2.t1) && t2.equals(tuple2.t2); 140 | 141 | } 142 | 143 | @Override 144 | public int hashCode() { 145 | int result = size(); 146 | result = 31 * result + t1.hashCode(); 147 | result = 31 * result + t2.hashCode(); 148 | return result; 149 | } 150 | 151 | /** 152 | * Return the number of elements in this {@literal Tuples}. 153 | * 154 | * @return The size of this {@literal Tuples}. 155 | */ 156 | public int size() { 157 | return 2; 158 | } 159 | 160 | /** 161 | * A Tuple String representation is the comma separated list of values, enclosed 162 | * in square brackets. 163 | * @return the Tuple String representation 164 | */ 165 | @Override 166 | public final String toString() { 167 | return Tuples.tupleStringRepresentation(toArray()).insert(0, '[').append(']').toString(); 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/util/reactor/function/Tuple3.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.util.reactor.function; 2 | 3 | import java.util.Objects; 4 | import java.util.function.Function; 5 | 6 | /** 7 | * A tuple that holds three non-null values. 8 | * 9 | * @param The type of the first non-null value held by this tuple 10 | * @param The type of the second non-null value held by this tuple 11 | * @param The type of the third non-null value held by this tuple 12 | * @author Jon Brisbin 13 | * @author Stephane Maldini 14 | */ 15 | public class Tuple3 extends Tuple2 { 16 | 17 | private static final long serialVersionUID = -4430274211524723033L; 18 | 19 | final T3 t3; 20 | 21 | Tuple3(T1 t1, T2 t2, T3 t3) { 22 | super(t1, t2); 23 | this.t3 = Objects.requireNonNull(t3, "t3"); 24 | } 25 | 26 | /** 27 | * Type-safe way to get the third object of this {@link Tuples}. 28 | * 29 | * @return The third object 30 | */ 31 | public T3 getT3() { 32 | return t3; 33 | } 34 | 35 | /** 36 | * Map the 1st part (T1) of this {@link reactor.util.function.Tuple3} into a different value and type, 37 | * keeping the other parts. 
38 | * 39 | * @param mapper the mapping {@link Function} for the T1 part 40 | * @param the new type for the T1 part 41 | * @return a new {@link reactor.util.function.Tuple3} with a different T1 value 42 | */ 43 | public com.clickhouse.kafka.connect.util.reactor.function.Tuple3 mapT1(Function mapper) { 44 | return new com.clickhouse.kafka.connect.util.reactor.function.Tuple3<>(mapper.apply(t1), t2, t3); 45 | } 46 | 47 | /** 48 | * Map the 2nd part (T2) of this {@link reactor.util.function.Tuple3} into a different value and type, 49 | * keeping the other parts. 50 | * 51 | * @param mapper the mapping {@link Function} for the T2 part 52 | * @param the new type for the T2 part 53 | * @return a new {@link reactor.util.function.Tuple3} with a different T2 value 54 | */ 55 | public com.clickhouse.kafka.connect.util.reactor.function.Tuple3 mapT2(Function mapper) { 56 | return new com.clickhouse.kafka.connect.util.reactor.function.Tuple3<>(t1, mapper.apply(t2), t3); 57 | } 58 | 59 | /** 60 | * Map the 3rd part (T3) of this {@link reactor.util.function.Tuple3} into a different value and type, 61 | * keeping the other parts. 62 | * 63 | * @param mapper the mapping {@link Function} for the T3 part 64 | * @param the new type for the T3 part 65 | * @return a new {@link reactor.util.function.Tuple3} with a different T3 value 66 | */ 67 | public com.clickhouse.kafka.connect.util.reactor.function.Tuple3 mapT3(Function mapper) { 68 | return new com.clickhouse.kafka.connect.util.reactor.function.Tuple3<>(t1, t2, mapper.apply(t3)); 69 | } 70 | 71 | @Override 72 | public Object get(int index) { 73 | switch (index) { 74 | case 0: 75 | return t1; 76 | case 1: 77 | return t2; 78 | case 2: 79 | return t3; 80 | default: 81 | return null; 82 | } 83 | } 84 | 85 | @Override 86 | public Object[] toArray() { 87 | return new Object[]{t1, t2, t3}; 88 | } 89 | 90 | @Override 91 | public boolean equals(Object o) { 92 | if (this == o) return true; 93 | if (!(o instanceof com.clickhouse.kafka.connect.util.reactor.function.Tuple3)) return false; 94 | if (!super.equals(o)) return false; 95 | 96 | @SuppressWarnings("rawtypes") 97 | com.clickhouse.kafka.connect.util.reactor.function.Tuple3 tuple3 = (com.clickhouse.kafka.connect.util.reactor.function.Tuple3) o; 98 | 99 | return t3.equals(tuple3.t3); 100 | } 101 | 102 | @Override 103 | public int size() { 104 | return 3; 105 | } 106 | 107 | @Override 108 | public int hashCode() { 109 | int result = super.hashCode(); 110 | result = 31 * result + t3.hashCode(); 111 | return result; 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/main/java/com/clickhouse/kafka/connect/util/reactor/function/Tuples.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2021 VMware Inc. or its affiliates, All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.clickhouse.kafka.connect.util.reactor.function; 18 | 19 | import java.util.Collection; 20 | import java.util.function.Function; 21 | 22 | /** 23 | * A {@literal Tuples} is an immutable {@link Collection} of objects, each of which can be of an arbitrary type. 24 | * 25 | * @author Jon Brisbin 26 | * @author Stephane Maldini 27 | */ 28 | @SuppressWarnings({"rawtypes"}) 29 | public abstract class Tuples implements Function { 30 | 31 | /** 32 | * Create a {@link com.clickhouse.kafka.connect.util.reactor.function.Tuple2} with the given array if it is small 33 | * enough to fit inside a {@link com.clickhouse.kafka.connect.util.reactor.function.Tuple2} to {@link Tuple3}. 34 | * 35 | * @param list the content of the Tuple (size 1 to 8) 36 | * @return The new {@link com.clickhouse.kafka.connect.util.reactor.function.Tuple2}. 37 | * @throws IllegalArgumentException if the array is not of length 1-8 38 | */ 39 | public static com.clickhouse.kafka.connect.util.reactor.function.Tuple2 fromArray(Object[] list) { 40 | //noinspection ConstantConditions 41 | if (list == null || list.length < 2) { 42 | throw new IllegalArgumentException("null or too small array, need between 2 and 8 values"); 43 | } 44 | 45 | switch (list.length){ 46 | case 2: 47 | return of(list[0], list[1]); 48 | case 3: 49 | return of(list[0], list[1], list[2]); 50 | } 51 | throw new IllegalArgumentException("too many arguments ("+ list.length + "), need between 2 and 8 values"); 52 | } 53 | 54 | /** 55 | * Create a {@link com.clickhouse.kafka.connect.util.reactor.function.Tuple2} with the given objects. 56 | * 57 | * @param t1 The first value in the tuple. Not null. 58 | * @param t2 The second value in the tuple. Not null. 59 | * @param The type of the first value. 60 | * @param The type of the second value. 61 | * @return The new {@link com.clickhouse.kafka.connect.util.reactor.function.Tuple2}. 62 | */ 63 | public static com.clickhouse.kafka.connect.util.reactor.function.Tuple2 of(T1 t1, T2 t2) { 64 | return new com.clickhouse.kafka.connect.util.reactor.function.Tuple2<>(t1, t2); 65 | } 66 | 67 | /** 68 | * Create a {@link com.clickhouse.kafka.connect.util.reactor.function.Tuple3} with the given objects. 69 | * 70 | * @param t1 The first value in the tuple. Not null. 71 | * @param t2 The second value in the tuple. Not null. 72 | * @param t3 The third value in the tuple. Not null. 73 | * @param The type of the first value. 74 | * @param The type of the second value. 75 | * @param The type of the third value. 76 | * @return The new {@link com.clickhouse.kafka.connect.util.reactor.function.Tuple3}. 77 | */ 78 | public static com.clickhouse.kafka.connect.util.reactor.function.Tuple3 of(T1 t1, T2 t2, T3 t3) { 79 | return new com.clickhouse.kafka.connect.util.reactor.function.Tuple3<>(t1, t2, t3); 80 | } 81 | 82 | /** 83 | * A converting function from Object array to {@link Tuples} 84 | * 85 | * @return The unchecked conversion function to {@link Tuples}. 86 | */ 87 | @SuppressWarnings("unchecked") 88 | public static Function fnAny() { 89 | return empty; 90 | } 91 | 92 | /** 93 | * A converting function from Object array to {@link Tuples} to R. 94 | * 95 | * @param The type of the return value. 96 | * @param delegate the function to delegate to 97 | * 98 | * @return The unchecked conversion function to R. 
99 | */ 100 | public static Function fnAny(final Function delegate) { 101 | return objects -> delegate.apply(Tuples.fnAny().apply(objects)); 102 | } 103 | 104 | /** 105 | * A converting function from Object array to {@link com.clickhouse.kafka.connect.util.reactor.function.Tuple2} 106 | * 107 | * @param The type of the first value. 108 | * @param The type of the second value. 109 | * 110 | * @return The unchecked conversion function to {@link com.clickhouse.kafka.connect.util.reactor.function.Tuple2}. 111 | */ 112 | @SuppressWarnings("unchecked") 113 | public static Function> fn2() { 114 | return empty; 115 | } 116 | 117 | 118 | /** 119 | * A converting function from Object array to {@link com.clickhouse.kafka.connect.util.reactor.function.Tuple3} 120 | * 121 | * @param The type of the first value. 122 | * @param The type of the second value. 123 | * @param The type of the third value. 124 | * 125 | * @return The unchecked conversion function to {@link com.clickhouse.kafka.connect.util.reactor.function.Tuple3}. 126 | */ 127 | @SuppressWarnings("unchecked") 128 | public static Function> fn3() { 129 | return empty; 130 | } 131 | 132 | /** 133 | * A converting function from Object array to {@link com.clickhouse.kafka.connect.util.reactor.function.Tuple3} to R. 134 | * 135 | * @param The type of the first value. 136 | * @param The type of the second value. 137 | * @param The type of the third value. 138 | * @param The type of the return value. 139 | * @param delegate the function to delegate to 140 | * 141 | * @return The unchecked conversion function to R. 142 | */ 143 | public static Function fn3(final Function, R> delegate) { 144 | return objects -> delegate.apply(Tuples.fn3().apply(objects)); 145 | } 146 | 147 | @Override 148 | public Tuple2 apply(Object o) { 149 | return fromArray((Object[])o); 150 | } 151 | 152 | /** 153 | * Prepare a string representation of the values suitable for a Tuple of any 154 | * size by accepting an array of elements. This builds a {@link StringBuilder} 155 | * containing the String representation of each object, comma separated. It manages 156 | * nulls as well by putting an empty string and the comma. 157 | * 158 | * @param values the values of the tuple to represent 159 | * @return a {@link StringBuilder} initialized with the string representation of the 160 | * values in the Tuple. 161 | */ 162 | static StringBuilder tupleStringRepresentation(Object... 
values) { 163 | StringBuilder sb = new StringBuilder(); 164 | for (int i = 0; i < values.length; i++) { 165 | Object t = values[i]; 166 | if (i != 0) { 167 | sb.append(','); 168 | } 169 | if (t != null) { 170 | sb.append(t); 171 | } 172 | } 173 | return sb; 174 | } 175 | 176 | 177 | static final Tuples empty = new Tuples(){}; 178 | 179 | Tuples(){} 180 | } 181 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/org.apache.kafka.connect.sink.SinkConnector: -------------------------------------------------------------------------------- 1 | com.clickhouse.kafka.connect.ClickHouseSinkConnector 2 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/org.apache.kafka.connect.transforms.Transformation: -------------------------------------------------------------------------------- 1 | com.clickhouse.kafka.connect.transforms.ExtractTopic$Value 2 | com.clickhouse.kafka.connect.transforms.ExtractTopic$Key 3 | com.clickhouse.kafka.connect.transforms.KeyToValue 4 | -------------------------------------------------------------------------------- /src/test/java/com/clickhouse/kafka/connect/sink/ClickHouseSinkTaskTest.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink; 2 | 3 | import com.clickhouse.client.ClickHouseClient; 4 | import com.clickhouse.client.ClickHouseException; 5 | import com.clickhouse.client.ClickHouseNodeSelector; 6 | import com.clickhouse.client.ClickHouseProtocol; 7 | import com.clickhouse.client.ClickHouseResponse; 8 | import com.clickhouse.client.ClickHouseResponseSummary; 9 | import com.clickhouse.kafka.connect.sink.db.helper.ClickHouseHelperClient; 10 | import com.clickhouse.kafka.connect.sink.helper.ClickHouseTestHelpers; 11 | import com.clickhouse.kafka.connect.sink.helper.SchemalessTestData; 12 | import com.google.gson.Gson; 13 | import com.google.gson.reflect.TypeToken; 14 | import org.apache.kafka.common.record.TimestampType; 15 | import org.apache.kafka.connect.sink.SinkRecord; 16 | import org.junit.jupiter.api.Test; 17 | 18 | import java.io.PrintWriter; 19 | import java.io.StringWriter; 20 | import java.util.ArrayList; 21 | import java.util.Collection; 22 | import java.util.HashMap; 23 | import java.util.List; 24 | import java.util.Map; 25 | import java.util.stream.LongStream; 26 | 27 | import static org.junit.jupiter.api.Assertions.*; 28 | 29 | public class ClickHouseSinkTaskTest extends ClickHouseBase { 30 | 31 | public static final int DEFAULT_TOTAL_RECORDS = 1000; 32 | public Collection createDBTopicSplit(int dbRange, long timeStamp, String topic, int partition, String splitChar) { 33 | Gson gson = new Gson(); 34 | List array = new ArrayList<>(); 35 | LongStream.range(0, dbRange).forEachOrdered(i -> { 36 | String newTopic = i + "_" + timeStamp + splitChar + topic ; 37 | LongStream.range(0, DEFAULT_TOTAL_RECORDS).forEachOrdered(n -> { 38 | Map value_struct = new HashMap<>(); 39 | value_struct.put("str", "num" + n); 40 | value_struct.put("off16", (short)n); 41 | value_struct.put("p_int8", (byte)n); 42 | value_struct.put("p_int16", (short)n); 43 | value_struct.put("p_int32", (int)n); 44 | value_struct.put("p_int64", (long)n); 45 | value_struct.put("p_float32", (float)n*1.1); 46 | value_struct.put("p_float64", (double)n*1.111111); 47 | value_struct.put("p_bool", (boolean)true); 48 | 49 | java.lang.reflect.Type gsonType = new TypeToken() { 50 | }.getType(); 
51 | String gsonString = gson.toJson(value_struct, gsonType); 52 | 53 | SinkRecord sr = new SinkRecord( 54 | newTopic, 55 | partition, 56 | null, 57 | null, null, 58 | gsonString, 59 | n, 60 | System.currentTimeMillis(), 61 | TimestampType.CREATE_TIME 62 | ); 63 | array.add(sr); 64 | }); 65 | }); 66 | 67 | 68 | 69 | 70 | return array; 71 | } 72 | @Test 73 | public void testExceptionHandling() { 74 | ClickHouseSinkTask task = new ClickHouseSinkTask(); 75 | assertThrows(RuntimeException.class, () -> task.put(null)); 76 | try { 77 | task.put(null); 78 | } catch (Exception e) { 79 | assertEquals(e.getClass(), RuntimeException.class); 80 | assertTrue(e.getCause() instanceof NullPointerException); 81 | 82 | StringWriter sw = new StringWriter(); 83 | PrintWriter pw = new PrintWriter(sw); 84 | e.printStackTrace(pw); 85 | assertTrue(sw.toString().contains("com.clickhouse.kafka.connect.util.Utils.handleException")); 86 | } 87 | } 88 | 89 | public ClickHouseResponseSummary dropTable(ClickHouseHelperClient chc, String tableName) { 90 | String dropTable = String.format("DROP TABLE IF EXISTS %s", tableName); 91 | try (ClickHouseClient client = ClickHouseClient.builder() 92 | .options(chc.getDefaultClientOptions()) 93 | .nodeSelector(ClickHouseNodeSelector.of(ClickHouseProtocol.HTTP)) 94 | .build(); 95 | ClickHouseResponse response = client.read(chc.getServer()) 96 | .query(dropTable) 97 | .executeAndWait()) { 98 | return response.getSummary(); 99 | } catch (ClickHouseException e) { 100 | throw new RuntimeException(e); 101 | } 102 | } 103 | 104 | // @Test TODO: Fix this test 105 | public void testDBTopicSplit() { 106 | Map props = createProps(); 107 | props.put(ClickHouseSinkConfig.ENABLE_DB_TOPIC_SPLIT, "true"); 108 | props.put(ClickHouseSinkConfig.DB_TOPIC_SPLIT_CHAR, "."); 109 | long timeStamp = System.currentTimeMillis(); 110 | createClient(props, false); 111 | String tableName = createTopicName("splitTopic"); 112 | int dbRange = 10; 113 | LongStream.range(0, dbRange).forEachOrdered(i -> { 114 | String databaseName = String.format("%d_%d" , i, timeStamp); 115 | String tmpTableName = String.format("`%s`.`%s`", databaseName, tableName); 116 | dropTable(chc, tmpTableName); 117 | createDatabase(databaseName); 118 | createTable(chc, tmpTableName, "CREATE TABLE %s ( `off16` Int16, `str` String, `p_int8` Int8, `p_int16` Int16, `p_int32` Int32, `p_int64` Int64, `p_float32` Float32, `p_float64` Float64, `p_bool` Bool) Engine = MergeTree ORDER BY off16"); 119 | }); 120 | 121 | ClickHouseSinkTask task = new ClickHouseSinkTask(); 122 | // Generate SinkRecords with different topics and check if they are split correctly 123 | Collection records = createDBTopicSplit(dbRange, timeStamp, tableName, 0, "."); 124 | try { 125 | task.start(props); 126 | task.put(records); 127 | } catch (Exception e) { 128 | fail("Exception should not be thrown"); 129 | } 130 | LongStream.range(0, dbRange).forEachOrdered(i -> { 131 | int count = countRows(chc, String.valueOf(i), tableName); 132 | assertEquals(DEFAULT_TOTAL_RECORDS, count); 133 | }); 134 | } 135 | 136 | 137 | @Test 138 | public void simplifiedBatchingSchemaless() { 139 | Map props = createProps(); 140 | props.put(ClickHouseSinkConfig.IGNORE_PARTITIONS_WHEN_BATCHING, "true"); 141 | ClickHouseHelperClient chc = createClient(props); 142 | String topic = createTopicName("schemaless_simple_batch_test"); 143 | ClickHouseTestHelpers.dropTable(chc, topic); 144 | ClickHouseTestHelpers.createTable(chc, topic, "CREATE TABLE %s ( `off16` Int16, `str` String, `p_int8` Int8, `p_int16` 
Int16, `p_int32` Int32, " + 145 | "`p_int64` Int64, `p_float32` Float32, `p_float64` Float64, `p_bool` Bool) Engine = MergeTree ORDER BY off16"); 146 | Collection sr = SchemalessTestData.createPrimitiveTypes(topic, 1); 147 | sr.addAll(SchemalessTestData.createPrimitiveTypes(topic, 2)); 148 | sr.addAll(SchemalessTestData.createPrimitiveTypes(topic, 3)); 149 | 150 | ClickHouseSinkTask chst = new ClickHouseSinkTask(); 151 | chst.start(props); 152 | chst.put(sr); 153 | chst.stop(); 154 | assertEquals(sr.size(), ClickHouseTestHelpers.countRows(chc, topic)); 155 | assertTrue(ClickHouseTestHelpers.validateRows(chc, topic, sr)); 156 | //assertEquals(1, ClickHouseTestHelpers.countInsertQueries(chc, topic)); 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/test/java/com/clickhouse/kafka/connect/sink/db/helper/ClickHouseHelperClientTest.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.db.helper; 2 | 3 | import com.clickhouse.kafka.connect.sink.ClickHouseBase; 4 | import com.clickhouse.kafka.connect.sink.db.mapping.Table; 5 | import com.clickhouse.kafka.connect.sink.helper.ClickHouseTestHelpers; 6 | import com.clickhouse.kafka.connect.sink.junit.extension.FromVersionConditionExtension; 7 | import com.clickhouse.kafka.connect.sink.junit.extension.SinceClickHouseVersion; 8 | import org.junit.jupiter.api.Assertions; 9 | import org.junit.jupiter.api.BeforeEach; 10 | import org.junit.jupiter.api.Test; 11 | import org.junit.jupiter.api.extension.ExtendWith; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import java.util.List; 16 | import java.util.Map; 17 | import java.util.stream.Collectors; 18 | 19 | @ExtendWith(FromVersionConditionExtension.class) 20 | public class ClickHouseHelperClientTest extends ClickHouseBase { 21 | private static final Logger LOGGER = LoggerFactory.getLogger(ClickHouseHelperClientTest.class); 22 | ClickHouseHelperClient chc = null; 23 | 24 | @BeforeEach 25 | public void setUp() { 26 | LOGGER.info("Setting up..."); 27 | Map props = createProps(); 28 | chc = createClient(props); 29 | } 30 | 31 | @Test 32 | public void ping() { 33 | Assertions.assertTrue(chc.ping()); 34 | } 35 | 36 | @Test 37 | public void showTables() { 38 | String topic = createTopicName("simple_table_test"); 39 | ClickHouseTestHelpers.createTable(chc, topic, 40 | "CREATE TABLE %s ( `num` String ) Engine = MergeTree ORDER BY num"); 41 | try { 42 | List table = chc.showTables(chc.getDatabase()); 43 | List tableNames = table.stream().map(item -> item.getCleanName()).collect(Collectors.toList()); 44 | Assertions.assertTrue(tableNames.contains(topic)); 45 | } finally { 46 | ClickHouseTestHelpers.dropTable(chc, topic); 47 | } 48 | } 49 | 50 | @Test 51 | public void describeNestedFlattenedTable() { 52 | String topic = createTopicName("nested_flattened_table_test"); 53 | ClickHouseTestHelpers.createTable(chc, topic, 54 | "CREATE TABLE %s ( `num` String, " + 55 | "`nested` Nested (innerInt Int32, innerString String)) " + 56 | "Engine = MergeTree ORDER BY num"); 57 | 58 | try { 59 | Table table = chc.describeTable(chc.getDatabase(), topic); 60 | Assertions.assertEquals(3, table.getRootColumnsList().size()); 61 | } finally { 62 | ClickHouseTestHelpers.dropTable(chc, topic); 63 | } 64 | } 65 | 66 | @Test 67 | public void ignoreArrayWithNestedTable() { 68 | String topic = createTopicName("nested_table_test"); 69 | ClickHouseTestHelpers.createTable(chc, 
topic, 70 | "CREATE TABLE %s ( `num` String, " + 71 | "`nested` Array(Nested (innerInt Int32, innerString String))) " + 72 | "Engine = MergeTree ORDER BY num"); 73 | 74 | try { 75 | Table table = chc.describeTable(chc.getDatabase(), topic); 76 | Assertions.assertNull(table); 77 | } finally { 78 | ClickHouseTestHelpers.dropTable(chc, topic); 79 | } 80 | } 81 | 82 | @Test 83 | @SinceClickHouseVersion("24.1") 84 | public void describeNestedUnFlattenedTable() { 85 | String nestedTopic = createTopicName("nested_unflattened_table_test"); 86 | String normalTopic = createTopicName("normal_unflattened_table_test"); 87 | ClickHouseTestHelpers.query(chc, "CREATE USER IF NOT EXISTS unflatten IDENTIFIED BY '123FOURfive^&*91011' SETTINGS flatten_nested=0"); 88 | ClickHouseTestHelpers.query(chc, "GRANT CURRENT GRANTS ON *.* TO unflatten"); 89 | 90 | Map props = createProps(); 91 | props.put("username", "unflatten"); 92 | props.put("password", "123FOURfive^&*91011"); 93 | chc = createClient(props); 94 | 95 | ClickHouseTestHelpers.createTable(chc, nestedTopic, 96 | "CREATE TABLE %s ( `num` String, " + 97 | "`nested` Nested (innerInt Int32, innerString String)) " + 98 | "Engine = MergeTree ORDER BY num"); 99 | ClickHouseTestHelpers.createTable(chc, normalTopic, 100 | "CREATE TABLE %s ( `num` String ) " + 101 | "Engine = MergeTree ORDER BY num"); 102 | 103 | try { 104 | Table nestedTable = chc.describeTable(chc.getDatabase(), nestedTopic); 105 | Assertions.assertNull(nestedTable); 106 | 107 | Table normalTable = chc.describeTable(chc.getDatabase(), normalTopic); 108 | Assertions.assertEquals(1, normalTable.getRootColumnsList().size()); 109 | } finally { 110 | ClickHouseTestHelpers.dropTable(chc, nestedTopic); 111 | ClickHouseTestHelpers.dropTable(chc, normalTopic); 112 | ClickHouseTestHelpers.query(chc, "DROP USER IF EXISTS unflatten"); 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/test/java/com/clickhouse/kafka/connect/sink/db/mapping/ColumnTest.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.db.mapping; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import java.util.List; 6 | 7 | import static com.clickhouse.kafka.connect.sink.helper.ClickHouseTestHelpers.col; 8 | import static com.clickhouse.kafka.connect.sink.helper.ClickHouseTestHelpers.newDescriptor; 9 | import static org.junit.jupiter.api.Assertions.*; 10 | 11 | class ColumnTest { 12 | 13 | @Test 14 | public void extractNullableColumn() { 15 | Column col = Column.extractColumn(newDescriptor("Nullable(String)")); 16 | assertEquals(Type.STRING, col.getType()); 17 | } 18 | 19 | @Test 20 | public void extractLowCardinalityColumn() { 21 | Column col = Column.extractColumn(newDescriptor("LowCardinality(String)")); 22 | assertEquals(Type.STRING, col.getType()); 23 | } 24 | 25 | @Test 26 | public void extractLowCardinalityNullableColumn() { 27 | Column col = Column.extractColumn(newDescriptor("LowCardinality(Nullable(String))")); 28 | assertEquals(Type.STRING, col.getType()); 29 | } 30 | 31 | @Test 32 | public void extractArrayOfLowCardinalityNullableColumn() { 33 | Column col = Column.extractColumn(newDescriptor("Array(LowCardinality(Nullable(String)))")); 34 | assertEquals(Type.ARRAY, col.getType()); 35 | assertEquals(Type.STRING, col.getArrayType().getType()); 36 | 37 | assertNull(col.getMapKeyType()); 38 | assertNull(col.getMapValueType()); 39 | assertNull(col.getTupleFields()); 40 | } 41 | 42 | 
@Test 43 | public void extractDecimalNullableColumn() { 44 | Column col = Column.extractColumn(newDescriptor("Nullable(Decimal)")); 45 | assertEquals(Type.Decimal, col.getType()); 46 | } 47 | 48 | @Test 49 | public void extractDecimal_default() { 50 | Column col = Column.extractColumn(newDescriptor("Decimal")); 51 | assertEquals(Type.Decimal, col.getType()); 52 | assertEquals(10, col.getPrecision()); 53 | assertEquals(0, col.getScale()); 54 | } 55 | 56 | @Test 57 | public void extractDecimal_default_5() { 58 | Column col = Column.extractColumn(newDescriptor("Decimal(5)")); 59 | assertEquals(Type.Decimal, col.getType()); 60 | assertEquals(5, col.getPrecision()); 61 | assertEquals(0, col.getScale()); 62 | } 63 | 64 | @Test 65 | public void extractDecimal_sized_5() { 66 | Column col = Column.extractColumn(newDescriptor("Decimal256(5)")); 67 | assertEquals(Type.Decimal, col.getType()); 68 | assertEquals(76, col.getPrecision()); 69 | assertEquals(5, col.getScale()); 70 | } 71 | 72 | @Test 73 | public void extractDecimal_14_2() { 74 | Column col = Column.extractColumn(newDescriptor("Decimal(14, 2)")); 75 | assertEquals(Type.Decimal, col.getType()); 76 | assertEquals(14, col.getPrecision()); 77 | assertEquals(2, col.getScale()); 78 | } 79 | 80 | @Test 81 | public void extractArrayOfDecimalNullable_5() { 82 | Column col = Column.extractColumn(newDescriptor("Array(Nullable(Decimal(5)))")); 83 | assertEquals(Type.ARRAY, col.getType()); 84 | 85 | assertNull(col.getMapKeyType()); 86 | assertNull(col.getMapValueType()); 87 | assertNull(col.getTupleFields()); 88 | 89 | Column subType = col.getArrayType(); 90 | assertEquals(Type.Decimal, subType.getType()); 91 | assertEquals(5, subType.getPrecision()); 92 | assertTrue(subType.isNullable()); 93 | } 94 | 95 | @Test 96 | public void extractArrayOfArrayOfArrayOfString() { 97 | Column col = Column.extractColumn(newDescriptor("Array(Array(Array(String)))")); 98 | assertEquals(Type.ARRAY, col.getType()); 99 | 100 | assertNull(col.getMapKeyType()); 101 | assertNull(col.getMapValueType()); 102 | assertNull(col.getTupleFields()); 103 | 104 | Column subType = col.getArrayType(); 105 | assertEquals(Type.ARRAY, subType.getType()); 106 | 107 | Column subSubType = subType.getArrayType(); 108 | assertEquals(Type.ARRAY, subSubType.getType()); 109 | 110 | Column subSubSubType = subSubType.getArrayType(); 111 | assertEquals(Type.STRING, subSubSubType.getType()); 112 | assertNull(subSubSubType.getArrayType()); 113 | } 114 | 115 | @Test 116 | public void extractMapOfPrimitives() { 117 | Column col = Column.extractColumn(newDescriptor("Map(String, Decimal(5)")); 118 | assertEquals(Type.MAP, col.getType()); 119 | 120 | assertEquals(Type.STRING, col.getMapKeyType()); 121 | 122 | assertNull(col.getArrayType()); 123 | assertNull(col.getMapValueType()); 124 | assertNull(col.getTupleFields()); 125 | } 126 | 127 | @Test 128 | public void extractTupleOfPrimitives() { 129 | Column col = Column.extractColumn(newDescriptor("Tuple(first String, second Decimal(5))")); 130 | assertEquals(Type.TUPLE, col.getType()); 131 | 132 | assertNull(col.getArrayType()); 133 | assertNull(col.getMapValueType()); 134 | assertEquals(List.of(), col.getTupleFields()); 135 | } 136 | 137 | @Test 138 | public void extractVariantOfPrimitives() { 139 | Column col = Column.extractColumn(newDescriptor("Variant(String, Decimal256(5), Decimal(14, 2), Decimal(5))")); 140 | assertEquals(Type.VARIANT, col.getType()); 141 | assertEquals(4, col.getVariantTypes().size()); 142 | 143 | List expectedSubtypes = List.of( 144 | 
col(Type.STRING), 145 | col(Type.Decimal, 76, 5), 146 | col(Type.Decimal, 14, 2), 147 | col(Type.Decimal, 5, 0) 148 | ); 149 | 150 | for (int i = 0; i < expectedSubtypes.size(); i++) { 151 | Column expectedSubtype = expectedSubtypes.get(i); 152 | Column actualSubtype = col.getVariantTypes().get(i).getT1(); 153 | 154 | assertEquals(expectedSubtype.getType(), actualSubtype.getType()); 155 | assertEquals(expectedSubtype.getPrecision(), actualSubtype.getPrecision()); 156 | assertEquals(expectedSubtype.getScale(), actualSubtype.getScale()); 157 | } 158 | } 159 | 160 | @Test 161 | public void extractEnumOfPrimitives() { 162 | Column col = Column.extractColumn(newDescriptor("Enum8('a, valid' = 1, 'b' = 2)")); 163 | assertEquals(Type.Enum8, col.getType()); 164 | assertEquals(2, col.getEnumValues().size()); 165 | assertTrue(col.getEnumValues().containsKey("a, valid")); 166 | assertTrue(col.getEnumValues().containsKey("b")); 167 | } 168 | } 169 | 170 | -------------------------------------------------------------------------------- /src/test/java/com/clickhouse/kafka/connect/sink/db/mapping/TableTest.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.db.mapping; 2 | 3 | import com.clickhouse.kafka.connect.ClickHouseSinkConnector; 4 | import com.clickhouse.kafka.connect.sink.ClickHouseBase; 5 | import com.clickhouse.kafka.connect.sink.db.helper.ClickHouseHelperClient; 6 | import com.clickhouse.kafka.connect.sink.helper.ClickHouseTestHelpers; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import java.util.List; 10 | import java.util.Map; 11 | import java.util.stream.Collectors; 12 | 13 | import static com.clickhouse.kafka.connect.sink.helper.ClickHouseTestHelpers.newDescriptor; 14 | import static org.junit.jupiter.api.Assertions.*; 15 | 16 | class TableTest extends ClickHouseBase { 17 | 18 | @Test 19 | public void extractMapOfPrimitives() { 20 | Table table = new Table("default", "t"); 21 | 22 | Column map = Column.extractColumn(newDescriptor("map", "Map(String, Decimal(5))")); 23 | Column mapValues = Column.extractColumn(newDescriptor("map.values", "Array(Decimal(5))")); 24 | 25 | assertEquals(Type.MAP, map.getType()); 26 | assertNull(map.getMapValueType()); 27 | 28 | table.addColumn(map); 29 | table.addColumn(mapValues); 30 | 31 | Column mapValueType = map.getMapValueType(); 32 | assertEquals(Type.Decimal, mapValueType.getType()); 33 | assertEquals(5, mapValueType.getPrecision()); 34 | } 35 | 36 | @Test 37 | public void extractNullables() { 38 | Map props = createProps(); 39 | ClickHouseHelperClient chc = createClient(props); 40 | 41 | String tableName = createTopicName("extract-table-test"); 42 | ClickHouseTestHelpers.dropTable(chc, tableName); 43 | ClickHouseTestHelpers.createTable(chc, tableName, "CREATE TABLE `%s` (`off16` Int16, date_number Nullable(Date)) Engine = MergeTree ORDER BY off16"); 44 | 45 | Table table = chc.describeTable(chc.getDatabase(), tableName); 46 | assertNotNull(table); 47 | assertEquals(table.getRootColumnsList().size(), 2); 48 | assertEquals(table.getAllColumnsList().size(), 3); 49 | ClickHouseTestHelpers.dropTable(chc, tableName); 50 | } 51 | 52 | @Test 53 | public void extractCommentV1() { 54 | Map props = createProps(); 55 | props.put(ClickHouseSinkConnector.CLIENT_VERSION, "V1"); 56 | ClickHouseHelperClient chc = createClient(props); 57 | 58 | String tableName = createTopicName("extract-table-test"); 59 | ClickHouseTestHelpers.dropTable(chc, tableName); 60 | 
ClickHouseTestHelpers.createTable(chc, tableName, "CREATE TABLE `%s` ( c String COMMENT '\\\\', d String COMMENT '\\n'" + 61 | ")" + 62 | "ENGINE = MergeTree()" + 63 | "ORDER BY tuple()"); 64 | 65 | Table table = chc.describeTable(chc.getDatabase(), tableName); 66 | assertNotNull(table); 67 | assertEquals(table.getRootColumnsList().size(), 2); 68 | ClickHouseTestHelpers.dropTable(chc, tableName); 69 | } 70 | 71 | @Test 72 | public void extractMapWithComplexValue() { 73 | Table table = new Table("default", "t"); 74 | 75 | Column map = Column.extractColumn(newDescriptor("map", "Map(String, Map(String, Decimal(5)))")); 76 | Column mapValues = Column.extractColumn(newDescriptor("map.values", "Array(Map(String, Decimal(5)))")); 77 | Column mapValuesValues = Column.extractColumn(newDescriptor("map.values.values", "Array(Array(Decimal(5)))")); 78 | 79 | assertEquals(Type.MAP, map.getType()); 80 | assertNull(map.getMapValueType()); 81 | 82 | table.addColumn(map); 83 | table.addColumn(mapValues); 84 | table.addColumn(mapValuesValues); 85 | 86 | Column mapValueType = map.getMapValueType(); 87 | assertEquals(Type.MAP, mapValueType.getType()); 88 | assertEquals(Type.STRING, mapValueType.getMapKeyType()); 89 | 90 | Column nestedMapValue = mapValueType.getMapValueType(); 91 | assertEquals(Type.Decimal, nestedMapValue.getType()); 92 | assertEquals(5, nestedMapValue.getPrecision()); 93 | } 94 | 95 | @Test 96 | public void extractMapOfMapOfMapOfString() { 97 | Table table = new Table("default", "t"); 98 | 99 | Column map = Column.extractColumn(newDescriptor("map", "Map(String, Map(String, Map(String, String)))")); 100 | Column mapValues = Column.extractColumn(newDescriptor("map.values", "Array(Map(String, Map(String, String)))")); 101 | Column mapValuesValues = Column.extractColumn(newDescriptor("map.values.values", "Array(Array(Map(String, String)))")); 102 | Column mapValuesValuesValues = Column.extractColumn(newDescriptor("map.values.values.values", "Array(Array(Array(String)))")); 103 | 104 | assertEquals(Type.MAP, map.getType()); 105 | assertNull(map.getMapValueType()); 106 | 107 | table.addColumn(map); 108 | table.addColumn(mapValues); 109 | table.addColumn(mapValuesValues); 110 | table.addColumn(mapValuesValuesValues); 111 | 112 | Column mapValueType = map.getMapValueType(); 113 | assertEquals(Type.MAP, mapValueType.getType()); 114 | assertEquals(Type.STRING, mapValueType.getMapKeyType()); 115 | 116 | Column nestedMapValue = mapValueType.getMapValueType(); 117 | assertEquals(Type.MAP, nestedMapValue.getType()); 118 | assertEquals(Type.STRING, nestedMapValue.getMapKeyType()); 119 | 120 | Column againNestedMapValue = nestedMapValue.getMapValueType(); 121 | assertEquals(Type.STRING, againNestedMapValue.getType()); 122 | } 123 | 124 | @Test 125 | public void extractTupleOfPrimitives() { 126 | Table table = new Table("default", "t"); 127 | Column tuple = Column.extractColumn(newDescriptor("tuple", "Tuple(first String, second Decimal(5))")); 128 | Column tupleFirst = Column.extractColumn(newDescriptor("tuple.first", "String")); 129 | Column tupleSecond = Column.extractColumn(newDescriptor("tuple.second", "Decimal(5)")); 130 | 131 | assertEquals(Type.TUPLE, tuple.getType()); 132 | assertEquals(List.of(), tuple.getTupleFields()); 133 | 134 | table.addColumn(tuple); 135 | table.addColumn(tupleFirst); 136 | table.addColumn(tupleSecond); 137 | 138 | assertEquals(2, tuple.getTupleFields().size()); 139 | assertEquals(List.of("tuple.first", "tuple.second"), 
tuple.getTupleFields().stream().map(Column::getName).collect(Collectors.toList())); 140 | assertEquals(List.of(Type.STRING, Type.Decimal), tuple.getTupleFields().stream().map(Column::getType).collect(Collectors.toList())); 141 | assertEquals(List.of(0, 5), tuple.getTupleFields().stream().map(Column::getPrecision).collect(Collectors.toList())); 142 | } 143 | 144 | @Test 145 | public void extractTupleOfTupleOfTuple() { 146 | Table table = new Table("default", "t"); 147 | Column tuple = Column.extractColumn(newDescriptor("tuple", "Tuple(tuple Tuple(tuple Tuple(string String)))")); 148 | Column tupleTuple = Column.extractColumn(newDescriptor("tuple.tuple", "Tuple(tuple Tuple(string String))")); 149 | Column tupleTupleTuple = Column.extractColumn(newDescriptor("tuple.tuple.tuple", "Tuple(string String)")); 150 | Column tupleTupleTupleString = Column.extractColumn(newDescriptor("tuple.tuple.tuple.string", "String")); 151 | 152 | assertEquals(Type.TUPLE, tuple.getType()); 153 | assertEquals(List.of(), tuple.getTupleFields()); 154 | 155 | table.addColumn(tuple); 156 | table.addColumn(tupleTuple); 157 | table.addColumn(tupleTupleTuple); 158 | table.addColumn(tupleTupleTupleString); 159 | 160 | assertEquals(1, tuple.getTupleFields().size()); 161 | assertEquals(1, tuple.getTupleFields().get(0).getTupleFields().size()); 162 | assertEquals(1, tuple.getTupleFields().get(0).getTupleFields().get(0).getTupleFields().size()); 163 | 164 | Column stringColumn = tuple.getTupleFields().get(0).getTupleFields().get(0).getTupleFields().get(0); 165 | assertEquals("tuple.tuple.tuple.string", stringColumn.getName()); 166 | assertEquals(Type.STRING, stringColumn.getType()); 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /src/test/java/com/clickhouse/kafka/connect/sink/dlq/InMemoryDLQ.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.dlq; 2 | 3 | import org.apache.kafka.connect.sink.SinkRecord; 4 | 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | public class InMemoryDLQ implements ErrorReporter { 9 | 10 | 11 | private class DLQRecord { 12 | private SinkRecord record = null; 13 | private Throwable t = null; 14 | 15 | public DLQRecord(SinkRecord record, Throwable t) { 16 | this.record = record; 17 | this.t = t; 18 | } 19 | } 20 | private List dlq = null; 21 | 22 | public InMemoryDLQ() { 23 | this.dlq = new ArrayList(); 24 | 25 | } 26 | 27 | @Override 28 | public void report(SinkRecord record, Exception e) { 29 | dlq.add(new DLQRecord(record, e)); 30 | } 31 | 32 | public int size() { 33 | return dlq.size(); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/test/java/com/clickhouse/kafka/connect/sink/junit/extension/FromVersionConditionExtension.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.junit.extension; 2 | 3 | import org.junit.jupiter.api.extension.BeforeTestExecutionCallback; 4 | import org.junit.jupiter.api.extension.ExtensionContext; 5 | import org.junit.platform.commons.util.AnnotationUtils; 6 | 7 | import java.util.List; 8 | import java.util.Optional; 9 | 10 | public class FromVersionConditionExtension implements BeforeTestExecutionCallback { 11 | 12 | @Override 13 | public void beforeTestExecution(ExtensionContext context) { 14 | Optional optionalFromVersion = AnnotationUtils.findAnnotation(context.getElement(), 
SinceClickHouseVersion.class); 15 | if (optionalFromVersion.isPresent()) { 16 | String requiredVersion = optionalFromVersion.get().value(); 17 | String currentVersion = System.getenv("CLICKHOUSE_VERSION"); 18 | if (currentVersion == null) { 19 | // We assume latest if the version env is not set 20 | return; 21 | } 22 | if (compareVersions(currentVersion, requiredVersion) < 0) { 23 | throw new org.junit.AssumptionViolatedException("Test skipped because CLICKHOUSE_VERSION is lower than required"); 24 | } 25 | } 26 | } 27 | 28 | private int compareVersions(String currentVersion, String requiredVersion) { 29 | if (List.of("latest", "cloud").contains(currentVersion)) 30 | return 0; 31 | 32 | String[] currentParts = currentVersion.split("\\."); 33 | String[] requiredParts = requiredVersion.split("\\."); 34 | 35 | try { 36 | int length = Math.max(currentParts.length, requiredParts.length); 37 | for (int i = 0; i < length; i++) { 38 | int currentPart = i < currentParts.length ? Integer.parseInt(currentParts[i]) : 0; 39 | int requiredPart = i < requiredParts.length ? Integer.parseInt(requiredParts[i]) : 0; 40 | if (currentPart != requiredPart) { 41 | return Integer.compare(currentPart, requiredPart); 42 | } 43 | } 44 | } catch (NumberFormatException e) { 45 | return 0; 46 | } 47 | return 0; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/com/clickhouse/kafka/connect/sink/junit/extension/SinceClickHouseVersion.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.junit.extension; 2 | 3 | import java.lang.annotation.ElementType; 4 | import java.lang.annotation.Retention; 5 | import java.lang.annotation.RetentionPolicy; 6 | import java.lang.annotation.Target; 7 | 8 | @Target({ElementType.METHOD}) 9 | @Retention(RetentionPolicy.RUNTIME) 10 | public @interface SinceClickHouseVersion { 11 | String value(); 12 | } 13 | -------------------------------------------------------------------------------- /src/test/java/com/clickhouse/kafka/connect/sink/kafa/RangeContainerTest.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.kafa; 2 | 3 | 4 | import com.clickhouse.kafka.connect.sink.kafka.RangeContainer; 5 | import com.clickhouse.kafka.connect.sink.kafka.RangeState; 6 | import org.junit.jupiter.api.DisplayName; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import static org.junit.jupiter.api.Assertions.assertEquals; 10 | 11 | public class RangeContainerTest { 12 | 13 | private String topic = "test"; 14 | private int partition = 1; 15 | 16 | @Test 17 | @DisplayName("sameRangeTest") 18 | public void sameRangeTest() { 19 | RangeContainer rangeContainerFirst = new RangeContainer(topic, partition, 10, 1); 20 | RangeContainer rangeContainerSecond01 = new RangeContainer(topic, partition, 10, 1); 21 | RangeContainer rangeContainerSecond02 = new RangeContainer(topic, partition, 10, 0); 22 | assertEquals(RangeState.SAME, rangeContainerFirst.getOverLappingState(rangeContainerSecond01)); 23 | assertEquals(RangeState.SAME, rangeContainerFirst.getOverLappingState(rangeContainerSecond02)); 24 | 25 | } 26 | 27 | 28 | @Test 29 | @DisplayName("newRangeTest") 30 | public void newRangeTest() { 31 | RangeContainer rangeContainerFirst = new RangeContainer(topic, partition, 10, 0); 32 | RangeContainer rangeContainerSecond = new RangeContainer(topic, partition, 20, 11); 33 | assertEquals(RangeState.NEW, 
rangeContainerFirst.getOverLappingState(rangeContainerSecond)); 34 | 35 | } 36 | 37 | 38 | @Test 39 | @DisplayName("containsRangeTest") 40 | public void containsRangeTest() { 41 | RangeContainer rangeContainerFirst = new RangeContainer(topic, partition, 10, 0); 42 | RangeContainer rangeContainerSecond01 = new RangeContainer(topic, partition, 9, 1); 43 | RangeContainer rangeContainerSecond02 = new RangeContainer(topic, partition, 9, 0); 44 | RangeContainer rangeContainerSecond03 = new RangeContainer(topic, partition, 10, 1); 45 | 46 | assertEquals(RangeState.CONTAINS, rangeContainerFirst.getOverLappingState(rangeContainerSecond01)); 47 | assertEquals(RangeState.CONTAINS, rangeContainerFirst.getOverLappingState(rangeContainerSecond02)); 48 | assertEquals(RangeState.CONTAINS, rangeContainerFirst.getOverLappingState(rangeContainerSecond03)); 49 | 50 | } 51 | 52 | @Test 53 | @DisplayName("errorRangeTest") 54 | public void errorRangeTest() { 55 | RangeContainer rangeContainerFirst = new RangeContainer(topic, partition, 10, 4); 56 | RangeContainer rangeContainerSecond = new RangeContainer(topic, partition, 9, 2); 57 | assertEquals(RangeState.ERROR, rangeContainerFirst.getOverLappingState(rangeContainerSecond)); 58 | 59 | } 60 | 61 | @Test 62 | @DisplayName("overlapRangeTest") 63 | public void overlapRangeTest() { 64 | RangeContainer rangeContainerFirst = new RangeContainer(topic, partition, 10, 2); 65 | RangeContainer rangeContainerSecond01 = new RangeContainer(topic, partition, 19, 10); 66 | RangeContainer rangeContainerSecond02 = new RangeContainer(topic, partition, 19, 3); 67 | RangeContainer rangeContainerSecond03 = new RangeContainer(topic, partition, 20, 6); 68 | RangeContainer rangeContainerSecond04 = new RangeContainer(topic, partition, 11, 0); 69 | RangeContainer rangeContainerSecond05 = new RangeContainer(topic, partition, 11, 1); 70 | 71 | assertEquals(RangeState.OVER_LAPPING, rangeContainerFirst.getOverLappingState(rangeContainerSecond01)); 72 | assertEquals(RangeState.OVER_LAPPING, rangeContainerFirst.getOverLappingState(rangeContainerSecond02)); 73 | assertEquals(RangeState.OVER_LAPPING, rangeContainerFirst.getOverLappingState(rangeContainerSecond03)); 74 | assertEquals(RangeState.OVER_LAPPING, rangeContainerFirst.getOverLappingState(rangeContainerSecond04)); 75 | assertEquals(RangeState.OVER_LAPPING, rangeContainerFirst.getOverLappingState(rangeContainerSecond05)); 76 | 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /src/test/java/com/clickhouse/kafka/connect/sink/provider/LocalProviderTest.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.provider; 2 | 3 | 4 | import com.clickhouse.kafka.connect.sink.state.State; 5 | import com.clickhouse.kafka.connect.sink.state.StateProvider; 6 | import com.clickhouse.kafka.connect.sink.state.StateRecord; 7 | import com.clickhouse.kafka.connect.sink.state.provider.InMemoryState; 8 | import org.junit.jupiter.api.DisplayName; 9 | import org.junit.jupiter.api.Test; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertEquals; 12 | 13 | 14 | public class LocalProviderTest { 15 | 16 | @Test 17 | @DisplayName("Set & get state record") 18 | public void setAndGet() { 19 | StateProvider stateProvider = new InMemoryState(); 20 | stateProvider.setStateRecord(new StateRecord("test", 1, 10, 0, State.BEFORE_PROCESSING)); 21 | StateRecord stateRecord = stateProvider.getStateRecord("test", 1); 22 | assertEquals(10 
, stateRecord.getMaxOffset()); 23 | assertEquals(0 , stateRecord.getMinOffset()); 24 | assertEquals(State.BEFORE_PROCESSING , stateRecord.getState()); 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /src/test/java/com/clickhouse/kafka/connect/sink/util/MaskTest.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.util; 2 | 3 | import com.clickhouse.kafka.connect.util.Mask; 4 | import org.junit.jupiter.api.DisplayName; 5 | import org.junit.jupiter.api.Test; 6 | 7 | import static org.junit.jupiter.api.Assertions.assertEquals; 8 | 9 | public class MaskTest { 10 | 11 | @Test 12 | @DisplayName("MaskPasswordBiggerThan6") 13 | public void MaskPasswordBiggerThan6() { 14 | String password = "DATBwWKXvYQnce"; 15 | String maskedPassword = "***BwWKXvYQ***"; 16 | assertEquals(maskedPassword, Mask.passwordMask(password)); 17 | } 18 | 19 | @Test 20 | @DisplayName("MaskPasswordSmallerThan6") 21 | public void MaskPasswordSmallerThan6() { 22 | String password = "DATBw"; 23 | String maskedPassword = "*****"; 24 | assertEquals(maskedPassword, Mask.passwordMask(password)); 25 | } 26 | 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/test/java/com/clickhouse/kafka/connect/sink/util/UtilsTest.java: -------------------------------------------------------------------------------- 1 | package com.clickhouse.kafka.connect.sink.util; 2 | 3 | import com.clickhouse.client.ClickHouseException; 4 | import com.clickhouse.kafka.connect.util.Utils; 5 | import org.apache.kafka.connect.errors.RetriableException; 6 | import org.junit.jupiter.api.DisplayName; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import java.io.IOException; 10 | import java.util.ArrayList; 11 | 12 | import static org.junit.jupiter.api.Assertions.*; 13 | 14 | public class UtilsTest { 15 | 16 | @Test 17 | @DisplayName("Test Exception Root Cause") 18 | public void TestRootCause() { 19 | Exception e1 = new Exception("The Actual Root Cause"); 20 | Exception e2 = new Exception(e1); 21 | Exception e3 = new Exception(e2); 22 | Exception e4 = new Exception(e3); 23 | assertEquals(e1, Utils.getRootCause(e4)); 24 | } 25 | 26 | 27 | @Test 28 | @DisplayName("Test ClickHouseException Root Cause") 29 | public void TestClickHouseRootCause() { 30 | Exception e1 = new Exception(); 31 | Exception e2 = new Exception(e1); 32 | Exception e3 = new ClickHouseException(123, e2, null); 33 | Exception e4 = new RuntimeException(e3); 34 | Exception e5 = new Exception(e4); 35 | 36 | Exception clickHouseException = Utils.getRootCause(e5, true); 37 | assertEquals(e3, clickHouseException); 38 | assertTrue(clickHouseException instanceof ClickHouseException); 39 | assertEquals(123, ((ClickHouseException) clickHouseException).getErrorCode()); 40 | } 41 | 42 | @Test 43 | @DisplayName("Test ClickHouseClient Timeout Throw Cause") 44 | public void TestClickHouseClientTimeoutCause(){ 45 | assertThrows(RetriableException.class, () -> { 46 | Exception timeout = new IOException("Write timed out after 30000 ms"); 47 | Utils.handleException(timeout, false, new ArrayList<>()); 48 | }); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/test/java/transforms/KeyToValueTest.java: -------------------------------------------------------------------------------- 1 | package transforms; 2 | 3 | import com.clickhouse.kafka.connect.transforms.KeyToValue; 4 | import 
org.apache.kafka.connect.data.Schema; 5 | import org.apache.kafka.connect.data.SchemaBuilder; 6 | import org.apache.kafka.connect.data.Struct; 7 | import org.apache.kafka.connect.data.Time; 8 | import org.apache.kafka.connect.data.Timestamp; 9 | import org.apache.kafka.connect.sink.SinkRecord; 10 | import org.junit.jupiter.api.Assertions; 11 | import org.junit.jupiter.api.Test; 12 | 13 | import java.util.Date; 14 | import java.util.HashMap; 15 | import java.util.Random; 16 | import java.util.UUID; 17 | 18 | public class KeyToValueTest { 19 | @Test 20 | public void applySchemalessTest() { 21 | try(KeyToValue keyToValue = new KeyToValue<>()) { 22 | keyToValue.configure(new HashMap<>()); 23 | SinkRecord record = new SinkRecord(UUID.randomUUID().toString(), 0, null, "Sample Key", null, new HashMap<>(), 0); 24 | SinkRecord newRecord = keyToValue.apply(record); 25 | Assertions.assertTrue(newRecord.value() instanceof HashMap && ((HashMap) newRecord.value()).containsKey("_key")); 26 | } 27 | } 28 | 29 | @Test 30 | public void applyWithSchemaTest() { 31 | try(KeyToValue keyToValue = new KeyToValue<>()) { 32 | keyToValue.configure(new HashMap<>()); 33 | SinkRecord record = generateSampleRecord(UUID.randomUUID().toString(), 0, 0); 34 | SinkRecord newRecord = keyToValue.apply(record); 35 | Assertions.assertTrue(newRecord.value() instanceof Struct && 36 | ((Struct) newRecord.value()).get("off16") instanceof Short && 37 | ((Struct) newRecord.value()).get("_key") != null); 38 | } 39 | } 40 | 41 | 42 | 43 | private SinkRecord generateSampleRecord(String topic, int partition, long offset) { 44 | Schema schema = SchemaBuilder.struct() 45 | .field("off16", Schema.INT16_SCHEMA) 46 | .field("timestamp_int64", Timestamp.SCHEMA) 47 | .field("date_date", Time.SCHEMA) 48 | .build(); 49 | Struct value_struct = new Struct(schema) 50 | .put("off16", (short) new Random().nextInt(Short.MAX_VALUE + 1)) 51 | .put("timestamp_int64", new Date(System.currentTimeMillis())) 52 | .put("date_date", new Date(System.currentTimeMillis())); 53 | return new SinkRecord(topic, partition, null, "{\"sample\": \"keys\"}", schema, value_struct, offset); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Define the root logger with appender X 2 | log4j.rootLogger=INFO, console 3 | #log4j.logger.org.testcontainers=WARN 4 | log4j.logger.com.clickhouse.kafka=DEBUG 5 | 6 | log4j.appender.console= org.apache.log4j.ConsoleAppender 7 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.console.layout.conversionPattern=[%d] %p %C %m%n --------------------------------------------------------------------------------
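
Illustrative usage sketch (not part of the repository): the Tuples.fn2() helper documented in the vendored reactor.function package above adapts an untyped Object[] into a typed Tuple2. This snippet assumes the vendored Tuple2 exposes getT1()/getT2() accessors and that Tuples builds tuples from arrays in the same way as the upstream Reactor class it is copied from; the class name TuplesUsageSketch and the sample values are invented for illustration only.

import com.clickhouse.kafka.connect.util.reactor.function.Tuple2;
import com.clickhouse.kafka.connect.util.reactor.function.Tuples;

import java.util.function.Function;

public class TuplesUsageSketch {
    public static void main(String[] args) {
        // fn2() returns a converter from Object[] to a typed Tuple2 (unchecked cast inside fn2).
        Function<Object[], Tuple2<String, Long>> toPair = Tuples.fn2();

        // Applying it to a two-element array yields a Tuple2 whose positions carry the declared types.
        Tuple2<String, Long> pair = toPair.apply(new Object[]{"inserted_rows", 42L});

        // getT1()/getT2() read the first and second values (assumed to mirror the Reactor API).
        System.out.println(pair.getT1() + " = " + pair.getT2());
    }
}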