├── .github └── workflows │ ├── create_release.yml │ ├── prs_and_commits.yml │ └── release_pr_workflow.yml ├── .gitignore ├── LICENSE.md ├── README.md ├── config ├── checkstyle │ └── suppressions.xml └── copyright │ └── custom-header-styles.xml ├── docs └── sink-connector-config-options.rst ├── kcbq-api ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── wepay │ └── kafka │ └── connect │ └── bigquery │ └── api │ ├── KafkaSchemaRecordType.java │ └── SchemaRetriever.java ├── kcbq-connector ├── pom.xml └── src │ ├── main │ ├── assembly │ │ ├── release-tar.xml │ │ └── release-zip.xml │ ├── java │ │ └── com │ │ │ └── wepay │ │ │ └── kafka │ │ │ └── connect │ │ │ └── bigquery │ │ │ ├── BigQuerySinkConnector.java │ │ │ ├── BigQuerySinkTask.java │ │ │ ├── ErrantRecordHandler.java │ │ │ ├── GcpClientBuilder.java │ │ │ ├── GcsToBqLoadRunnable.java │ │ │ ├── MergeQueries.java │ │ │ ├── SchemaManager.java │ │ │ ├── config │ │ │ ├── BigQuerySinkConfig.java │ │ │ ├── BigQuerySinkTaskConfig.java │ │ │ ├── CredentialsValidator.java │ │ │ ├── GcsBucketValidator.java │ │ │ ├── MultiPropertyValidator.java │ │ │ ├── PartitioningModeValidator.java │ │ │ ├── PartitioningTypeValidator.java │ │ │ ├── StorageWriteApiValidator.java │ │ │ └── UpsertDeleteValidator.java │ │ │ ├── convert │ │ │ ├── BigQueryRecordConverter.java │ │ │ ├── BigQuerySchemaConverter.java │ │ │ ├── KafkaDataBuilder.java │ │ │ ├── RecordConverter.java │ │ │ ├── SchemaConverter.java │ │ │ └── logicaltype │ │ │ │ ├── DebeziumLogicalConverters.java │ │ │ │ ├── KafkaLogicalConverters.java │ │ │ │ ├── LogicalConverterRegistry.java │ │ │ │ └── LogicalTypeConverter.java │ │ │ ├── exception │ │ │ ├── BigQueryConnectException.java │ │ │ ├── BigQueryErrorResponses.java │ │ │ ├── BigQueryStorageWriteApiConnectException.java │ │ │ ├── BigQueryStorageWriteApiErrorResponses.java │ │ │ ├── ConversionConnectException.java │ │ │ ├── ExpectedInterruptException.java │ │ │ └── GcsConnectException.java │ │ │ ├── retrieve │ │ │ └── 
IdentitySchemaRetriever.java │ │ │ ├── utils │ │ │ ├── FieldNameSanitizer.java │ │ │ ├── PartitionedTableId.java │ │ │ ├── SinkRecordConverter.java │ │ │ ├── SleepUtils.java │ │ │ ├── TableNameUtils.java │ │ │ ├── Time.java │ │ │ └── Version.java │ │ │ └── write │ │ │ ├── RecordBatches.java │ │ │ ├── batch │ │ │ ├── CountDownRunnable.java │ │ │ ├── GcsBatchTableWriter.java │ │ │ ├── KcbqThreadPoolExecutor.java │ │ │ ├── MergeBatches.java │ │ │ ├── TableWriter.java │ │ │ └── TableWriterBuilder.java │ │ │ ├── row │ │ │ ├── AdaptiveBigQueryWriter.java │ │ │ ├── BigQueryWriter.java │ │ │ ├── GcsToBqWriter.java │ │ │ ├── SimpleBigQueryWriter.java │ │ │ └── UpsertDeleteBigQueryWriter.java │ │ │ └── storage │ │ │ ├── ApplicationStream.java │ │ │ ├── ConvertedRecord.java │ │ │ ├── StorageApiBatchModeHandler.java │ │ │ ├── StorageWriteApiBase.java │ │ │ ├── StorageWriteApiBatchApplicationStream.java │ │ │ ├── StorageWriteApiDefaultStream.java │ │ │ ├── StorageWriteApiRetryHandler.java │ │ │ ├── StorageWriteApiWriter.java │ │ │ ├── StreamState.java │ │ │ └── StreamWriter.java │ └── resources │ │ └── META-INF │ │ └── services │ │ └── org.apache.kafka.connect.sink.SinkConnector │ └── test │ ├── java │ └── com │ │ └── wepay │ │ └── kafka │ │ └── connect │ │ └── bigquery │ │ ├── BigQuerySinkConnectorTest.java │ │ ├── BigQuerySinkTaskTest.java │ │ ├── BigQueryStorageApiBatchSinkTaskTest.java │ │ ├── BigQueryStorageApiSinkTaskTest.java │ │ ├── ErrantRecordHandlerTest.java │ │ ├── GcsToBqLoadRunnableTest.java │ │ ├── MergeQueriesTest.java │ │ ├── SchemaManagerTest.java │ │ ├── SinkPropertiesFactory.java │ │ ├── SinkTaskPropertiesFactory.java │ │ ├── config │ │ ├── BigQuerySinkConfigTest.java │ │ ├── CredentialsValidatorTest.java │ │ ├── GcsBucketValidatorTest.java │ │ ├── MultiPropertyValidatorTest.java │ │ ├── PartitioningModeValidatorTest.java │ │ ├── PartitioningTypeValidatorTest.java │ │ └── StorageWriteApiValidatorTest.java │ │ ├── convert │ │ ├── 
BigQueryRecordConverterTest.java │ │ ├── BigQuerySchemaConverterTest.java │ │ ├── KafkaDataConverterTest.java │ │ └── logicaltype │ │ │ ├── DebeziumLogicalConvertersTest.java │ │ │ └── KafkaLogicalConvertersTest.java │ │ ├── exception │ │ ├── BigQueryErrorResponsesTest.java │ │ ├── BigQueryStorageWriteApiConnectExceptionTest.java │ │ └── BigQueryStorageWriteApiErrorResponsesTest.java │ │ ├── integration │ │ ├── ApplicationStreamIT.java │ │ ├── BaseConnectorIT.java │ │ ├── BigQueryErrantRecordHandlerIT.java │ │ ├── BigQueryErrorResponsesIT.java │ │ ├── BigQuerySinkConnectorIT.java │ │ ├── GcpClientBuilderIT.java │ │ ├── StorageWriteApiBatchBigQuerySinkConnectorIT.java │ │ ├── StorageWriteApiBigQuerySinkConnectorIT.java │ │ ├── TimePartitioningIT.java │ │ ├── UpsertDeleteBigQuerySinkConnectorIT.java │ │ ├── UpsertDeleteBigQuerySinkConnectorWithSRIT.java │ │ └── utils │ │ │ ├── BigQueryTestUtils.java │ │ │ ├── BucketClearer.java │ │ │ ├── SchemaRegistryTestUtils.java │ │ │ ├── TableClearer.java │ │ │ └── TestCaseLogger.java │ │ ├── utils │ │ ├── FieldNameSanitizerTest.java │ │ ├── MockTime.java │ │ └── PartitionedTableIdTest.java │ │ └── write │ │ ├── row │ │ ├── BigQueryWriterTest.java │ │ └── GcsToBqWriterTest.java │ │ └── storage │ │ ├── BigQueryBuilderTest.java │ │ ├── BigQueryWriteSettingsBuilderTest.java │ │ ├── GcsBuilderTest.java │ │ ├── StorageApiBatchModeHandlerTest.java │ │ ├── StorageWriteApiBatchApplicationStreamTest.java │ │ ├── StorageWriteApiDefaultStreamTest.java │ │ └── StorageWriteApiWriterTest.java │ └── resources │ ├── integration_test_cases │ ├── gcs-load │ │ ├── data.json │ │ └── schema.json │ ├── logical-types │ │ ├── data.json │ │ └── schema.json │ ├── matryoshka-dolls │ │ ├── data.json │ │ └── schema.json │ ├── nulls │ │ ├── data.json │ │ └── schema.json │ └── primitives │ │ ├── data.json │ │ └── schema.json │ └── log4j.properties └── pom.xml /.github/workflows/create_release.yml: 
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2024 Copyright 2022 Aiven Oy and 3 | # bigquery-connector-for-apache-kafka project contributors 4 | # 5 | # This software contains code derived from the Confluent BigQuery 6 | # Kafka Connector, Copyright Confluent, Inc, which in turn 7 | # contains code derived from the WePay BigQuery Kafka Connector, 8 | # Copyright WePay, Inc. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, 17 | # software distributed under the License is distributed on an 18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | # KIND, either express or implied. See the License for the 20 | # specific language governing permissions and limitations 21 | # under the License. 
22 | # 23 | 24 | name: Create release 25 | 26 | on: 27 | workflow_dispatch: 28 | inputs: 29 | commit_hash: 30 | description: "Hash of 'Release version x.y.z' commit" 31 | required: true 32 | 33 | permissions: 34 | contents: write 35 | pull-requests: write 36 | issues: write 37 | 38 | jobs: 39 | build: 40 | name: Create Release 41 | runs-on: ubuntu-latest 42 | steps: 43 | - name: Checkout code 44 | uses: actions/checkout@v2 45 | with: 46 | ref: ${{ github.event.inputs.commit_hash }} 47 | 48 | - name: Check commit title and extract version 49 | run: | 50 | export commit_title=$(git log --pretty=format:%s -1 ${{ github.event.inputs.commit_hash }}) 51 | echo "Commit title: $commit_title" 52 | if [[ $commit_title =~ ^Release\ version\ [0-9]+\.[0-9]+\.[0-9]+(-(alpha|beta|rc[0-9]+))?$ ]]; then 53 | echo "Valid commit title" 54 | else 55 | echo "Invalid commit title" 56 | exit 1 57 | fi 58 | export version=$(echo ${commit_title} | sed s/^Release\ version\ //g) 59 | echo "Will use version ${version}" 60 | echo "version=${version}" >> $GITHUB_ENV 61 | 62 | - name: Set up JDK 8 63 | uses: actions/setup-java@v4 64 | with: 65 | distribution: 'adopt' 66 | java-version: 8 67 | cache: maven 68 | 69 | - name: Build 70 | run: | 71 | mvn install -DskipTests 72 | mvn -f kcbq-connector clean package assembly:single@release-artifacts -DskipTests 73 | 74 | export tar_file=$(ls ./kcbq-connector/target/ | grep tar) 75 | export zip_file=$(ls ./kcbq-connector/target/ | grep zip) 76 | echo tar_file=${tar_file} >> $GITHUB_ENV 77 | echo zip_file=${zip_file} >> $GITHUB_ENV 78 | 79 | echo tar_path=`realpath ./kcbq-connector/target/${tar_file}` >> $GITHUB_ENV 80 | echo zip_path=`realpath ./kcbq-connector/target/${zip_file}` >> $GITHUB_ENV 81 | 82 | - name: Create tag 83 | run: | 84 | git config --local user.name "GitHub Action" 85 | git config --local user.email "action@github.com" 86 | git tag -a "v${{ env.version }}" -m "Release version ${{ env.version }}" 87 | git push origin "v${{ env.version 
}}" 88 | 89 | - name: Create release draft 90 | id: create_release 91 | uses: actions/create-release@v1 92 | env: 93 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 94 | with: 95 | tag_name: "v${{ env.version }}" 96 | release_name: "v${{ env.version }}" 97 | commitish: ${{ github.event.inputs.commit_hash }} 98 | body: | 99 | *Fill in* 100 | draft: true 101 | prerelease: false 102 | 103 | - name: Upload tar 104 | uses: actions/upload-release-asset@v1 105 | env: 106 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 107 | with: 108 | upload_url: ${{ steps.create_release.outputs.upload_url }} 109 | asset_path: ${{ env.tar_path }} 110 | asset_name: ${{ env.tar_file }} 111 | asset_content_type: application/tar 112 | 113 | - name: Upload zip 114 | uses: actions/upload-release-asset@v1 115 | env: 116 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 117 | with: 118 | upload_url: ${{ steps.create_release.outputs.upload_url }} 119 | asset_path: ${{ env.zip_path }} 120 | asset_name: ${{ env.zip_file }} 121 | asset_content_type: application/zip 122 | -------------------------------------------------------------------------------- /.github/workflows/prs_and_commits.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2024 Copyright 2022 Aiven Oy and 3 | # bigquery-connector-for-apache-kafka project contributors 4 | # 5 | # This software contains code derived from the Confluent BigQuery 6 | # Kafka Connector, Copyright Confluent, Inc, which in turn 7 | # contains code derived from the WePay BigQuery Kafka Connector, 8 | # Copyright WePay, Inc. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 
12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, 17 | # software distributed under the License is distributed on an 18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | # KIND, either express or implied. See the License for the 20 | # specific language governing permissions and limitations 21 | # under the License. 22 | # 23 | 24 | # Workflow to check pull requests and new commits to main branches 25 | # This checks the source in the state as if after the merge. 26 | name: Pull request checks 27 | on: 28 | pull_request: 29 | branches: [ main ] 30 | push: 31 | branches: [ main ] 32 | 33 | # Disallow concurrent runs for the same PR by cancelling in-progress runs 34 | # when new commits are pushed 35 | concurrency: 36 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 37 | cancel-in-progress: true 38 | 39 | jobs: 40 | build: 41 | name: Build 42 | runs-on: ubuntu-latest 43 | steps: 44 | - name: Checkout code 45 | uses: actions/checkout@v2 46 | - name: Set up JDK 17 47 | uses: actions/setup-java@v4 48 | with: 49 | distribution: 'adopt' 50 | java-version: 17 51 | cache: maven 52 | - name: License header check 53 | run: | 54 | mvn license:remove license:format 55 | if [[ -n $(git status -s) ]]; then 56 | echo 1>&2 'Some files do not have the correct license header:' 57 | git diff --name-only 1>&2 58 | echo 1>&2 'Please update the license headers for these files by running `mvn license:remove license:format`' 59 | exit 1 60 | fi 61 | - name: Build (Maven) 62 | run: mvn -P ci --batch-mode clean package -DskipTests 63 | - name: Unit tests (Maven) 64 | run: mvn -P ci --batch-mode test 65 | - name: Integration tests (Maven) 66 | env: 67 | # Necessary for client builder integration tests that run with 68 | # default application credentials 69 | CREDENTIALS_JSON: ${{ secrets.GCP_CREDENTIALS }} 70 | 
GOOGLE_APPLICATION_CREDENTIALS: /tmp/creds.json 71 | KCBQ_TEST_KEYFILE: /tmp/creds.json 72 | KCBQ_TEST_KEY_SOURCE: FILE 73 | KCBQ_TEST_PROJECT: ${{ secrets.KCBQ_TEST_PROJECT }} 74 | KCBQ_TEST_DATASET: ${{ secrets.KCBQ_TEST_DATASET }} 75 | KCBQ_TEST_BUCKET: ${{ secrets.KCBQ_TEST_BUCKET }} 76 | run: | 77 | echo "$CREDENTIALS_JSON" > /tmp/creds.json 78 | export KCBQ_TEST_TABLE_SUFFIX=_$(date +%s)_$RANDOM 79 | mvn -P ci -Dskip.unit.tests=true verify 80 | - name: Upload integration test results (Maven) 81 | if: always() 82 | uses: actions/upload-artifact@v4 83 | with: 84 | path: kcbq-connector/target/failsafe-reports/* 85 | name: integration-test-results 86 | -------------------------------------------------------------------------------- /.github/workflows/release_pr_workflow.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2024 Copyright 2022 Aiven Oy and 3 | # bigquery-connector-for-apache-kafka project contributors 4 | # 5 | # This software contains code derived from the Confluent BigQuery 6 | # Kafka Connector, Copyright Confluent, Inc, which in turn 7 | # contains code derived from the WePay BigQuery Kafka Connector, 8 | # Copyright WePay, Inc. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, 17 | # software distributed under the License is distributed on an 18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | # KIND, either express or implied. See the License for the 20 | # specific language governing permissions and limitations 21 | # under the License. 22 | # 23 | 24 | # The workflow to create PRs with release commits. 
25 | name: Create release PR 26 | on: 27 | workflow_dispatch: 28 | inputs: 29 | release_version: 30 | description: "Release version '0.1.2' (without 'v')" 31 | required: true 32 | snapshot_version: 33 | description: "Snapshot version '0.2.0-SNAPSHOT' (without 'v')" 34 | required: true 35 | 36 | permissions: 37 | contents: write 38 | pull-requests: write 39 | issues: write 40 | 41 | jobs: 42 | create_release_pr: 43 | name: Create release PR (job) 44 | runs-on: ubuntu-latest 45 | steps: 46 | - name: Check versions 47 | run: | 48 | echo "Checking release version..." 49 | if echo ${{ github.event.inputs.release_version }} | grep --invert-match '^[0-9]\+\.[0-9]\+\.[0-9]\+\(-\(alpha\|beta\|rc[0-9]\+\)\)\?$' > /dev/null; then 50 | echo "Release version is invalid" 51 | exit 1 52 | fi 53 | 54 | echo "Checking snapshot version..." 55 | if echo ${{ github.event.inputs.snapshot_version }} | grep --invert-match '^[0-9]\+\.[0-9]\+\.[0-9]\+-SNAPSHOT$' > /dev/null; then 56 | echo "Snapshot version is invalid" 57 | exit 1 58 | fi 59 | 60 | - name: Checkout main 61 | uses: actions/checkout@v2 62 | with: 63 | ref: main 64 | fetch-depth: 0 65 | 66 | - name: Set up JDK 8 67 | uses: actions/setup-java@v4 68 | with: 69 | distribution: 'adopt' 70 | java-version: 8 71 | cache: maven 72 | 73 | - name: Create release commits 74 | run: | 75 | git config --local user.name "GitHub Action" 76 | git config --local user.email "action@github.com" 77 | mvn versions:set -DgenerateBackupPoms=false -DnewVersion=${{ github.event.inputs.release_version }} 78 | git add pom.xml kcbq-api/pom.xml kcbq-connector/pom.xml 79 | git commit -m "Release version ${{ github.event.inputs.release_version }}" 80 | mvn versions:set -DgenerateBackupPoms=false -DnewVersion=${{ github.event.inputs.snapshot_version }} 81 | git add pom.xml kcbq-api/pom.xml kcbq-connector/pom.xml 82 | git commit -m "Bump version to ${{ github.event.inputs.snapshot_version }}" 83 | 84 | - name: Create Pull Request 85 | uses: peter-evans/create-pull-request@v3 86 
| with: 87 | branch: release-${{ github.event.inputs.release_version }} 88 | delete-branch: true 89 | draft: true 90 | title: Release version ${{ github.event.inputs.release_version }} 91 | body: | 92 | Proposed changelog: 93 | - *fill in* 94 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | build/ 3 | target/ 4 | *.class 5 | *.jar 6 | *.tar 7 | *.zip 8 | 9 | .gradle 10 | 11 | # Intellij 12 | .idea 13 | *.iml 14 | *.iws 15 | *.ipr 16 | .DS_STORE 17 | 18 | # Eclipse 19 | .classpath 20 | .project 21 | .settings 22 | .metadata 23 | 24 | key.json 25 | 26 | test.conf 27 | kcbq-connector/src/test/resources/test.properties 28 | kcbq-connector/test/docker/connect/properties/ 29 | kcbq-connector/out/ 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kafka Connect BigQuery Connector 2 | 3 | This is an implementation of a sink connector from [Apache Kafka] to [Google BigQuery], built on top 4 | of [Apache Kafka Connect]. 5 | 6 | ## History 7 | 8 | This connector was [originally developed by WePay](https://github.com/wepay/kafka-connect-bigquery). 9 | In late 2020 the project moved to [Confluent](https://github.com/confluentinc/kafka-connect-bigquery), 10 | with both companies taking on maintenance duties. 11 | In 2024, Aiven created [its own fork](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/) 12 | based off the Confluent project in order to continue maintaining an open source, Apache 2-licensed 13 | version of the connector. 
14 | 15 | ## Configuration 16 | 17 | ### Sample 18 | 19 | An example connector configuration, that reads records from Kafka with 20 | JSON-encoded values and writes their values to BigQuery: 21 | 22 | ```json 23 | { 24 | "connector.class": "com.wepay.kafka.connect.bigquery.BigQuerySinkConnector", 25 | "topics": "users, clicks, payments", 26 | "tasks.max": "3", 27 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 28 | 29 | "project": "kafka-ingest-testing", 30 | "defaultDataset": "kcbq-example", 31 | "keyfile": "/tmp/bigquery-credentials.json" 32 | } 33 | ``` 34 | 35 | ### Complete docs 36 | See [here](docs/sink-connector-config-options.rst) for a list of the connector's 37 | configuration properties. 38 | 39 | ## Download 40 | 41 | Releases are available in the GitHub release tab. 42 | 46 | 47 | [Apache Kafka Connect]: https://kafka.apache.org/documentation.html#connect 48 | [Apache Kafka]: http://kafka.apache.org 49 | [Google BigQuery]: https://cloud.google.com/bigquery/ 50 | [Kafka]: http://kafka.apache.org 51 | -------------------------------------------------------------------------------- /config/checkstyle/suppressions.xml: -------------------------------------------------------------------------------- 1 | 2 | 26 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /config/copyright/custom-header-styles.xml: -------------------------------------------------------------------------------- 1 | 2 | 26 | 27 | 28 | /* 29 | * 30 | */EOL 31 | (\s|\t)*/\*.*$ 32 | .*\*/(\s|\t)*$ 33 | false 34 | true 35 | false 36 | 37 | 38 | /* 39 | * 40 | */ 41 | #!.* 42 | (\s|\t)*/\*.* 43 | .*\*/(\s|\t)*$ 44 | false 45 | true 46 | false 47 | 48 | -------------------------------------------------------------------------------- /kcbq-api/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 26 | 27 | 4.0.0 28 | 29 | 30 | com.wepay.kcbq 31 | 
kcbq-parent 32 | 2.6.1-SNAPSHOT 33 | .. 34 | 35 | 36 | kcbq-api 37 | kafka-connect-bigquery-api 38 | 39 | 40 | ${project.parent.basedir} 41 | 42 | 43 | 44 | 45 | org.apache.kafka 46 | connect-api 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | org.apache.maven.plugins 55 | maven-compiler-plugin 56 | 57 | 58 | org.apache.maven.plugins 59 | maven-checkstyle-plugin 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/KafkaSchemaRecordType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.api; 25 | 26 | 27 | /** 28 | * Enum class for Kafka schema or record type, either value or key. 
/**
 * Identifies which half of a Kafka record — the key or the value — a schema
 * or payload refers to. The lower-case name is exposed via {@link #toString()}.
 */
public enum KafkaSchemaRecordType {

  VALUE("value"),
  KEY("key");

  // Lower-case label used wherever the record half is rendered as text.
  private final String label;

  KafkaSchemaRecordType(String label) {
    this.label = label;
  }

  @Override
  public String toString() {
    return label;
  }
}
33 | */ 34 | public interface SchemaRetriever { 35 | /** 36 | * Called with all of the configuration settings passed to the connector via its 37 | * {@link org.apache.kafka.connect.sink.SinkConnector#start(Map)} method. 38 | * 39 | * @param properties The configuration settings of the connector. 40 | */ 41 | void configure(Map properties); 42 | 43 | /** 44 | * Retrieve the most current key schema for the given sink record. 45 | * 46 | * @param record The record to retrieve a key schema for. 47 | * @return The key Schema for the given record. 48 | */ 49 | Schema retrieveKeySchema(SinkRecord record); 50 | 51 | /** 52 | * Retrieve the most current value schema for the given sink record. 53 | * 54 | * @param record The record to retrieve a value schema for. 55 | * @return The value Schema for the given record. 56 | */ 57 | Schema retrieveValueSchema(SinkRecord record); 58 | } 59 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/assembly/release-tar.xml: -------------------------------------------------------------------------------- 1 | 25 | 28 | release-tar 29 | 30 | tar 31 | 32 | false 33 | 34 | 35 | / 36 | true 37 | false 38 | runtime 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/assembly/release-zip.xml: -------------------------------------------------------------------------------- 1 | 25 | 28 | release-zip 29 | 30 | zip 31 | 32 | false 33 | 34 | 35 | / 36 | true 37 | false 38 | runtime 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQuerySinkConnector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the 
Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery; 25 | 26 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; 27 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig; 28 | import com.wepay.kafka.connect.bigquery.utils.Version; 29 | import java.util.ArrayList; 30 | import java.util.HashMap; 31 | import java.util.List; 32 | import java.util.Map; 33 | import org.apache.kafka.common.config.Config; 34 | import org.apache.kafka.common.config.ConfigDef; 35 | import org.apache.kafka.common.config.ConfigValue; 36 | import org.apache.kafka.connect.connector.Task; 37 | import org.apache.kafka.connect.sink.SinkConnector; 38 | import org.slf4j.Logger; 39 | import org.slf4j.LoggerFactory; 40 | 41 | /** 42 | * A {@link SinkConnector} used to delegate BigQuery data writes to 43 | * {@link org.apache.kafka.connect.sink.SinkTask SinkTasks}. 
44 | */ 45 | public class BigQuerySinkConnector extends SinkConnector { 46 | 47 | private static final Logger logger = LoggerFactory.getLogger(BigQuerySinkConnector.class); 48 | private BigQuerySinkConfig config; 49 | private Map configProperties; 50 | 51 | @Override 52 | public ConfigDef config() { 53 | logger.trace("connector.config()"); 54 | return BigQuerySinkConfig.getConfig(); 55 | } 56 | 57 | @Override 58 | public Config validate(Map properties) { 59 | List singlePropertyValidations = config().validate(properties); 60 | // If any of our properties had malformed syntax or failed a validation to ensure, e.g., that it fell within an 61 | // acceptable numeric range, we only report those errors since they prevent us from being able to construct a 62 | // valid BigQuerySinkConfig instance 63 | if (singlePropertyValidations.stream().anyMatch(v -> !v.errorMessages().isEmpty())) { 64 | return new Config(singlePropertyValidations); 65 | } 66 | return new BigQuerySinkConfig(properties).validate(); 67 | } 68 | 69 | @Override 70 | public void start(Map properties) { 71 | logger.trace("connector.start()"); 72 | configProperties = properties; 73 | config = new BigQuerySinkConfig(properties); 74 | } 75 | 76 | @Override 77 | public void stop() { 78 | logger.trace("connector.stop()"); 79 | } 80 | 81 | @Override 82 | public Class taskClass() { 83 | logger.trace("connector.taskClass()"); 84 | return BigQuerySinkTask.class; 85 | } 86 | 87 | @Override 88 | public List> taskConfigs(int maxTasks) { 89 | logger.trace("connector.taskConfigs()"); 90 | List> taskConfigs = new ArrayList<>(); 91 | for (int i = 0; i < maxTasks; i++) { 92 | HashMap taskConfig = new HashMap<>(configProperties); 93 | if (i == 0 && !config.getList(BigQuerySinkConfig.ENABLE_BATCH_CONFIG).isEmpty()) { 94 | // if batch loading is enabled, configure first task to do the GCS -> BQ loading 95 | taskConfig.put(BigQuerySinkTaskConfig.GCS_BQ_TASK_CONFIG, "true"); 96 | } 97 | 
taskConfig.put(BigQuerySinkTaskConfig.TASK_ID_CONFIG, Integer.toString(i)); 98 | taskConfigs.add(taskConfig); 99 | } 100 | return taskConfigs; 101 | } 102 | 103 | @Override 104 | public String version() { 105 | String version = Version.version(); 106 | logger.trace("connector.version() = {}", version); 107 | return version; 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/ErrantRecordHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery; 25 | 26 | import com.google.cloud.bigquery.BigQueryError; 27 | import java.util.Arrays; 28 | import java.util.List; 29 | import java.util.Map; 30 | import java.util.Set; 31 | import org.apache.kafka.connect.sink.ErrantRecordReporter; 32 | import org.apache.kafka.connect.sink.SinkRecord; 33 | import org.slf4j.Logger; 34 | import org.slf4j.LoggerFactory; 35 | 36 | public class ErrantRecordHandler { 37 | private static final Logger logger = LoggerFactory.getLogger(ErrantRecordHandler.class); 38 | private static final List allowedBigQueryErrorReason = Arrays.asList("invalid"); 39 | private final ErrantRecordReporter errantRecordReporter; 40 | 41 | public ErrantRecordHandler(ErrantRecordReporter errantRecordReporter) { 42 | this.errantRecordReporter = errantRecordReporter; 43 | } 44 | 45 | public void reportErrantRecords(Set records, Exception e) { 46 | if (errantRecordReporter != null) { 47 | logger.debug("Sending {} records to DLQ", records.size()); 48 | for (SinkRecord r : records) { 49 | // Reporting records in async mode 50 | errantRecordReporter.report(r, e); 51 | } 52 | } else { 53 | logger.warn("Cannot send Records to DLQ as ErrantRecordReporter is null"); 54 | } 55 | } 56 | 57 | public void reportErrantRecords(Map rowToError) { 58 | if (errantRecordReporter != null) { 59 | logger.debug("Sending {} records to DLQ", rowToError.size()); 60 | for (Map.Entry rowToErrorEntry : rowToError.entrySet()) { 61 | // Reporting records in async mode 62 | errantRecordReporter.report(rowToErrorEntry.getKey(), rowToErrorEntry.getValue()); 63 | } 64 | } else { 65 | logger.warn("Cannot send Records to DLQ as ErrantRecordReporter is null"); 66 | } 67 | } 68 | 69 | public ErrantRecordReporter getErrantRecordReporter() { 70 | return errantRecordReporter; 71 | } 72 | 73 | public boolean isErrorReasonAllowed(List bqErrorList) { 74 | for (BigQueryError bqError : bqErrorList) { 75 | boolean errorMatch = false; 76 | String 
bqErrorReason = bqError.getReason(); 77 | for (String allowedBqErrorReason : allowedBigQueryErrorReason) { 78 | if (bqErrorReason.equalsIgnoreCase(allowedBqErrorReason)) { 79 | errorMatch = true; 80 | break; 81 | } 82 | } 83 | if (!errorMatch) { 84 | return false; 85 | } 86 | } 87 | return true; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkTaskConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import java.util.Map; 27 | import org.apache.kafka.common.config.ConfigDef; 28 | 29 | /** 30 | * Class for task-specific configuration properties. 
31 | */ 32 | public class BigQuerySinkTaskConfig extends BigQuerySinkConfig { 33 | 34 | public static final String GCS_BQ_TASK_CONFIG = "GCSBQTask"; 35 | public static final String TASK_ID_CONFIG = "taskId"; 36 | public static final ConfigDef.Importance TASK_ID_IMPORTANCE = ConfigDef.Importance.LOW; 37 | private static final ConfigDef.Type GCS_BQ_TASK_TYPE = ConfigDef.Type.BOOLEAN; 38 | private static final boolean GCS_BQ_TASK_DEFAULT = false; 39 | private static final ConfigDef.Importance GCS_BQ_TASK_IMPORTANCE = ConfigDef.Importance.LOW; 40 | private static final ConfigDef.Type TASK_ID_TYPE = ConfigDef.Type.INT; 41 | 42 | /** 43 | * @param properties A Map detailing configuration properties and their respective values. 44 | */ 45 | public BigQuerySinkTaskConfig(Map properties) { 46 | super(config(), properties); 47 | } 48 | 49 | /** 50 | * Return a ConfigDef object used to define this config's fields. 51 | * 52 | * @return A ConfigDef object used to define this config's fields. 53 | */ 54 | public static ConfigDef config() { 55 | return BigQuerySinkConfig.getConfig() 56 | .defineInternal( 57 | GCS_BQ_TASK_CONFIG, 58 | GCS_BQ_TASK_TYPE, 59 | GCS_BQ_TASK_DEFAULT, 60 | GCS_BQ_TASK_IMPORTANCE 61 | ).defineInternal( 62 | TASK_ID_CONFIG, 63 | TASK_ID_TYPE, 64 | ConfigDef.NO_DEFAULT_VALUE, 65 | TASK_ID_IMPORTANCE 66 | ); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/GcsBucketValidator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 
9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.AUTO_CREATE_BUCKET_CONFIG; 27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.ENABLE_BATCH_CONFIG; 28 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.GCS_BUCKET_NAME_CONFIG; 29 | 30 | import com.google.cloud.storage.Bucket; 31 | import com.google.cloud.storage.Storage; 32 | import com.google.common.annotations.VisibleForTesting; 33 | import com.wepay.kafka.connect.bigquery.GcpClientBuilder; 34 | import java.util.Arrays; 35 | import java.util.Collection; 36 | import java.util.Collections; 37 | import java.util.List; 38 | import java.util.Optional; 39 | 40 | public class GcsBucketValidator extends MultiPropertyValidator { 41 | 42 | private static final Collection DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList( 43 | ENABLE_BATCH_CONFIG, AUTO_CREATE_BUCKET_CONFIG 44 | )); 45 | 46 | public GcsBucketValidator() { 47 | super(GCS_BUCKET_NAME_CONFIG); 48 | } 49 | 50 | @Override 51 | protected Collection dependents() { 52 | return DEPENDENTS; 53 | } 54 | 55 | @Override 56 | protected Optional doValidate(BigQuerySinkConfig config) { 57 | Storage gcs; 58 | try { 59 | gcs = new GcpClientBuilder.GcsBuilder() 60 | .withConfig(config) 61 | .build(); 62 | } catch 
(RuntimeException e) { 63 | return Optional.of(String.format( 64 | "Failed to construct GCS client%s", 65 | e.getMessage() != null ? ": " + e.getMessage() : "" 66 | )); 67 | } 68 | return doValidate(gcs, config); 69 | } 70 | 71 | @VisibleForTesting 72 | Optional doValidate(Storage gcs, BigQuerySinkConfig config) { 73 | List batchLoadedTopics = config.getList(ENABLE_BATCH_CONFIG); 74 | if (batchLoadedTopics == null || batchLoadedTopics.isEmpty()) { 75 | // Batch loading is disabled; no need to validate the GCS bucket 76 | return Optional.empty(); 77 | } 78 | 79 | String bucketName = config.getString(GCS_BUCKET_NAME_CONFIG); 80 | if (bucketName == null || bucketName.trim().isEmpty()) { 81 | return Optional.of("When GCS batch loading is enabled, a bucket must be provided"); 82 | } 83 | 84 | if (config.getBoolean(AUTO_CREATE_BUCKET_CONFIG)) { 85 | return Optional.empty(); 86 | } 87 | 88 | Bucket bucket = gcs.get(bucketName); 89 | if (bucket == null) { 90 | return Optional.of(String.format( 91 | "Automatic bucket creation is disabled but the GCS bucket %s does not exist. " 92 | + "Please either manually create this table before restarting the connector or enable automatic bucket creation " 93 | + "by the connector", 94 | bucketName 95 | )); 96 | } 97 | 98 | return Optional.empty(); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/MultiPropertyValidator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 
9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import java.util.Collection; 27 | import java.util.List; 28 | import java.util.Map; 29 | import java.util.Objects; 30 | import java.util.Optional; 31 | import org.apache.kafka.common.config.ConfigValue; 32 | 33 | public abstract class MultiPropertyValidator { 34 | 35 | private final String propertyName; 36 | 37 | protected MultiPropertyValidator(String propertyName) { 38 | this.propertyName = propertyName; 39 | } 40 | 41 | public String propertyName() { 42 | return propertyName; 43 | } 44 | 45 | public Optional validate(ConfigValue value, ConfigT config, Map valuesByName) { 46 | // Only perform follow-up validation if the property doesn't already have an error associated with it 47 | if (!value.errorMessages().isEmpty()) { 48 | return Optional.empty(); 49 | } 50 | 51 | boolean dependentsAreValid = dependents().stream() 52 | .map(valuesByName::get) 53 | .filter(Objects::nonNull) 54 | .map(ConfigValue::errorMessages) 55 | .allMatch(List::isEmpty); 56 | // Also ensure that all of the other properties that the validation for this one depends on don't already have errors 57 | if (!dependentsAreValid) { 58 | return Optional.empty(); 59 | } 60 | 61 | try { 62 | return doValidate(config); 63 | } catch (RuntimeException e) { 64 | return Optional.of( 65 | "An unexpected error occurred during validation" 
66 | + (e.getMessage() != null ? ": " + e.getMessage() : "") 67 | ); 68 | } 69 | } 70 | 71 | protected abstract Collection dependents(); 72 | 73 | protected abstract Optional doValidate(ConfigT config); 74 | } 75 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/PartitioningModeValidator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG; 27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG; 28 | 29 | import java.util.Arrays; 30 | import java.util.Collection; 31 | import java.util.Collections; 32 | import java.util.Optional; 33 | 34 | public class PartitioningModeValidator extends MultiPropertyValidator { 35 | private static final Collection DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList( 36 | BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG 37 | )); 38 | 39 | public PartitioningModeValidator() { 40 | super(BIGQUERY_PARTITION_DECORATOR_CONFIG); 41 | } 42 | 43 | @Override 44 | protected Collection dependents() { 45 | return DEPENDENTS; 46 | } 47 | 48 | @Override 49 | protected Optional doValidate(BigQuerySinkConfig config) { 50 | if (!config.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)) { 51 | return Optional.empty(); 52 | } 53 | 54 | if (config.getTimestampPartitionFieldName().isPresent()) { 55 | return Optional.of(String.format("Only one partitioning mode may be specified for the connector. 
" 56 | + "Use either %s OR %s.", 57 | BIGQUERY_PARTITION_DECORATOR_CONFIG, 58 | BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG 59 | )); 60 | } else { 61 | return Optional.empty(); 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/PartitioningTypeValidator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG; 27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TABLE_CREATE_CONFIG; 28 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TIME_PARTITIONING_TYPE_CONFIG; 29 | 30 | import com.google.cloud.bigquery.TimePartitioning; 31 | import java.util.Arrays; 32 | import java.util.Collection; 33 | import java.util.Collections; 34 | import java.util.Optional; 35 | 36 | public class PartitioningTypeValidator extends MultiPropertyValidator { 37 | private static final Collection DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList( 38 | BIGQUERY_PARTITION_DECORATOR_CONFIG, TABLE_CREATE_CONFIG 39 | )); 40 | 41 | public PartitioningTypeValidator() { 42 | super(TIME_PARTITIONING_TYPE_CONFIG); 43 | } 44 | 45 | @Override 46 | protected Collection dependents() { 47 | return DEPENDENTS; 48 | } 49 | 50 | @Override 51 | protected Optional doValidate(BigQuerySinkConfig config) { 52 | if (!config.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG) || !config.getBoolean(TABLE_CREATE_CONFIG)) { 53 | return Optional.empty(); 54 | } 55 | 56 | Optional timePartitioningType = config.getTimePartitioningType(); 57 | 58 | if (!Optional.of(TimePartitioning.Type.DAY).equals(timePartitioningType)) { 59 | return Optional.of( 60 | "Tables must be partitioned by DAY when using partition decorator syntax. " 61 | + "Either configure the connector with the DAY time partitioning type, " 62 | + "disable automatic table creation, or disable partition decorator syntax." 
63 | ); 64 | } 65 | 66 | return Optional.empty(); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/StorageWriteApiValidator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG; 27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.DELETE_ENABLED_CONFIG; 28 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.ENABLE_BATCH_CONFIG; 29 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.ENABLE_BATCH_MODE_CONFIG; 30 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.UPSERT_ENABLED_CONFIG; 31 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.USE_STORAGE_WRITE_API_CONFIG; 32 | 33 | import java.util.Arrays; 34 | import java.util.Collection; 35 | import java.util.Collections; 36 | import java.util.Optional; 37 | 38 | public class StorageWriteApiValidator extends MultiPropertyValidator { 39 | 40 | public static final String upsertNotSupportedError = "Upsert mode is not supported with Storage Write API." 41 | + " Either disable Upsert mode or disable Storage Write API"; 42 | public static final String legacyBatchNotSupportedError = "Legacy Batch mode is not supported with Storage Write API." 43 | + " Either disable Legacy Batch mode or disable Storage Write API"; 44 | public static final String newBatchNotSupportedError = "Storage Write Api Batch load is supported only when useStorageWriteApi is " 45 | + "enabled. Either disable batch mode or enable Storage Write API"; 46 | public static final String deleteNotSupportedError = "Delete mode is not supported with Storage Write API. 
Either disable Delete mode " 47 | + "or disable Storage Write API"; 48 | private static final Collection DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList( 49 | UPSERT_ENABLED_CONFIG, DELETE_ENABLED_CONFIG, ENABLE_BATCH_CONFIG 50 | )); 51 | 52 | protected StorageWriteApiValidator(String propertyName) { 53 | super(propertyName); 54 | } 55 | 56 | protected StorageWriteApiValidator() { 57 | super(USE_STORAGE_WRITE_API_CONFIG); 58 | } 59 | 60 | @Override 61 | protected Collection dependents() { 62 | return DEPENDENTS; 63 | } 64 | 65 | @Override 66 | protected Optional doValidate(BigQuerySinkConfig config) { 67 | if (!config.getBoolean(USE_STORAGE_WRITE_API_CONFIG)) { 68 | if (config.getBoolean(ENABLE_BATCH_MODE_CONFIG)) { 69 | return Optional.of(newBatchNotSupportedError); 70 | } 71 | //No legacy modes validation needed if not using new api 72 | return Optional.empty(); 73 | } 74 | if (config.getBoolean(UPSERT_ENABLED_CONFIG)) { 75 | return Optional.of(upsertNotSupportedError); 76 | } else if (config.getBoolean(DELETE_ENABLED_CONFIG)) { 77 | return Optional.of(deleteNotSupportedError); 78 | } else if (!config.getList(ENABLE_BATCH_CONFIG).isEmpty()) { 79 | return Optional.of(legacyBatchNotSupportedError); 80 | } else if (config.originals().containsKey(BIGQUERY_PARTITION_DECORATOR_CONFIG) 81 | && config.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG) 82 | ) { 83 | // Only report an error if the user explicitly requested partition decorator syntax; 84 | // if they didn't, then we can silently disable it when using the Storage Write API 85 | // TODO: Recommend alternatives to users 86 | return Optional.of("Partition decorator syntax cannot be used with the Storage Write API"); 87 | } 88 | 89 | return Optional.empty(); 90 | } 91 | 92 | static class StorageWriteApiBatchValidator extends StorageWriteApiValidator { 93 | StorageWriteApiBatchValidator() { 94 | super(ENABLE_BATCH_MODE_CONFIG); 95 | } 96 | } 97 | } 98 | 
-------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/UpsertDeleteValidator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.DELETE_ENABLED_CONFIG; 27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.KAFKA_KEY_FIELD_NAME_CONFIG; 28 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.MERGE_INTERVAL_MS_CONFIG; 29 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.MERGE_RECORDS_THRESHOLD_CONFIG; 30 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.UPSERT_ENABLED_CONFIG; 31 | 32 | import java.util.Arrays; 33 | import java.util.Collection; 34 | import java.util.Collections; 35 | import java.util.Optional; 36 | import org.slf4j.Logger; 37 | import org.slf4j.LoggerFactory; 38 | 39 | public abstract class UpsertDeleteValidator extends MultiPropertyValidator { 40 | private static final Collection DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList( 41 | MERGE_INTERVAL_MS_CONFIG, MERGE_RECORDS_THRESHOLD_CONFIG, KAFKA_KEY_FIELD_NAME_CONFIG 42 | )); 43 | private static final Logger logger = LoggerFactory.getLogger(UpsertDeleteValidator.class); 44 | 45 | private UpsertDeleteValidator(String propertyName) { 46 | super(propertyName); 47 | } 48 | 49 | @Override 50 | protected Collection dependents() { 51 | return DEPENDENTS; 52 | } 53 | 54 | @Override 55 | protected Optional doValidate(BigQuerySinkConfig config) { 56 | if (!modeEnabled(config)) { 57 | return Optional.empty(); 58 | } 59 | 60 | long mergeInterval = config.getLong(MERGE_INTERVAL_MS_CONFIG); 61 | long mergeRecordsThreshold = config.getLong(MERGE_RECORDS_THRESHOLD_CONFIG); 62 | 63 | if (mergeInterval == -1 && mergeRecordsThreshold == -1) { 64 | return Optional.of(String.format( 65 | "%s and %s cannot both be -1", 66 | MERGE_INTERVAL_MS_CONFIG, 67 | MERGE_RECORDS_THRESHOLD_CONFIG 68 | )); 69 | } 70 | 71 | if (mergeInterval != -1 && mergeInterval < 10_000L) { 72 | logger.warn(String.format( 
73 | "%s should not be set to less than 10 seconds. A validation would be introduced in a future release to " 74 | + "this effect.", 75 | MERGE_INTERVAL_MS_CONFIG 76 | )); 77 | } 78 | 79 | if (!config.getKafkaKeyFieldName().isPresent()) { 80 | return Optional.of(String.format( 81 | "%s must be specified when %s is set to true", 82 | KAFKA_KEY_FIELD_NAME_CONFIG, 83 | propertyName() 84 | )); 85 | } 86 | 87 | return Optional.empty(); 88 | } 89 | 90 | /** 91 | * @param config the user-provided configuration 92 | * @return whether the write mode for the validator (i.e., either upsert or delete) is enabled 93 | */ 94 | protected abstract boolean modeEnabled(BigQuerySinkConfig config); 95 | 96 | public static class UpsertValidator extends UpsertDeleteValidator { 97 | public UpsertValidator() { 98 | super(UPSERT_ENABLED_CONFIG); 99 | } 100 | 101 | @Override 102 | protected boolean modeEnabled(BigQuerySinkConfig config) { 103 | return config.getBoolean(UPSERT_ENABLED_CONFIG); 104 | } 105 | } 106 | 107 | public static class DeleteValidator extends UpsertDeleteValidator { 108 | public DeleteValidator() { 109 | super(DELETE_ENABLED_CONFIG); 110 | } 111 | 112 | @Override 113 | protected boolean modeEnabled(BigQuerySinkConfig config) { 114 | return config.getBoolean(DELETE_ENABLED_CONFIG); 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/KafkaDataBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 
9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.convert; 25 | 26 | 27 | import com.google.cloud.bigquery.Field; 28 | import com.google.cloud.bigquery.LegacySQLTypeName; 29 | import java.util.HashMap; 30 | import java.util.Map; 31 | import org.apache.kafka.connect.sink.SinkRecord; 32 | 33 | /** 34 | * Helper class to construct schema and record for Kafka Data Field. 35 | */ 36 | public class KafkaDataBuilder { 37 | 38 | public static final String KAFKA_DATA_TOPIC_FIELD_NAME = "topic"; 39 | public static final String KAFKA_DATA_PARTITION_FIELD_NAME = "partition"; 40 | public static final String KAFKA_DATA_OFFSET_FIELD_NAME = "offset"; 41 | public static final String KAFKA_DATA_INSERT_TIME_FIELD_NAME = "insertTime"; 42 | 43 | /** 44 | * Construct schema for Kafka Data Field 45 | * 46 | * @param kafkaDataFieldName The configured name of Kafka Data Field 47 | * @return Field of Kafka Data, with definitions of kafka topic, partition, offset, and insertTime. 
48 | */ 49 | public static Field buildKafkaDataField(String kafkaDataFieldName) { 50 | Field topicField = com.google.cloud.bigquery.Field.of(KAFKA_DATA_TOPIC_FIELD_NAME, LegacySQLTypeName.STRING); 51 | Field partitionField = com.google.cloud.bigquery.Field.of(KAFKA_DATA_PARTITION_FIELD_NAME, LegacySQLTypeName.INTEGER); 52 | Field offsetField = com.google.cloud.bigquery.Field.of(KAFKA_DATA_OFFSET_FIELD_NAME, LegacySQLTypeName.INTEGER); 53 | Field.Builder insertTimeBuilder = com.google.cloud.bigquery.Field.newBuilder( 54 | KAFKA_DATA_INSERT_TIME_FIELD_NAME, LegacySQLTypeName.TIMESTAMP) 55 | .setMode(com.google.cloud.bigquery.Field.Mode.NULLABLE); 56 | Field insertTimeField = insertTimeBuilder.build(); 57 | 58 | return Field.newBuilder(kafkaDataFieldName, LegacySQLTypeName.RECORD, 59 | topicField, partitionField, offsetField, insertTimeField) 60 | .setMode(com.google.cloud.bigquery.Field.Mode.NULLABLE).build(); 61 | } 62 | 63 | /** 64 | * Construct a map of Kafka Data record 65 | * 66 | * @param kafkaConnectRecord Kafka sink record to build kafka data from. 67 | * @return HashMap which contains the values of kafka topic, partition, offset, and insertTime. 68 | */ 69 | public static Map buildKafkaDataRecord(SinkRecord kafkaConnectRecord) { 70 | HashMap kafkaData = new HashMap<>(); 71 | kafkaData.put(KAFKA_DATA_TOPIC_FIELD_NAME, kafkaConnectRecord.topic()); 72 | kafkaData.put(KAFKA_DATA_PARTITION_FIELD_NAME, kafkaConnectRecord.kafkaPartition()); 73 | kafkaData.put(KAFKA_DATA_OFFSET_FIELD_NAME, kafkaConnectRecord.kafkaOffset()); 74 | kafkaData.put(KAFKA_DATA_INSERT_TIME_FIELD_NAME, System.currentTimeMillis() / 1000.0); 75 | return kafkaData; 76 | } 77 | 78 | /** 79 | * Construct a map of Kafka Data record for sending to Storage Write API 80 | * 81 | * @param kafkaConnectRecord Kafka sink record to build kafka data from. 82 | * @return HashMap which contains the values of kafka topic, partition, offset, and insertTime in microseconds. 
83 | */ 84 | public static Map buildKafkaDataRecordStorageApi(SinkRecord kafkaConnectRecord) { 85 | Map kafkaData = buildKafkaDataRecord(kafkaConnectRecord); 86 | kafkaData.put(KAFKA_DATA_INSERT_TIME_FIELD_NAME, System.currentTimeMillis() * 1000); 87 | return kafkaData; 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/RecordConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.convert; 25 | 26 | import com.wepay.kafka.connect.bigquery.api.KafkaSchemaRecordType; 27 | import org.apache.kafka.connect.sink.SinkRecord; 28 | 29 | /** 30 | * Interface for converting from a {@link SinkRecord} to some other kind of record. 31 | * 32 | * @param The type of record to convert to. 33 | */ 34 | public interface RecordConverter { 35 | /** 36 | * @param record The record to convert. 
37 | * @param recordType The type of the record to convert, either value or key. 38 | * @return The converted record. 39 | */ 40 | R convertRecord(SinkRecord record, KafkaSchemaRecordType recordType); 41 | 42 | } 43 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/SchemaConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.convert; 25 | 26 | import org.apache.kafka.connect.data.Schema; 27 | 28 | /** 29 | * Interface for converting from a {@link Schema Kafka Connect Schema} to some other kind of schema. 30 | * 31 | * @param The kind of schema to convert to. 32 | */ 33 | public interface SchemaConverter { 34 | /** 35 | * @param schema The schema to convert. 36 | * @return The converted schema. 
37 | */ 38 | S convertSchema(Schema schema); 39 | } 40 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/KafkaLogicalConverters.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.convert.logicaltype; 25 | 26 | import com.google.cloud.bigquery.LegacySQLTypeName; 27 | import java.math.BigDecimal; 28 | import org.apache.kafka.connect.data.Date; 29 | import org.apache.kafka.connect.data.Decimal; 30 | import org.apache.kafka.connect.data.Schema; 31 | import org.apache.kafka.connect.data.Time; 32 | import org.apache.kafka.connect.data.Timestamp; 33 | 34 | /** 35 | * Class containing all the Kafka logical type converters. 
36 | */ 37 | public class KafkaLogicalConverters { 38 | 39 | static { 40 | LogicalConverterRegistry.register(Date.LOGICAL_NAME, new DateConverter()); 41 | LogicalConverterRegistry.register(Decimal.LOGICAL_NAME, new DecimalConverter()); 42 | LogicalConverterRegistry.register(Timestamp.LOGICAL_NAME, new TimestampConverter()); 43 | LogicalConverterRegistry.register(Time.LOGICAL_NAME, new TimeConverter()); 44 | } 45 | 46 | /** 47 | * Class for converting Kafka date logical types to Bigquery dates. 48 | */ 49 | public static class DateConverter extends LogicalTypeConverter { 50 | /** 51 | * Create a new DateConverter. 52 | */ 53 | public DateConverter() { 54 | super(Date.LOGICAL_NAME, 55 | Schema.Type.INT32, 56 | LegacySQLTypeName.DATE); 57 | } 58 | 59 | @Override 60 | public String convert(Object kafkaConnectObject) { 61 | return getBqDateFormat().format((java.util.Date) kafkaConnectObject); 62 | } 63 | } 64 | 65 | /** 66 | * Class for converting Kafka decimal logical types to Bigquery floating points. 67 | */ 68 | public static class DecimalConverter extends LogicalTypeConverter { 69 | /** 70 | * Create a new DecimalConverter. 71 | */ 72 | public DecimalConverter() { 73 | super(Decimal.LOGICAL_NAME, 74 | Schema.Type.BYTES, 75 | LegacySQLTypeName.FLOAT); 76 | } 77 | 78 | @Override 79 | public BigDecimal convert(Object kafkaConnectObject) { 80 | // cast to get ClassCastException 81 | return (BigDecimal) kafkaConnectObject; 82 | } 83 | } 84 | 85 | /** 86 | * Class for converting Kafka timestamp logical types to BigQuery timestamps. 87 | */ 88 | public static class TimestampConverter extends LogicalTypeConverter { 89 | /** 90 | * Create a new TimestampConverter. 
91 | */ 92 | public TimestampConverter() { 93 | super(Timestamp.LOGICAL_NAME, 94 | Schema.Type.INT64, 95 | LegacySQLTypeName.TIMESTAMP); 96 | } 97 | 98 | @Override 99 | public String convert(Object kafkaConnectObject) { 100 | return getBqTimestampFormat().format((java.util.Date) kafkaConnectObject); 101 | } 102 | } 103 | 104 | 105 | /** 106 | * Class for converting Kafka time logical types to BigQuery time types. 107 | */ 108 | public static class TimeConverter extends LogicalTypeConverter { 109 | /** 110 | * Create a new TimestampConverter. 111 | */ 112 | public TimeConverter() { 113 | super(Time.LOGICAL_NAME, 114 | Schema.Type.INT32, 115 | LegacySQLTypeName.TIME); 116 | } 117 | 118 | @Override 119 | public String convert(Object kafkaConnectObject) { 120 | return getBqTimeFormat().format((java.util.Date) kafkaConnectObject); 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/LogicalConverterRegistry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. 
See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.convert.logicaltype; 25 | 26 | import java.util.Map; 27 | import java.util.concurrent.ConcurrentHashMap; 28 | 29 | /** 30 | * Registry for finding and accessing {@link LogicalTypeConverter}s. 31 | */ 32 | public class LogicalConverterRegistry { 33 | 34 | private static Map converterMap = new ConcurrentHashMap<>(); 35 | 36 | public static void register(String logicalTypeName, LogicalTypeConverter converter) { 37 | converterMap.put(logicalTypeName, converter); 38 | } 39 | 40 | public static LogicalTypeConverter getConverter(String logicalTypeName) { 41 | return converterMap.get(logicalTypeName); 42 | } 43 | 44 | public static boolean isRegisteredLogicalType(String typeName) { 45 | return typeName != null && converterMap.containsKey(typeName); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/LogicalTypeConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.convert.logicaltype; 25 | 26 | import com.google.cloud.bigquery.LegacySQLTypeName; 27 | import com.wepay.kafka.connect.bigquery.exception.ConversionConnectException; 28 | import java.text.SimpleDateFormat; 29 | import java.util.TimeZone; 30 | import org.apache.kafka.connect.data.Schema; 31 | 32 | /** 33 | * Abstract class for logical type converters. 34 | * Contains logic for both schema and record conversions. 35 | */ 36 | public abstract class LogicalTypeConverter { 37 | 38 | // BigQuery uses UTC timezone by default 39 | protected static final TimeZone utcTimeZone = TimeZone.getTimeZone("UTC"); 40 | 41 | private final String logicalName; 42 | private final Schema.Type encodingType; 43 | private final LegacySQLTypeName bqSchemaType; 44 | 45 | /** 46 | * Create a new LogicalConverter. 47 | * 48 | * @param logicalName The name of the logical type. 49 | * @param encodingType The encoding type of the logical type. 50 | * @param bqSchemaType The corresponding BigQuery Schema type of the logical type. 
51 | */ 52 | public LogicalTypeConverter(String logicalName, 53 | Schema.Type encodingType, 54 | LegacySQLTypeName bqSchemaType) { 55 | this.logicalName = logicalName; 56 | this.encodingType = encodingType; 57 | this.bqSchemaType = bqSchemaType; 58 | } 59 | 60 | protected static SimpleDateFormat getBqTimestampFormat() { 61 | SimpleDateFormat bqTimestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 62 | bqTimestampFormat.setTimeZone(utcTimeZone); 63 | return bqTimestampFormat; 64 | } 65 | 66 | protected static SimpleDateFormat getBqDateFormat() { 67 | SimpleDateFormat bqDateFormat = new SimpleDateFormat("yyyy-MM-dd"); 68 | bqDateFormat.setTimeZone(utcTimeZone); 69 | return bqDateFormat; 70 | } 71 | 72 | protected static SimpleDateFormat getBqTimeFormat() { 73 | SimpleDateFormat bqTimeFormat = new SimpleDateFormat("HH:mm:ss.SSS"); 74 | bqTimeFormat.setTimeZone(utcTimeZone); 75 | return bqTimeFormat; 76 | } 77 | 78 | /** 79 | * @param encodingType the encoding type to check. 80 | * @throws ConversionConnectException if the given schema encoding type is not the same as the 81 | * expected encoding type. 82 | */ 83 | public void checkEncodingType(Schema.Type encodingType) throws ConversionConnectException { 84 | if (encodingType != this.encodingType) { 85 | throw new ConversionConnectException( 86 | "Logical Type " + logicalName + " must be encoded as " + this.encodingType + "; " 87 | + "instead, found " + encodingType 88 | ); 89 | } 90 | } 91 | 92 | public LegacySQLTypeName getBqSchemaType() { 93 | return bqSchemaType; 94 | } 95 | 96 | /** 97 | * Convert the given KafkaConnect Record Object to a BigQuery Record Object. 
98 | * 99 | * @param kafkaConnectObject the kafkaConnectObject 100 | * @return the converted Object 101 | */ 102 | public abstract Object convert(Object kafkaConnectObject); 103 | 104 | } 105 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/BigQueryConnectException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.exception; 25 | 26 | import com.google.cloud.bigquery.BigQueryError; 27 | import java.util.List; 28 | import java.util.Map; 29 | import org.apache.kafka.connect.errors.ConnectException; 30 | 31 | /** 32 | * Class for exceptions that occur while interacting with BigQuery, such as login failures, schema 33 | * update failures, and table insertion failures. 
34 | */ 35 | public class BigQueryConnectException extends ConnectException { 36 | public BigQueryConnectException(String msg) { 37 | super(msg); 38 | } 39 | 40 | public BigQueryConnectException(String msg, Throwable thr) { 41 | super(msg, thr); 42 | } 43 | 44 | public BigQueryConnectException(Throwable thr) { 45 | super(thr); 46 | } 47 | 48 | public BigQueryConnectException(String tableInfo, Map> errors) { 49 | super(formatInsertAllErrors(tableInfo, errors)); 50 | } 51 | 52 | private static String formatInsertAllErrors(String tableInfo, Map> errorsMap) { 53 | StringBuilder messageBuilder = new StringBuilder(); 54 | messageBuilder.append(String.format("table: %s insertion failed for the following rows:", tableInfo)); 55 | for (Map.Entry> errorsEntry : errorsMap.entrySet()) { 56 | for (BigQueryError error : errorsEntry.getValue()) { 57 | messageBuilder.append(String.format( 58 | "%n\t[row index %d] (location %s, reason: %s): %s", 59 | errorsEntry.getKey(), 60 | error.getLocation(), 61 | error.getReason(), 62 | error.getMessage() 63 | )); 64 | } 65 | } 66 | return messageBuilder.toString(); 67 | } 68 | 69 | @Override 70 | public String toString() { 71 | return getCause() != null 72 | ? super.toString() + "\nCaused by: " + getCause().getLocalizedMessage() 73 | : super.toString(); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/BigQueryStorageWriteApiConnectException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 
9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.exception; 25 | 26 | 27 | import com.google.cloud.bigquery.storage.v1.RowError; 28 | import java.util.List; 29 | import java.util.Map; 30 | import org.apache.kafka.connect.errors.ConnectException; 31 | 32 | /** 33 | * Exception Class for exceptions that occur while interacting with BigQuery Storage Write API, such as login failures, schema 34 | * update failures, and table insertion failures. 
35 | */ 36 | public class BigQueryStorageWriteApiConnectException extends ConnectException { 37 | 38 | public BigQueryStorageWriteApiConnectException(String message) { 39 | super(message); 40 | } 41 | 42 | public BigQueryStorageWriteApiConnectException(String message, Throwable error) { 43 | super(message, error); 44 | } 45 | 46 | public BigQueryStorageWriteApiConnectException(String tableName, List errors) { 47 | super(formatRowErrors(tableName, errors)); 48 | } 49 | 50 | public BigQueryStorageWriteApiConnectException(String tableName, Map errors) { 51 | super(formatRowErrors(tableName, errors)); 52 | } 53 | 54 | private static String formatRowErrors(String tableName, List errors) { 55 | StringBuilder builder = new StringBuilder(); 56 | builder.append(String.format("Insertion failed at table %s for following rows: ", tableName)); 57 | for (RowError error : errors) { 58 | builder.append(String.format( 59 | "\n [row index %d] (Failure reason : %s) ", 60 | error.getIndex(), 61 | error.getMessage()) 62 | ); 63 | } 64 | return builder.toString(); 65 | } 66 | 67 | private static String formatRowErrors(String tableName, Map errors) { 68 | StringBuilder builder = new StringBuilder(); 69 | builder.append(String.format("Insertion failed at table %s for following rows: ", tableName)); 70 | for (Map.Entry error : errors.entrySet()) { 71 | builder.append(String.format( 72 | "\n [row index %d] (Failure reason : %s) ", 73 | error.getKey(), 74 | error.getValue() 75 | )); 76 | } 77 | return builder.toString(); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/ConversionConnectException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent 
/**
 * Class for exceptions that occur while converting between Kafka Connect and BigQuery schemas and
 * records.
 */
public class ConversionConnectException extends ConnectException {

  /** @param msg a description of the conversion failure */
  public ConversionConnectException(String msg) {
    super(msg);
  }

  /**
   * @param msg a description of the conversion failure
   * @param thr the underlying cause
   */
  public ConversionConnectException(String msg, Throwable thr) {
    super(msg, thr);
  }

  /** @param thr the underlying cause */
  public ConversionConnectException(Throwable thr) {
    super(thr);
  }
}
// NOTE(review): the name suggests this marks interruptions that are part of normal
// operation (e.g. task shutdown) so callers can distinguish them from genuine
// failures — confirm against usage in the write path.
public class ExpectedInterruptException extends ConnectException {

  /** @param message a description of where/why the interruption occurred */
  public ExpectedInterruptException(String message) {
    super(message);
  }
}
/**
 * Class for exceptions that occur while interacting with Google Cloud Storage, such as login
 * failures.
 */
public class GcsConnectException extends ConnectException {

  /** @param msg a description of the GCS failure */
  public GcsConnectException(String msg) {
    super(msg);
  }

  /**
   * @param msg a description of the GCS failure
   * @param thr the underlying cause
   */
  public GcsConnectException(String msg, Throwable thr) {
    super(msg, thr);
  }
}
/**
 * Fetches the key Schema and value Schema from a Sink Record
 */
public class IdentitySchemaRetriever implements SchemaRetriever {

  // No configuration is needed; schemas are read directly off each record.
  @Override
  public void configure(Map properties) {
  }

  /**
   * @param record the record to inspect
   * @return the record's key schema, as attached by the key converter
   */
  @Override
  public Schema retrieveKeySchema(SinkRecord record) {
    return record.keySchema();
  }

  /**
   * @param record the record to inspect
   * @return the record's value schema, as attached by the value converter
   */
  @Override
  public Schema retrieveValueSchema(SinkRecord record) {
    return record.valueSchema();
  }
}
/**
 * Utilities for sanitizing field names to satisfy BigQuery naming rules: a field name must begin
 * with a letter or underscore and may contain only letters, numbers, and underscores.
 */
public class FieldNameSanitizer {

  // Compiled once instead of on every call (String.replaceAll/matches recompile the
  // regex each time, and sanitizeName runs per field of every record).
  private static final Pattern INVALID_CHARS = Pattern.compile("[^a-zA-Z0-9_]");
  private static final Pattern INVALID_FIRST_CHAR = Pattern.compile("^[^a-zA-Z_].*");

  /**
   * Replace all non-letter, non-digit characters with underscore, and prepend an underscore if
   * the name does not begin with a letter or underscore.
   *
   * @param name the raw field name
   * @return the sanitized field name
   */
  public static String sanitizeName(String name) {
    String sanitizedName = INVALID_CHARS.matcher(name).replaceAll("_");
    if (INVALID_FIRST_CHAR.matcher(sanitizedName).matches()) {
      sanitizedName = "_" + sanitizedName;
    }
    return sanitizedName;
  }

  /**
   * Return a new map whose keys (recursively, for nested maps) have been sanitized.
   *
   * <p>Note: distinct keys such as "a.b" and "a/b" sanitize to the same value, which will cause
   * one entry to overwrite the other.
   *
   * @param map the map whose keys should be sanitized
   * @return a new map with sanitized keys
   */
  @SuppressWarnings("unchecked")
  public static Map<String, Object> replaceInvalidKeys(Map<String, Object> map) {
    Map<String, Object> result = new HashMap<>();
    map.forEach((key, value) -> {
      String sanitizedKey = sanitizeName(key);
      if (value instanceof Map) {
        result.put(sanitizedKey, replaceInvalidKeys((Map<String, Object>) value));
      } else {
        result.put(sanitizedKey, value);
      }
    });
    return result;
  }
}
9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | import java.util.concurrent.ThreadLocalRandom; 27 | 28 | public final class SleepUtils { 29 | 30 | public static void waitRandomTime(Time time, long sleepMs, long jitterMs) throws InterruptedException { 31 | time.sleep(sleepMs + ThreadLocalRandom.current().nextLong(jitterMs)); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/TableNameUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | import com.google.cloud.bigquery.TableId; 27 | import com.google.cloud.bigquery.storage.v1.TableName; 28 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; 29 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig; 30 | import java.util.Map; 31 | import org.apache.kafka.connect.errors.ConnectException; 32 | 33 | public class TableNameUtils { 34 | 35 | public static String table(TableId table) { 36 | return String.format("table `%s`.`%s`", table.getDataset(), table.getTable()); 37 | } 38 | 39 | public static TableName tableName(TableId id) { 40 | return TableName.of(id.getProject(), id.getDataset(), id.getTable()); 41 | } 42 | 43 | public static String intTable(TableId table) { 44 | return "intermediate " + table(table); 45 | } 46 | 47 | public static String destTable(TableId table) { 48 | return "destination " + table(table); 49 | } 50 | 51 | public static TableId tableId(TableName name) { 52 | return TableId.of(name.getProject(), name.getDataset(), name.getTable()); 53 | } 54 | 55 | public static String[] getDataSetAndTableName(BigQuerySinkTaskConfig config, String topic) { 56 | String tableName; 57 | Map topic2TableMap = config.getTopic2TableMap().orElse(null); 58 | String dataset = config.getString(BigQuerySinkConfig.DEFAULT_DATASET_CONFIG); 59 | 60 | if (topic2TableMap != null) { 61 | tableName = topic2TableMap.getOrDefault(topic, topic); 62 | } else { 63 | String[] smtReplacement = topic.split(":"); 
64 | 65 | if (smtReplacement.length == 2) { 66 | dataset = smtReplacement[0]; 67 | tableName = smtReplacement[1]; 68 | } else if (smtReplacement.length == 1) { 69 | tableName = smtReplacement[0]; 70 | } else { 71 | throw new ConnectException(String.format( 72 | "Incorrect regex replacement format in topic name '%s'. " 73 | + "SMT replacement should either produce the : format " 74 | + "or just the format.", 75 | topic 76 | )); 77 | } 78 | if (config.getBoolean(BigQuerySinkConfig.SANITIZE_TOPICS_CONFIG)) { 79 | tableName = FieldNameSanitizer.sanitizeName(tableName); 80 | } 81 | } 82 | 83 | return new String[]{dataset, tableName}; 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/Time.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | /** 27 | * Largely adapted from the 28 | * Kafka Time interface, 29 | * which is not public API and therefore cannot be relied upon as a dependency. 30 | */ 31 | public interface Time { 32 | 33 | Time SYSTEM = new Time() { 34 | @Override 35 | public void sleep(long durationMs) throws InterruptedException { 36 | Thread.sleep(durationMs); 37 | } 38 | 39 | @Override 40 | public long milliseconds() { 41 | return System.currentTimeMillis(); 42 | } 43 | }; 44 | 45 | void sleep(long durationMs) throws InterruptedException; 46 | 47 | long milliseconds(); 48 | 49 | } 50 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/Version.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | /** 27 | * Utility class for unifying the version of a project. 
All other references to version number 28 | * should come from here. 29 | */ 30 | public class Version { 31 | private static String version = "unknown"; 32 | 33 | static { 34 | String implementationVersion = Version.class.getPackage().getImplementationVersion(); 35 | if (implementationVersion != null) { 36 | version = implementationVersion; 37 | } 38 | } 39 | 40 | /** 41 | * The version of the project. 42 | * 43 | * @return The version of the project, in String format to allow for multiple decimals. 44 | */ 45 | public static String version() { 46 | return version; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/RecordBatches.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
package com.wepay.kafka.connect.bigquery.write;

import java.util.List;

/**
 * Tracks progress through a list of records, carving it into successive batches whose
 * size can be halved when a write attempt needs to be retried with fewer rows.
 */
public class RecordBatches<E> {

  private final List<E> records;

  // Index of the first record in the current batch.
  private int start;
  // Target number of records per batch.
  private int size;

  public RecordBatches(List<E> records) {
    this.records = records;
    start = 0;
    size = records.size();
  }

  /** @return the current batch, as a sublist view of the backing list */
  public List<E> currentBatch() {
    int end = Math.min(records.size(), start + size);
    return records.subList(start, end);
  }

  /** Move past the current batch. */
  public void advanceToNextBatch() {
    start += size;
  }

  /**
   * Halve the batch size, e.g. after a write attempt was rejected as too large.
   *
   * @throws IllegalStateException if the batch size is already at its minimum of 1
   */
  public void reduceBatchSize() {
    if (size <= 1) {
      throw new IllegalStateException("Cannot reduce batch size any further");
    }
    size /= 2;
  }

  /** @return whether every record has been consumed */
  public boolean completed() {
    return start >= records.size();
  }

}
package com.wepay.kafka.connect.bigquery.write.batch;

import java.util.concurrent.CountDownLatch;
import org.apache.kafka.connect.errors.ConnectException;

/**
 * A Runnable that counts down on a shared latch, then blocks until every other
 * participant has counted down as well.
 */
public class CountDownRunnable implements Runnable {

  private final CountDownLatch latch;

  public CountDownRunnable(CountDownLatch countDownLatch) {
    this.latch = countDownLatch;
  }

  @Override
  public void run() {
    latch.countDown();
    try {
      // Hold onto this thread until ALL threads have finished counting down. This keeps the
      // thread from picking up another countdown task; once every countdown task occupies a
      // thread, nothing that was submitted before the countdown can still be processing.
      latch.await();
    } catch (InterruptedException err) {
      throw new ConnectException("Thread interrupted while waiting for countdown.", err);
    }
  }
}
package com.wepay.kafka.connect.bigquery.write.batch;

import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig;
import com.wepay.kafka.connect.bigquery.exception.BigQueryConnectException;
import com.wepay.kafka.connect.bigquery.exception.ExpectedInterruptException;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * ThreadPoolExecutor for writing rows to BigQuery.
 *
 * <p>Records the first unrecoverable error thrown by any write task so that it can be
 * rethrown to the Connect framework from the task thread.
 */
public class KcbqThreadPoolExecutor extends ThreadPoolExecutor {

  private static final Logger logger = LoggerFactory.getLogger(KcbqThreadPoolExecutor.class);

  // First unrecoverable error seen by any write thread; null while none has occurred.
  private final AtomicReference<Throwable> encounteredError = new AtomicReference<>();

  /**
   * @param config the {@link BigQuerySinkTaskConfig}; supplies the pool size
   * @param workQueue the queue for storing tasks
   * @param threadFactory used to create the pool's worker threads
   */
  public KcbqThreadPoolExecutor(
      BigQuerySinkTaskConfig config,
      BlockingQueue<Runnable> workQueue,
      ThreadFactory threadFactory
  ) {
    super(
        config.getInt(BigQuerySinkTaskConfig.THREAD_POOL_SIZE_CONFIG),
        config.getInt(BigQuerySinkTaskConfig.THREAD_POOL_SIZE_CONFIG),
        // the keep-alive time is irrelevant because the core and max thread counts are the same
        1, TimeUnit.SECONDS,
        workQueue,
        threadFactory
    );
  }

  @Override
  protected void afterExecute(Runnable runnable, Throwable throwable) {
    super.afterExecute(runnable, throwable);

    if (throwable == null || throwable instanceof ExpectedInterruptException) {
      return;
    }
    // Log at debug level since this will be shown to the user at error level by the Connect
    // framework if it causes the task to fail, and would otherwise just pollute logs and
    // potentially mislead users
    logger.debug("A write thread has failed with an unrecoverable error", throwable);
    encounteredError.compareAndSet(null, throwable);
  }

  /**
   * Wait for all the currently queued tasks to complete, and then return.
   *
   * @throws BigQueryConnectException if any of the tasks failed
   * @throws InterruptedException if interrupted while waiting
   */
  public void awaitCurrentTasks() throws InterruptedException, BigQueryConnectException {
    // Enqueue one CountDownRunnable per pool thread; each counts down and then blocks until
    // every one of them has, which guarantees that all tasks submitted before this call have
    // completed by the time the latch opens.
    int poolSize = getMaximumPoolSize();
    CountDownLatch latch = new CountDownLatch(poolSize);
    for (int i = 0; i < poolSize; i++) {
      execute(new CountDownRunnable(latch));
    }
    latch.await();
    maybeThrowEncounteredError();
  }

  /**
   * Immediately throw an exception if any unrecoverable errors were encountered by any of
   * the write tasks.
   *
   * @throws BigQueryConnectException if any of the tasks failed
   */
  public void maybeThrowEncounteredError() {
    Throwable error = encounteredError.get();
    if (error != null) {
      throw new BigQueryConnectException("A write thread has failed with an unrecoverable error", error);
    }
  }
}
package com.wepay.kafka.connect.bigquery.write.batch;

import com.google.cloud.bigquery.TableId;
import org.apache.kafka.connect.sink.SinkRecord;

/**
 * Interface for building a {@link TableWriter} or TableWriterGCS: rows are accumulated
 * via {@link #addRow} and then turned into a single runnable write task via {@link #build}.
 */
public interface TableWriterBuilder {

  /**
   * Add a record to the builder.
   *
   * @param sinkRecord the row to add
   * @param table the table the row will be written to
   */
  void addRow(SinkRecord sinkRecord, TableId table);

  /**
   * Create a {@link TableWriter} from this builder.
   *
   * @return a {@link Runnable} (typically a TableWriter) containing the given writer,
   *     table, topic, and all added rows
   */
  Runnable build();
}
package com.wepay.kafka.connect.bigquery.write.row;

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryError;
import com.google.cloud.bigquery.InsertAllRequest;
import com.google.cloud.bigquery.InsertAllResponse;
import com.wepay.kafka.connect.bigquery.ErrantRecordHandler;
import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig;
import com.wepay.kafka.connect.bigquery.utils.PartitionedTableId;
import com.wepay.kafka.connect.bigquery.utils.Time;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import org.apache.kafka.connect.sink.SinkRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A simple BigQueryWriter implementation. Sends the request to BigQuery, and throws an
 * exception if any errors occur as a result.
 */
public class SimpleBigQueryWriter extends BigQueryWriter {
  private static final Logger logger = LoggerFactory.getLogger(SimpleBigQueryWriter.class);

  private final BigQuery bigQuery;

  /**
   * @param bigQuery the object used to send write requests to BigQuery
   * @param retry how many retries to make in the event of a 500/503 error
   * @param retryWait how long to wait in between retries
   * @param errantRecordHandler used to handle errant records
   * @param time used to wait during backoff periods
   */
  public SimpleBigQueryWriter(BigQuery bigQuery, int retry, long retryWait, ErrantRecordHandler errantRecordHandler, Time time) {
    super(retry, retryWait, errantRecordHandler, time);
    this.bigQuery = bigQuery;
  }

  /**
   * Sends the request to BigQuery, and returns a map of insert errors in case of partial
   * failure. Throws an exception if any other errors occur as a result of doing so.
   *
   * @see BigQueryWriter#performWriteRequest(PartitionedTableId, SortedMap)
   */
  @Override
  public Map<Long, List<BigQueryError>> performWriteRequest(PartitionedTableId tableId,
                                                            SortedMap<SinkRecord, InsertAllRequest.RowToInsert> rows) {
    InsertAllRequest request = createInsertAllRequest(tableId, rows.values());
    InsertAllResponse response = bigQuery.insertAll(request);

    if (!response.hasErrors()) {
      logger.debug("table insertion completed with no reported errors");
      return new HashMap<>();
    }

    // Partial failure: hint at the schema-update configs, then hand the per-row errors back
    // to the caller for retry/error handling.
    logger.warn(
        "You may want to enable schema updates by specifying "
            + "{}=true or {}=true in the properties file",
        BigQuerySinkConfig.ALLOW_NEW_BIGQUERY_FIELDS_CONFIG, BigQuerySinkConfig.ALLOW_BIGQUERY_REQUIRED_FIELD_RELAXATION_CONFIG
    );
    return response.getInsertErrors();
  }
}
package com.wepay.kafka.connect.bigquery.write.row;

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.TableId;
import com.wepay.kafka.connect.bigquery.ErrantRecordHandler;
import com.wepay.kafka.connect.bigquery.SchemaManager;
import com.wepay.kafka.connect.bigquery.exception.BigQueryConnectException;
import com.wepay.kafka.connect.bigquery.utils.PartitionedTableId;
import com.wepay.kafka.connect.bigquery.utils.Time;
import java.util.List;
import java.util.Map;
import org.apache.kafka.connect.sink.SinkRecord;

/**
 * A writer for upsert/delete mode: rows are written to intermediate tables (handled by the
 * superclass), while table creation and schema updates are additionally propagated to the
 * corresponding destination tables.
 */
public class UpsertDeleteBigQueryWriter extends AdaptiveBigQueryWriter {

  private final SchemaManager schemaManager;
  private final boolean autoCreateTables;
  private final Map<TableId, TableId> intermediateToDestinationTables;

  /**
   * @param bigQuery used to send write requests to BigQuery
   * @param schemaManager used to update BigQuery tables
   * @param retry how many retries to make in the event of a 500/503 error
   * @param retryWait how long to wait in between retries
   * @param autoCreateTables whether destination tables should be automatically created
   * @param intermediateToDestinationTables a mapping used to determine the destination table
   *                                        for given intermediate tables; used for
   *                                        create/update operations in order to propagate
   *                                        them to the destination table
   * @param errantRecordHandler used to handle errant records
   * @param time used to wait during backoff periods
   */
  public UpsertDeleteBigQueryWriter(BigQuery bigQuery,
                                    SchemaManager schemaManager,
                                    int retry,
                                    long retryWait,
                                    boolean autoCreateTables,
                                    Map<TableId, TableId> intermediateToDestinationTables,
                                    ErrantRecordHandler errantRecordHandler,
                                    Time time) {
    // Hardcode autoCreateTables to true in the superclass so that intermediate tables will be
    // automatically created.
    // The superclass handles all of the logic for writing to, creating, and updating
    // intermediate tables; this class handles creating/updating the destination table.
    super(bigQuery, schemaManager.forIntermediateTables(), retry, retryWait, true, errantRecordHandler, time);
    this.schemaManager = schemaManager;
    this.autoCreateTables = autoCreateTables;
    this.intermediateToDestinationTables = intermediateToDestinationTables;
  }

  @Override
  protected void attemptSchemaUpdate(PartitionedTableId tableId, List<SinkRecord> records) {
    // Update the intermediate table here...
    super.attemptSchemaUpdate(tableId, records);
    // ... and propagate the update to the destination table here
    TableId destinationTable = intermediateToDestinationTables.get(tableId.getBaseTableId());
    try {
      schemaManager.updateSchema(destinationTable, records);
    } catch (BigQueryException exception) {
      // Name the destination table (not the intermediate one) in the error, since that is the
      // table whose schema update actually failed
      throw new BigQueryConnectException(
          "Failed to update destination table schema for: " + destinationTable, exception);
    }
  }

  @Override
  protected void attemptTableCreate(TableId tableId, List<SinkRecord> records) {
    // Create the intermediate table here...
    super.attemptTableCreate(tableId, records);
    if (autoCreateTables) {
      // ... and create or update the destination table here, if it doesn't already exist and
      // auto table creation is enabled
      TableId destinationTable = intermediateToDestinationTables.get(tableId);
      try {
        schemaManager.createOrUpdateTable(destinationTable, records);
      } catch (BigQueryException exception) {
        // Name the destination table (not the intermediate one) in the error, since that is
        // the table whose creation actually failed
        throw new BigQueryConnectException(
            "Failed to create table " + destinationTable, exception);
      }
    }
  }
}
package com.wepay.kafka.connect.bigquery.write.storage;

import org.apache.kafka.connect.sink.SinkRecord;
import org.json.JSONObject;

/**
 * Immutable pairing of a Kafka {@link SinkRecord} with its converted {@link JSONObject}
 * form, so the original record remains available after conversion (e.g. for error
 * reporting).
 */
public class ConvertedRecord {

  private final SinkRecord original;
  private final JSONObject converted;

  public ConvertedRecord(SinkRecord original, JSONObject converted) {
    this.original = original;
    this.converted = converted;
  }

  /** @return the original, unconverted Kafka record */
  public SinkRecord original() {
    return original;
  }

  /** @return the converted JSON form of the record */
  public JSONObject converted() {
    return converted;
  }

}
package com.wepay.kafka.connect.bigquery.write.storage;

import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig;
import java.util.List;
import java.util.Map;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Handles all operations related to the Batch Storage Write API, delegating to a
 * {@link StorageWriteApiBatchApplicationStream}.
 */
public class StorageApiBatchModeHandler {

  private static final Logger logger = LoggerFactory.getLogger(StorageApiBatchModeHandler.class);
  private final StorageWriteApiBatchApplicationStream streamApi;

  // NOTE(review): the config parameter is currently unused; kept for constructor compatibility
  public StorageApiBatchModeHandler(StorageWriteApiBatchApplicationStream streamApi, BigQuerySinkTaskConfig config) {
    this.streamApi = streamApi;
  }

  /**
   * Used by the scheduler to commit all eligible streams and create new active streams.
   */
  public void refreshStreams() {
    logger.trace("Storage Write API commit stream attempt by scheduler");
    streamApi.refreshStreams();
  }

  /**
   * Saves the offsets assigned to a particular stream on a table. This is required to commit
   * offsets sequentially even if the execution takes place in parallel at different times.
   *
   * @param tableName name of the table, in project/dataset/tablename format
   * @param rows records that would be written to table {tableName}; sent to define the schema
   *             if table creation is attempted
   * @return the stream name on which offsets are updated
   */
  public String updateOffsetsOnStream(
      String tableName,
      List<ConvertedRecord> rows) {
    logger.trace("Updating offsets on current stream of table {}", tableName);
    return streamApi.updateOffsetsOnStream(tableName, rows);
  }

  /**
   * Gets offsets which are committed on the BigQuery table.
   *
   * @return a map of (topic, partition) to committable offset
   */
  public Map<TopicPartition, OffsetAndMetadata> getCommitableOffsets() {
    logger.trace("Getting list of commitable offsets for batch mode");
    return streamApi.getCommitableOffsets();
  }

}
package com.wepay.kafka.connect.bigquery.write.storage;

/**
 * Lifecycle states of a Storage Write API application stream.
 */
public enum StreamState {
  // NOTE(review): per-state semantics inferred from the names; confirm against
  // ApplicationStream's state transitions
  CREATED,    // stream has been created but not yet appended to
  APPEND,     // rows are being appended to the stream
  FINALISED,  // stream has been finalised; no further appends accepted
  COMMITTED,  // stream's appended rows have been committed
  INACTIVE    // stream is no longer in use
}
package com.wepay.kafka.connect.bigquery.write.storage;

import com.google.api.core.ApiFuture;
import com.google.cloud.bigquery.storage.v1.AppendRowsResponse;
import com.google.protobuf.Descriptors;
import java.io.IOException;
import org.json.JSONArray;

/**
 * Abstraction over a BigQuery Storage Write API stream that can append rows and be
 * refreshed when the underlying stream appears to be closed.
 */
public interface StreamWriter {

  /**
   * Write the provided rows.
   *
   * @param rows the rows to write; may not be null
   * @return the response from BigQuery for the write attempt
   * @throws Descriptors.DescriptorValidationException may be thrown by implementations
   *     while building protobuf descriptors for the rows
   * @throws IOException if the append attempt fails with an I/O error
   */
  ApiFuture<AppendRowsResponse> appendRows(
      JSONArray rows
  ) throws Descriptors.DescriptorValidationException, IOException;

  /**
   * Invoked if the underlying stream appears to be closed. Implementing classes
   * should respond by re-initializing the underlying stream.
   */
  void refresh();

  /**
   * Invoked when all rows have either been written to BigQuery or intentionally
   * discarded (e.g., reported to an {@link com.wepay.kafka.connect.bigquery.ErrantRecordHandler}).
   */
  void onSuccess();

  /** @return the name of the underlying stream */
  String streamName();

}
package com.wepay.kafka.connect.bigquery;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNotSame;

import com.wepay.kafka.connect.bigquery.api.SchemaRetriever;
import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link BigQuerySinkConnector}.
 */
public class BigQuerySinkConnectorTest {
  private static SinkPropertiesFactory propertiesFactory;

  @BeforeAll
  public static void initializePropertiesFactory() {
    propertiesFactory = new SinkPropertiesFactory();
  }

  @Test
  public void testTaskClass() {
    assertEquals(BigQuerySinkTask.class, new BigQuerySinkConnector().taskClass());
  }

  /**
   * Verifies that taskConfigs(n) returns n maps, each equal to the connector
   * properties plus a unique task ID, with no map shared by reference.
   */
  @Test
  public void testTaskConfigs() {
    Map<String, String> properties = propertiesFactory.getProperties();

    BigQuerySinkConnector testConnector = new BigQuerySinkConnector();

    testConnector.start(properties);

    for (int i : new int[]{1, 2, 10, 100}) {
      Map<String, String> expectedProperties = new HashMap<>(properties);
      List<Map<String, String>> taskConfigs = testConnector.taskConfigs(i);
      assertEquals(i, taskConfigs.size());
      for (int j = 0; j < i; j++) {
        expectedProperties.put(BigQuerySinkTaskConfig.TASK_ID_CONFIG, Integer.toString(j));
        assertEquals(
            expectedProperties,
            taskConfigs.get(j),
            "Connector properties should match task configs"
        );
        assertNotSame(
            properties,
            taskConfigs.get(j),
            "Properties should not be referentially equal to task config"
        );
        // A little overboard, sure, but since it's only in the ballpark of 10,000 iterations this
        // should be fine
        for (int k = j + 1; k < i; k++) {
          assertNotSame(
              taskConfigs.get(j),
              taskConfigs.get(k),
              "Task configs should not be referentially equal to each other"
          );
        }
      }
    }
  }

  @Test
  public void testConfig() {
    assertNotNull(new BigQuerySinkConnector().config());
  }

  @Test
  public void testVersion() {
    assertNotNull(new BigQuerySinkConnector().version());
  }

  // Doesn't do anything at the moment, but having this here will encourage tests to be written if
  // the stop() method ever does anything significant
  @Test
  public void testStop() {
    new BigQuerySinkConnector().stop();
  }

  // Would just use Mockito, but can't provide the name of an anonymous class to the config file
  public static class MockSchemaRetriever implements SchemaRetriever {
    @Override
    public void configure(Map<String, String> properties) {
      // Shouldn't be called
    }

    @Override
    public Schema retrieveKeySchema(SinkRecord record) {
      return null;
    }

    @Override
    public Schema retrieveValueSchema(SinkRecord record) {
      return null;
    }
  }
}
package com.wepay.kafka.connect.bigquery;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.google.cloud.bigquery.BigQueryError;
import java.util.ArrayList;
import java.util.List;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link ErrantRecordHandler}'s error-reason filtering.
 */
public class ErrantRecordHandlerTest {

  @Test
  public void shouldReturnTrueOnAllowedBigQueryReason() {
    ErrantRecordHandler errantRecordHandler = new ErrantRecordHandler(null);
    List<BigQueryError> bqErrorList = new ArrayList<>();
    bqErrorList.add(new BigQueryError("invalid", "location", "message", "info"));

    // should allow sending records to dlq for bigquery reason:invalid (present in
    // allowedBigQueryErrorReason list)
    boolean expected = errantRecordHandler.isErrorReasonAllowed(bqErrorList);
    assertTrue(expected);
  }

  @Test
  public void shouldReturnFalseOnNonAllowedReason() {
    ErrantRecordHandler errantRecordHandler = new ErrantRecordHandler(null);
    List<BigQueryError> bqErrorList = new ArrayList<>();
    bqErrorList.add(new BigQueryError("backendError", "location", "message", "info"));

    // Should not allow sending records to dlq for reason not present in
    // allowedBigQueryErrorReason list
    boolean expected = errantRecordHandler.isErrorReasonAllowed(bqErrorList);
    assertFalse(expected);
  }
}
package com.wepay.kafka.connect.bigquery;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.mockito.Mockito.mock;

import com.google.cloud.bigquery.TableId;
import com.google.cloud.storage.Blob;
import java.util.Collections;
import java.util.Map;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

/**
 * Unit tests for {@link GcsToBqLoadRunnable#getTableFromBlob(Blob)}, which parses
 * the destination table out of a GCS blob's "sinkTable" metadata entry.
 */
public class GcsToBqLoadRunnableTest {

  @Test
  public void testGetTableFromBlobWithProject() {
    final TableId expectedTableId = TableId.of("project", "dataset", "table");

    Map<String, String> metadata =
        Collections.singletonMap("sinkTable", serializeTableId(expectedTableId));
    Blob mockBlob = createMockBlobWithTableMetadata(metadata);

    TableId actualTableId = GcsToBqLoadRunnable.getTableFromBlob(mockBlob);
    assertEquals(expectedTableId, actualTableId);
  }

  @Test
  public void testGetTableFromBlobWithoutProject() {
    final TableId expectedTableId = TableId.of("dataset", "table");

    Map<String, String> metadata =
        Collections.singletonMap("sinkTable", serializeTableId(expectedTableId));
    Blob mockBlob = createMockBlobWithTableMetadata(metadata);

    TableId actualTableId = GcsToBqLoadRunnable.getTableFromBlob(mockBlob);
    assertEquals(expectedTableId, actualTableId);
  }

  @Test
  public void testGetTableFromBlobWithoutMetadata() {
    Blob mockBlob = mock(Blob.class);
    Mockito.when(mockBlob.getMetadata()).thenReturn(null);

    TableId tableId = GcsToBqLoadRunnable.getTableFromBlob(mockBlob);
    assertNull(tableId);
  }

  @Test
  public void testGetTableFromBlobWithBadMetadata() {
    // "bar/baz" does not match the expected [project:]dataset.table format
    Map<String, String> metadata = Collections.singletonMap("sinkTable", "bar/baz");
    Blob mockBlob = createMockBlobWithTableMetadata(metadata);

    TableId tableId = GcsToBqLoadRunnable.getTableFromBlob(mockBlob);
    assertNull(tableId);
  }

  /**
   * Serializes a TableId in the [project:]dataset.table format expected by
   * {@link GcsToBqLoadRunnable#getTableFromBlob(Blob)}.
   */
  private String serializeTableId(TableId tableId) {
    final String project = tableId.getProject();
    final String dataset = tableId.getDataset();
    final String table = tableId.getTable();
    StringBuilder sb = new StringBuilder();
    if (project != null) {
      sb.append(project).append(":");
    }
    return sb.append(dataset).append(".").append(table).toString();
  }

  /** Builds a mock Blob whose getMetadata() returns the given map. */
  private Blob createMockBlobWithTableMetadata(Map<String, String> metadata) {
    Blob mockBlob = mock(Blob.class);
    Mockito.when(mockBlob.getMetadata()).thenReturn(metadata);
    return mockBlob;
  }
}
package com.wepay.kafka.connect.bigquery;

import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig;
import java.util.HashMap;
import java.util.Map;

/**
 * Factory for generating default configuration maps, useful for testing.
 */
public class SinkPropertiesFactory {
  /**
   * A default configuration map for the tested class.
   *
   * @return a fresh, mutable map of connector property names to values
   */
  public Map<String, String> getProperties() {
    Map<String, String> properties = new HashMap<>();

    properties.put(BigQuerySinkConfig.TABLE_CREATE_CONFIG, "false");
    properties.put(BigQuerySinkConfig.TOPICS_CONFIG, "kcbq-test");
    properties.put(BigQuerySinkConfig.PROJECT_CONFIG, "test-project");
    properties.put(BigQuerySinkConfig.DEFAULT_DATASET_CONFIG, "kcbq-test");

    properties.put(BigQuerySinkConfig.KEYFILE_CONFIG, "key.json");

    properties.put(BigQuerySinkConfig.SANITIZE_TOPICS_CONFIG, "false");
    properties.put(BigQuerySinkConfig.AVRO_DATA_CACHE_SIZE_CONFIG, "10");

    properties.put(BigQuerySinkConfig.ALLOW_NEW_BIGQUERY_FIELDS_CONFIG, "false");
    properties.put(BigQuerySinkConfig.ALLOW_BIGQUERY_REQUIRED_FIELD_RELAXATION_CONFIG, "false");
    properties.put(BigQuerySinkConfig.USE_STORAGE_WRITE_API_CONFIG, "false");
    return properties;
  }
}
package com.wepay.kafka.connect.bigquery;

import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig;
import java.util.Map;

/**
 * Extends {@link SinkPropertiesFactory} with the task-level properties
 * (currently just a fixed task ID) needed to configure a sink task in tests.
 */
public class SinkTaskPropertiesFactory extends SinkPropertiesFactory {

  @Override
  public Map<String, String> getProperties() {
    Map<String, String> properties = super.getProperties();

    properties.put(BigQuerySinkTaskConfig.TASK_ID_CONFIG, "1");

    return properties;
  }
}
package com.wepay.kafka.connect.bigquery.config;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.wepay.kafka.connect.bigquery.GcpClientBuilder;
import java.util.Optional;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for the credentials validators in {@link CredentialsValidator}.
 */
public class CredentialsValidatorTest {

  /** Builds a mocked config whose getKey() returns the supplied value. */
  private BigQuerySinkConfig configWithKey(String key) {
    BigQuerySinkConfig mockConfig = mock(BigQuerySinkConfig.class);
    when(mockConfig.getKey()).thenReturn(key);
    return mockConfig;
  }

  @Test
  public void testNoCredentialsSkipsValidation() {
    // With no key configured, every credentials validator passes without
    // attempting to build a client
    BigQuerySinkConfig config = configWithKey(null);

    assertEquals(
        Optional.empty(),
        new CredentialsValidator.BigQueryCredentialsValidator().doValidate(config));
    assertEquals(
        Optional.empty(),
        new CredentialsValidator.GcsCredentialsValidator().doValidate(config));
    assertEquals(
        Optional.empty(),
        new CredentialsValidator.BigQueryStorageWriteApiCredentialsValidator().doValidate(config));
  }

  @Test
  public void testFailureToConstructClient() {
    BigQuerySinkConfig config = configWithKey("key");

    @SuppressWarnings("unchecked")
    GcpClientBuilder mockClientBuilder = mock(GcpClientBuilder.class);
    when(mockClientBuilder.withConfig(eq(config))).thenReturn(mockClientBuilder);
    when(mockClientBuilder.build()).thenThrow(new RuntimeException("Provided credentials are invalid"));

    // Each validator should surface an error when the client cannot be built
    assertNotEquals(
        Optional.empty(),
        new CredentialsValidator.BigQueryCredentialsValidator().doValidate(config));
    assertNotEquals(
        Optional.empty(),
        new CredentialsValidator.GcsCredentialsValidator().doValidate(config));
    assertNotEquals(
        Optional.empty(),
        new CredentialsValidator.BigQueryStorageWriteApiCredentialsValidator().doValidate(config));
  }

  @Test
  public void testKeyShouldNotBeProvidedIfUsingApplicationDefaultCredentials() {
    BigQuerySinkConfig config = configWithKey("key");
    when(config.getKeySource()).thenReturn(GcpClientBuilder.KeySource.APPLICATION_DEFAULT);

    // A key alongside application-default credentials is a configuration conflict
    assertTrue(
        new CredentialsValidator.BigQueryCredentialsValidator().doValidate(config)
            .get().contains("should not be provided"));
    assertTrue(
        new CredentialsValidator.BigQueryStorageWriteApiCredentialsValidator().doValidate(config)
            .get().contains("should not be provided"));
  }
}
package com.wepay.kafka.connect.bigquery.config;

import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.ENABLE_BATCH_CONFIG;
import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.GCS_BUCKET_NAME_CONFIG;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.google.cloud.storage.Bucket;
import com.google.cloud.storage.Storage;
import java.util.Collections;
import java.util.Optional;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link GcsBucketValidator}, covering the interaction between
 * batch loading, the configured bucket name, and bucket existence in GCS.
 */
public class GcsBucketValidatorTest {

  private final Storage gcs = mock(Storage.class);

  @Test
  public void testNullBatchLoadingSkipsValidation() {
    // No batch-loading topic list at all: the bucket is never needed
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getList(ENABLE_BATCH_CONFIG)).thenReturn(null);

    assertEquals(Optional.empty(), new GcsBucketValidator().doValidate(gcs, sinkConfig));
  }

  @Test
  public void testEmptyBatchLoadingSkipsValidation() {
    // An empty batch-loading topic list also means the bucket is never needed
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.emptyList());

    assertEquals(Optional.empty(), new GcsBucketValidator().doValidate(gcs, sinkConfig));
  }

  @Test
  public void testNullBucketWithBatchLoading() {
    // Batch loading enabled but no bucket configured: validation must fail
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.singletonList("t1"));
    when(sinkConfig.getString(GCS_BUCKET_NAME_CONFIG)).thenReturn(null);

    assertNotEquals(Optional.empty(), new GcsBucketValidator().doValidate(gcs, sinkConfig));
  }

  @Test
  public void testBlankBucketWithBatchLoading() {
    // A whitespace-only bucket name is treated the same as a missing one
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.singletonList("t1"));
    when(sinkConfig.getString(GCS_BUCKET_NAME_CONFIG)).thenReturn(" \t ");

    assertNotEquals(Optional.empty(), new GcsBucketValidator().doValidate(gcs, sinkConfig));
  }

  @Test
  public void testValidBucketWithBatchLoading() {
    // Batch loading enabled and the named bucket exists in GCS: validation passes
    final String bucketName = "gee_cs";
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.singletonList("t1"));
    when(sinkConfig.getString(GCS_BUCKET_NAME_CONFIG)).thenReturn(bucketName);

    Bucket existingBucket = mock(Bucket.class);
    when(gcs.get(eq(bucketName))).thenReturn(existingBucket);

    assertEquals(Optional.empty(), new GcsBucketValidator().doValidate(gcs, sinkConfig));
  }

  @Test
  public void testMissingBucketAndBucketCreationDisabledWithBatchLoading() {
    // The named bucket does not exist in GCS: validation must fail
    final String bucketName = "gee_cs";
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.singletonList("t1"));
    when(sinkConfig.getString(GCS_BUCKET_NAME_CONFIG)).thenReturn(bucketName);

    when(gcs.get(eq(bucketName))).thenReturn(null);

    assertNotEquals(Optional.empty(), new GcsBucketValidator().doValidate(gcs, sinkConfig));
  }
}
package com.wepay.kafka.connect.bigquery.config;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.fail;

import com.google.common.collect.ImmutableMap;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import org.apache.kafka.common.config.ConfigValue;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link MultiPropertyValidator}, exercised through a
 * configurable test subclass.
 */
public class MultiPropertyValidatorTest {

  @Test
  public void testExistingErrorSkipsValidation() {
    MultiPropertyValidator<Object> validator = new TestValidator<>(
        "p",
        Arrays.asList("d1", "d2", "d3"),
        o -> {
          fail("Validation should have been performed on property that already has an error");
          return null;
        }
    );

    // The property itself already carries an error, so doValidate must not run
    ConfigValue configValue = new ConfigValue("p", "v", Collections.emptyList(), Collections.singletonList("an error"));

    assertEquals(
        Optional.empty(),
        validator.validate(configValue, null, Collections.emptyMap())
    );
  }

  @Test
  public void testDependentErrorSkipsValidation() {
    MultiPropertyValidator<Object> validator = new TestValidator<>(
        "p",
        Arrays.asList("d1", "d2", "d3"),
        o -> {
          fail("Validation should have been performed on property whose dependent already has an error");
          return null;
        }
    );

    // One of the declared dependents ("d2") carries an error, so doValidate must not run
    ConfigValue configValue = new ConfigValue("p", "v", Collections.emptyList(), Collections.emptyList());
    Map<String, ConfigValue> valuesByName = ImmutableMap.of(
        "d1", new ConfigValue("d1", "v1", Collections.emptyList(), Collections.emptyList()),
        "d2", new ConfigValue("d2", "v1", Collections.emptyList(), Collections.singletonList("an error"))
    );

    assertEquals(
        Optional.empty(),
        validator.validate(configValue, null, valuesByName)
    );
  }

  @Test
  public void testValidationFails() {
    Optional<String> expectedError = Optional.of("an error");
    MultiPropertyValidator<Object> validator = new TestValidator<>(
        "p",
        Collections.emptyList(),
        o -> expectedError
    );

    ConfigValue configValue = new ConfigValue("p", "v", Collections.emptyList(), Collections.emptyList());

    assertEquals(
        expectedError,
        validator.validate(configValue, null, Collections.emptyMap())
    );
  }

  @Test
  public void testUnexpectedErrorDuringValidation() {
    MultiPropertyValidator<Object> validator = new TestValidator<>(
        "p",
        Collections.emptyList(),
        o -> {
          throw new RuntimeException("Some unexpected error");
        }
    );

    ConfigValue configValue = new ConfigValue("p", "v", Collections.emptyList(), Collections.emptyList());

    // An exception thrown by doValidate should be converted to a validation error
    assertNotEquals(
        Optional.empty(),
        validator.validate(configValue, null, Collections.emptyMap())
    );
  }

  /**
   * Test subclass whose dependents and validation logic are injected via the
   * constructor.
   */
  private static class TestValidator<Config> extends MultiPropertyValidator<Config> {

    private final List<String> dependents;
    private final Function<Config, Optional<String>> validationFunction;

    public TestValidator(String propertyName, List<String> dependents, Function<Config, Optional<String>> validationFunction) {
      super(propertyName);
      this.dependents = dependents;
      this.validationFunction = validationFunction;
    }

    @Override
    protected Collection<String> dependents() {
      return dependents;
    }

    @Override
    protected Optional<String> doValidate(Config config) {
      return validationFunction.apply(config);
    }
  }
}
package com.wepay.kafka.connect.bigquery.config;

import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.Optional;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link PartitioningModeValidator}: decorator syntax and a
 * timestamp partition field must not be enabled together.
 */
public class PartitioningModeValidatorTest {

  /** Builds a mocked config with the given decorator flag and timestamp partition field. */
  private BigQuerySinkConfig configWith(boolean decoratorSyntax, Optional<String> timestampField) {
    BigQuerySinkConfig mockConfig = mock(BigQuerySinkConfig.class);
    when(mockConfig.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(decoratorSyntax);
    when(mockConfig.getTimestampPartitionFieldName()).thenReturn(timestampField);
    return mockConfig;
  }

  @Test
  public void testDisabledDecoratorSyntaxSkipsValidation() {
    BigQuerySinkConfig config = configWith(false, Optional.empty());

    assertEquals(Optional.empty(), new PartitioningModeValidator().doValidate(config));
  }

  @Test
  public void testDecoratorSyntaxWithoutTimestampPartitionFieldName() {
    BigQuerySinkConfig config = configWith(true, Optional.empty());

    assertEquals(Optional.empty(), new PartitioningModeValidator().doValidate(config));
  }

  @Test
  public void testDecoratorSyntaxWithTimestampPartitionFieldName() {
    // Both features enabled at once is the invalid combination
    BigQuerySinkConfig config = configWith(true, Optional.of("f1"));

    assertNotEquals(Optional.empty(), new PartitioningModeValidator().doValidate(config));
  }

  @Test
  public void testTimestampPartitionFieldNameWithoutDecoratorSyntax() {
    BigQuerySinkConfig config = configWith(false, Optional.of("f1"));

    assertEquals(Optional.empty(), new PartitioningModeValidator().doValidate(config));
  }
}
package com.wepay.kafka.connect.bigquery.config;

import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG;
import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TABLE_CREATE_CONFIG;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.google.cloud.bigquery.TimePartitioning;
import java.util.Optional;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link PartitioningTypeValidator}: when both decorator syntax
 * and table creation are enabled, only DAY time-partitioning is permitted.
 */
public class PartitioningTypeValidatorTest {

  /** Builds a mocked config with the given decorator/table-creation flags and partitioning type. */
  private BigQuerySinkConfig configWith(
      boolean decoratorSyntax, boolean tableCreation, Optional<TimePartitioning.Type> partitioningType) {
    BigQuerySinkConfig mockConfig = mock(BigQuerySinkConfig.class);
    when(mockConfig.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(decoratorSyntax);
    when(mockConfig.getBoolean(TABLE_CREATE_CONFIG)).thenReturn(tableCreation);
    when(mockConfig.getTimePartitioningType()).thenReturn(partitioningType);
    return mockConfig;
  }

  @Test
  public void testDisabledDecoratorSyntaxSkipsValidation() {
    BigQuerySinkConfig config = configWith(false, true, Optional.empty());

    assertEquals(Optional.empty(), new PartitioningTypeValidator().doValidate(config));
  }

  @Test
  public void testDisabledTableCreationSkipsValidation() {
    BigQuerySinkConfig config = configWith(true, false, Optional.empty());

    assertEquals(Optional.empty(), new PartitioningTypeValidator().doValidate(config));
  }

  @Test
  public void testNonDayTimePartitioningWithTableCreationAndDecoratorSyntax() {
    // Every partitioning type other than DAY must be rejected when both
    // decorator syntax and table creation are enabled; could be expressed as a
    // @ParameterizedTest, but a plain loop keeps the fixture simple
    for (TimePartitioning.Type timePartitioningType : TimePartitioning.Type.values()) {
      if (TimePartitioning.Type.DAY.equals(timePartitioningType)) {
        continue;
      }

      BigQuerySinkConfig config = configWith(true, true, Optional.of(timePartitioningType));

      assertNotEquals(Optional.empty(), new PartitioningTypeValidator().doValidate(config));
    }
  }

  @Test
  public void testDayTimePartitioningWithTableCreationAndDecoratorSyntax() {
    BigQuerySinkConfig config = configWith(true, true, Optional.of(TimePartitioning.Type.DAY));

    assertEquals(Optional.empty(), new PartitioningTypeValidator().doValidate(config));
  }
}
9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.convert; 25 | 26 | 27 | import static org.junit.jupiter.api.Assertions.assertEquals; 28 | import static org.junit.jupiter.api.Assertions.assertTrue; 29 | 30 | import com.google.cloud.bigquery.Field; 31 | import com.google.cloud.bigquery.LegacySQLTypeName; 32 | import java.util.HashMap; 33 | import java.util.Map; 34 | import org.apache.kafka.connect.sink.SinkRecord; 35 | import org.junit.jupiter.api.BeforeEach; 36 | import org.junit.jupiter.api.Test; 37 | 38 | public class KafkaDataConverterTest { 39 | 40 | public static final String kafkaDataFieldName = "kafkaData"; 41 | private static final String kafkaDataTopicName = "topic"; 42 | private static final String kafkaDataPartitionName = "partition"; 43 | private static final String kafkaDataOffsetName = "offset"; 44 | private static final String kafkaDataInsertTimeName = "insertTime"; 45 | private static final String kafkaDataTopicValue = "testTopic"; 46 | private static final int kafkaDataPartitionValue = 101; 47 | private static final long kafkaDataOffsetValue = 1337; 48 | Map expectedKafkaDataFields = new HashMap<>(); 49 | 50 | @BeforeEach 51 | public void setup() { 52 | expectedKafkaDataFields.put(kafkaDataTopicName, kafkaDataTopicValue); 53 | expectedKafkaDataFields.put(kafkaDataPartitionName, kafkaDataPartitionValue); 54 | 
expectedKafkaDataFields.put(kafkaDataOffsetName, kafkaDataOffsetValue); 55 | } 56 | 57 | @Test 58 | public void testBuildKafkaDataRecord() { 59 | SinkRecord record = new SinkRecord(kafkaDataTopicValue, kafkaDataPartitionValue, null, null, null, null, kafkaDataOffsetValue); 60 | Map actualKafkaDataFields = KafkaDataBuilder.buildKafkaDataRecord(record); 61 | 62 | assertTrue(actualKafkaDataFields.containsKey(kafkaDataInsertTimeName)); 63 | assertTrue(actualKafkaDataFields.get(kafkaDataInsertTimeName) instanceof Double); 64 | 65 | actualKafkaDataFields.remove(kafkaDataInsertTimeName); 66 | 67 | assertEquals(expectedKafkaDataFields, actualKafkaDataFields); 68 | } 69 | 70 | @Test 71 | public void testBuildKafkaDataRecordStorageWriteApi() { 72 | SinkRecord record = new SinkRecord(kafkaDataTopicValue, kafkaDataPartitionValue, null, null, null, null, kafkaDataOffsetValue); 73 | Map actualKafkaDataFields = KafkaDataBuilder.buildKafkaDataRecordStorageApi(record); 74 | 75 | assertTrue(actualKafkaDataFields.containsKey(kafkaDataInsertTimeName)); 76 | assertTrue(actualKafkaDataFields.get(kafkaDataInsertTimeName) instanceof Long); 77 | 78 | actualKafkaDataFields.remove(kafkaDataInsertTimeName); 79 | 80 | assertEquals(expectedKafkaDataFields, actualKafkaDataFields); 81 | } 82 | 83 | @Test 84 | public void testBuildKafkaDataField() { 85 | Field topicField = Field.of("topic", LegacySQLTypeName.STRING); 86 | Field partitionField = Field.of("partition", LegacySQLTypeName.INTEGER); 87 | Field offsetField = Field.of("offset", LegacySQLTypeName.INTEGER); 88 | Field insertTimeField = Field.newBuilder("insertTime", LegacySQLTypeName.TIMESTAMP) 89 | .setMode(Field.Mode.NULLABLE) 90 | .build(); 91 | 92 | Field expectedBigQuerySchema = Field.newBuilder(kafkaDataFieldName, 93 | LegacySQLTypeName.RECORD, 94 | topicField, 95 | partitionField, 96 | offsetField, 97 | insertTimeField) 98 | .setMode(Field.Mode.NULLABLE) 99 | .build(); 100 | Field actualBigQuerySchema = 
KafkaDataBuilder.buildKafkaDataField(kafkaDataFieldName); 101 | assertEquals(expectedBigQuerySchema, actualBigQuerySchema); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/KafkaLogicalConvertersTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.convert.logicaltype; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertEquals; 27 | import static org.junit.jupiter.api.Assertions.assertThrows; 28 | 29 | import com.google.cloud.bigquery.LegacySQLTypeName; 30 | import com.wepay.kafka.connect.bigquery.convert.logicaltype.KafkaLogicalConverters.DateConverter; 31 | import com.wepay.kafka.connect.bigquery.convert.logicaltype.KafkaLogicalConverters.DecimalConverter; 32 | import com.wepay.kafka.connect.bigquery.convert.logicaltype.KafkaLogicalConverters.TimeConverter; 33 | import com.wepay.kafka.connect.bigquery.convert.logicaltype.KafkaLogicalConverters.TimestampConverter; 34 | import java.math.BigDecimal; 35 | import java.util.Date; 36 | import org.apache.kafka.connect.data.Schema; 37 | import org.junit.jupiter.api.Test; 38 | 39 | public class KafkaLogicalConvertersTest { 40 | 41 | //corresponds to March 1 2017, 22:20:38.808 42 | private static final Long TIMESTAMP = 1488406838808L; 43 | 44 | @Test 45 | public void testDateConversion() { 46 | DateConverter converter = new DateConverter(); 47 | 48 | assertEquals(LegacySQLTypeName.DATE, converter.getBqSchemaType()); 49 | 50 | converter.checkEncodingType(Schema.Type.INT32); 51 | 52 | Date date = new Date(TIMESTAMP); 53 | String formattedDate = converter.convert(date); 54 | assertEquals("2017-03-01", formattedDate); 55 | } 56 | 57 | @Test 58 | public void testDecimalConversion() { 59 | DecimalConverter converter = new DecimalConverter(); 60 | 61 | assertEquals(LegacySQLTypeName.FLOAT, converter.getBqSchemaType()); 62 | 63 | converter.checkEncodingType(Schema.Type.BYTES); 64 | 65 | BigDecimal bigDecimal = new BigDecimal("3.14159"); 66 | 67 | BigDecimal convertedDecimal = converter.convert(bigDecimal); 68 | 69 | // expecting no-op 70 | assertEquals(bigDecimal, convertedDecimal); 71 | } 72 | 73 | @Test 74 | public void testTimestampConversion() { 75 | TimestampConverter converter = new 
TimestampConverter(); 76 | 77 | assertEquals(LegacySQLTypeName.TIMESTAMP, converter.getBqSchemaType()); 78 | 79 | converter.checkEncodingType(Schema.Type.INT64); 80 | 81 | assertThrows( 82 | Exception.class, 83 | () -> converter.checkEncodingType(Schema.Type.INT32) 84 | ); 85 | 86 | Date date = new Date(TIMESTAMP); 87 | String formattedTimestamp = converter.convert(date); 88 | 89 | assertEquals("2017-03-01 22:20:38.808", formattedTimestamp); 90 | } 91 | 92 | 93 | @Test 94 | public void testTimeConversion() { 95 | TimeConverter converter = new KafkaLogicalConverters.TimeConverter(); 96 | 97 | assertEquals(LegacySQLTypeName.TIME, converter.getBqSchemaType()); 98 | 99 | converter.checkEncodingType(Schema.Type.INT32); 100 | 101 | assertThrows( 102 | Exception.class, 103 | () -> converter.checkEncodingType(Schema.Type.INT64) 104 | ); 105 | 106 | // Can't use the same timestamp here as the one in other tests as the Time type 107 | // should only fall on January 1st, 1970 108 | Date date = new Date(166838808); 109 | String formattedTimestamp = converter.convert(date); 110 | 111 | assertEquals("22:20:38.808", formattedTimestamp); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/exception/BigQueryErrorResponsesTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.exception; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertFalse; 27 | import static org.junit.jupiter.api.Assertions.assertTrue; 28 | 29 | import com.google.cloud.bigquery.BigQueryException; 30 | import org.junit.jupiter.api.Test; 31 | 32 | public class BigQueryErrorResponsesTest { 33 | 34 | @Test 35 | public void testIsAuthenticationError() { 36 | BigQueryException error = new BigQueryException(0, "......401.....Unauthorized error....."); 37 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 38 | 39 | error = new BigQueryException(0, "......401.....Unauthorized error...invalid_grant.."); 40 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 41 | 42 | error = new BigQueryException(0, "......400........invalid_grant.."); 43 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 44 | 45 | error = new BigQueryException(0, "......400.....invalid_request.."); 46 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 47 | 48 | error = new BigQueryException(0, "......400.....invalid_client.."); 49 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 50 | 51 | error = new BigQueryException(0, "......400.....unauthorized_client.."); 52 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 53 | 54 | error = new BigQueryException(0, "......400.....unsupported_grant_type.."); 55 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 56 | 57 | error = new 
BigQueryException(0, "......403..Access denied error....."); 58 | assertFalse(BigQueryErrorResponses.isAuthenticationError(error)); 59 | 60 | error = new BigQueryException(0, "......500...Internal Server Error..."); 61 | assertFalse(BigQueryErrorResponses.isAuthenticationError(error)); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/exception/BigQueryStorageWriteApiConnectExceptionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.exception; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertEquals; 27 | 28 | import com.google.cloud.bigquery.storage.v1.RowError; 29 | import java.util.ArrayList; 30 | import java.util.HashMap; 31 | import java.util.List; 32 | import java.util.Map; 33 | import org.junit.jupiter.api.Test; 34 | 35 | public class BigQueryStorageWriteApiConnectExceptionTest { 36 | 37 | @Test 38 | public void testFormatRowErrorBigQueryStorageWriteApi() { 39 | String expectedMessage = "Insertion failed at table abc for following rows: \n " + 40 | "[row index 0] (Failure reason : f1 is not valid) "; 41 | List errors = new ArrayList<>(); 42 | errors.add(RowError.newBuilder().setIndex(0).setMessage("f1 is not valid").build()); 43 | BigQueryStorageWriteApiConnectException exception = new BigQueryStorageWriteApiConnectException("abc", errors); 44 | assertEquals(expectedMessage, exception.getMessage()); 45 | } 46 | 47 | @Test 48 | public void testFormatAppendSerializationErrorBigQueryStorageWriteApi() { 49 | String expectedMessage = "Insertion failed at table abc for following rows: \n " + 50 | "[row index 0] (Failure reason : f1 is not valid) \n [row index 1] (Failure reason : f2 is not valid) "; 51 | Map errors = new HashMap<>(); 52 | errors.put(0, "f1 is not valid"); 53 | errors.put(1, "f2 is not valid"); 54 | BigQueryStorageWriteApiConnectException exception = new BigQueryStorageWriteApiConnectException("abc", errors); 55 | assertEquals(expectedMessage, exception.getMessage()); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/integration/StorageWriteApiBatchBigQuerySinkConnectorIT.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This 
software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.integration; 25 | 26 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; 27 | import java.util.Map; 28 | import org.junit.jupiter.api.Tag; 29 | 30 | @Tag("integration") 31 | public class StorageWriteApiBatchBigQuerySinkConnectorIT extends StorageWriteApiBigQuerySinkConnectorIT { 32 | 33 | @Override 34 | protected Map configs(String topic) { 35 | Map result = super.configs(topic); 36 | result.put(BigQuerySinkConfig.ENABLE_BATCH_MODE_CONFIG, "true"); 37 | result.put(BigQuerySinkConfig.COMMIT_INTERVAL_SEC_CONFIG, "15"); 38 | return result; 39 | } 40 | 41 | @Override 42 | protected String topic(String basename) { 43 | return super.topic(basename + "-batch-mode"); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/integration/utils/BigQueryTestUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This 
software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.integration.utils; 25 | 26 | import com.google.cloud.bigquery.BigQuery; 27 | import com.google.cloud.bigquery.BigQueryException; 28 | import com.google.cloud.bigquery.Schema; 29 | import com.google.cloud.bigquery.StandardTableDefinition; 30 | import com.google.cloud.bigquery.TableId; 31 | import com.google.cloud.bigquery.TableInfo; 32 | import com.google.cloud.bigquery.TimePartitioning; 33 | import org.slf4j.Logger; 34 | import org.slf4j.LoggerFactory; 35 | 36 | public class BigQueryTestUtils { 37 | 38 | private static final Logger logger = LoggerFactory.getLogger(BigQueryTestUtils.class); 39 | 40 | public static void createPartitionedTable(BigQuery bigQuery, String datasetName, String tableName, 41 | Schema schema) { 42 | try { 43 | TableId tableId = TableId.of(datasetName, tableName); 44 | 45 | TimePartitioning partitioning = 46 | TimePartitioning.newBuilder(TimePartitioning.Type.DAY) 47 | .build(); 48 | 49 | StandardTableDefinition tableDefinition = 50 | StandardTableDefinition.newBuilder() 51 | .setSchema(schema) 52 | .setTimePartitioning(partitioning) 53 | .build(); 54 | TableInfo tableInfo 
= TableInfo.newBuilder(tableId, tableDefinition).build(); 55 | 56 | bigQuery.create(tableInfo); 57 | logger.info("Partitioned table {} created successfully", tableName); 58 | } catch (BigQueryException e) { 59 | logger.error("Failed to create partitioned table {} in dataset {}", tableName, datasetName); 60 | throw e; 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/integration/utils/BucketClearer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.integration.utils; 25 | 26 | import com.google.api.gax.paging.Page; 27 | import com.google.cloud.storage.Blob; 28 | import com.google.cloud.storage.Bucket; 29 | import com.google.cloud.storage.Storage; 30 | import com.wepay.kafka.connect.bigquery.GcpClientBuilder; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | public class BucketClearer { 35 | 36 | private static final Logger logger = LoggerFactory.getLogger(BucketClearer.class); 37 | 38 | /** 39 | * Clear out a GCS bucket. Useful in integration testing to provide a clean slate before creating 40 | * a connector and writing to that bucket. 41 | * 42 | * @param key The GCP credentials to use (can be a filename or a raw JSON string). 43 | * @param project The GCP project the bucket belongs to. 44 | * @param bucketName The bucket to clear. 45 | * @param folderName The folder to clear (can be empty or null). 46 | * @param keySource The key source. If "FILE", then the {@code key} parameter will be treated as a 47 | * filename; if "JSON", then {@code key} will be treated as a raw JSON string. 
48 | */ 49 | public static void clearBucket(String key, String project, String bucketName, String folderName, String keySource) { 50 | Storage gcs = new GcpClientBuilder.GcsBuilder() 51 | .withKeySource(GcpClientBuilder.KeySource.valueOf(keySource)) 52 | .withKey(key) 53 | .withProject(project) 54 | .build(); 55 | Bucket bucket = gcs.get(bucketName); 56 | if (bucket != null) { 57 | logger.info("Deleting objects in the {} folder for bucket {}", 58 | humanReadableFolderName(folderName), bucketName); 59 | for (Blob blob : listBlobs(bucket, folderName)) { 60 | gcs.delete(blob.getBlobId()); 61 | } 62 | bucket.delete(); 63 | logger.info("Bucket {} deleted successfully", bucketName); 64 | } else { 65 | logger.info("Bucket {} does not exist", bucketName); 66 | } 67 | } 68 | 69 | private static String humanReadableFolderName(String folderName) { 70 | return folderName == null || folderName.isEmpty() 71 | ? "root" 72 | : "'" + folderName + "'"; 73 | } 74 | 75 | private static Iterable listBlobs(Bucket bucket, String folderName) { 76 | Page blobListing = folderName == null || folderName.isEmpty() 77 | ? bucket.list() 78 | : bucket.list(Storage.BlobListOption.prefix(folderName)); 79 | return blobListing.iterateAll(); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/integration/utils/TableClearer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 
9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.integration.utils; 25 | 26 | import static com.wepay.kafka.connect.bigquery.utils.TableNameUtils.table; 27 | 28 | import com.google.cloud.bigquery.BigQuery; 29 | import com.google.cloud.bigquery.TableId; 30 | import com.wepay.kafka.connect.bigquery.utils.FieldNameSanitizer; 31 | import java.util.Arrays; 32 | import java.util.Collection; 33 | import org.slf4j.Logger; 34 | import org.slf4j.LoggerFactory; 35 | 36 | public class TableClearer { 37 | private static final Logger logger = LoggerFactory.getLogger(TableClearer.class); 38 | 39 | /** 40 | * Clear out one or more BigQuery tables. Useful in integration testing to provide a clean slate 41 | * before creating a connector and writing to those tables. 42 | * 43 | * @param bigQuery The BigQuery client to use when sending table deletion requests. 44 | * @param dataset The dataset that the to-be-cleared tables belong to. 45 | * @param tables The tables to clear. 
46 | */ 47 | public static void clearTables(BigQuery bigQuery, String dataset, Collection tables) { 48 | for (String tableName : tables) { 49 | TableId table = TableId.of(dataset, FieldNameSanitizer.sanitizeName(tableName)); 50 | if (bigQuery.delete(table)) { 51 | logger.info("{} deleted successfully", table(table)); 52 | } else { 53 | logger.info("{} does not exist", table(table)); 54 | } 55 | } 56 | } 57 | 58 | /** 59 | * Clear out one or more BigQuery tables. Useful in integration testing to provide a clean slate 60 | * before creating a connector and writing to those tables. 61 | * 62 | * @param bigQuery The BigQuery client to use when sending table deletion requests. 63 | * @param dataset The dataset that the to-be-cleared tables belong to. 64 | * @param tables The tables to clear. 65 | */ 66 | public static void clearTables(BigQuery bigQuery, String dataset, String... tables) { 67 | clearTables(bigQuery, dataset, Arrays.asList(tables)); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/integration/utils/TestCaseLogger.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.integration.utils; 25 | 26 | import org.junit.jupiter.api.extension.AfterEachCallback; 27 | import org.junit.jupiter.api.extension.BeforeEachCallback; 28 | import org.junit.jupiter.api.extension.ExtensionContext; 29 | import org.slf4j.Logger; 30 | import org.slf4j.LoggerFactory; 31 | 32 | public class TestCaseLogger implements BeforeEachCallback, AfterEachCallback { 33 | private static final Logger logger = LoggerFactory.getLogger(TestCaseLogger.class); 34 | 35 | @Override 36 | public void beforeEach(ExtensionContext extensionContext) throws Exception { 37 | logger.info("Starting test {}", extensionContext.getDisplayName()); 38 | } 39 | 40 | @Override 41 | public void afterEach(ExtensionContext extensionContext) throws Exception { 42 | logger.info("Finished test {}", extensionContext.getDisplayName()); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/FieldNameSanitizerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 
9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertEquals; 27 | import static org.junit.jupiter.api.Assertions.assertTrue; 28 | 29 | import java.util.Collections; 30 | import java.util.HashMap; 31 | import java.util.Map; 32 | import org.junit.jupiter.api.BeforeEach; 33 | import org.junit.jupiter.api.Test; 34 | 35 | public class FieldNameSanitizerTest { 36 | private Map testMap; 37 | 38 | @BeforeEach 39 | public void setUp() { 40 | testMap = new HashMap() {{ 41 | put("A.1", new HashMap() {{ 42 | put("_B1", 1); 43 | put("B.2", "hello.B-2"); 44 | }}); 45 | put("A-2", new HashMap() {{ 46 | put("=/B.3", "hello B3"); 47 | put("B./4", "hello B4"); 48 | put("2A/", "hello B5"); 49 | put("3A/", "hello B6"); 50 | }}); 51 | put("Foo", "Simple Value"); 52 | put("Foo_1", "Simple Value 1"); 53 | put("Foo-2", "Simple Value 2"); 54 | }}; 55 | } 56 | 57 | @Test 58 | public void testInvalidSymbol() { 59 | Map sanitizedMap = FieldNameSanitizer.replaceInvalidKeys(testMap); 60 | assertTrue(sanitizedMap.containsKey("A_1")); 61 | assertTrue(sanitizedMap.containsKey("A_2")); 62 | 63 | Map nestedMap1 = (Map) sanitizedMap.get("A_1"); 64 | // Validate changed keys. 65 | assertTrue(nestedMap1.containsKey("B_2")); 66 | assertTrue(nestedMap1.containsKey("_B1")); 67 | 68 | // Validate unchanged values. 
69 | assertEquals(nestedMap1.get("B_2"), "hello.B-2"); 70 | assertEquals(nestedMap1.get("_B1"), 1); 71 | 72 | // Validate map size. 73 | assertEquals(2, nestedMap1.size()); 74 | 75 | Map nestedMap2 = (Map) sanitizedMap.get("A_2"); 76 | // Validate changed keys. 77 | assertTrue(nestedMap2.containsKey("__B_3")); 78 | assertTrue(nestedMap2.containsKey("B__4")); 79 | assertTrue(nestedMap2.containsKey("_2A_")); 80 | assertTrue(nestedMap2.containsKey("_3A_")); 81 | 82 | // Validate unchanged values. 83 | assertEquals(nestedMap2.get("__B_3"), "hello B3"); 84 | assertEquals(nestedMap2.get("B__4"), "hello B4"); 85 | assertEquals(nestedMap2.get("_2A_"), "hello B5"); 86 | assertEquals(nestedMap2.get("_3A_"), "hello B6"); 87 | 88 | // Validate map size. 89 | assertEquals(4, nestedMap2.size()); 90 | 91 | // Validate keys shall be unchanged. 92 | assertTrue(sanitizedMap.containsKey("Foo")); 93 | assertTrue(sanitizedMap.containsKey("Foo_1")); 94 | 95 | // Validate key shall be changed. 96 | assertTrue(sanitizedMap.containsKey("Foo_2")); 97 | 98 | // Validate map size. 99 | assertEquals(5, sanitizedMap.size()); 100 | } 101 | 102 | /** 103 | * Verifies that null values are acceptable while sanitizing keys. 
104 | */ 105 | @Test 106 | public void testNullValue() { 107 | assertEquals( 108 | Collections.singletonMap("abc", null), 109 | FieldNameSanitizer.replaceInvalidKeys(Collections.singletonMap("abc", null))); 110 | } 111 | 112 | @Test 113 | public void testDeeplyNestedNullValues() { 114 | testMap = new HashMap<>(); 115 | testMap.put("top", null); 116 | testMap.put("middle", Collections.singletonMap("key", null)); 117 | testMap.put("bottom", Collections.singletonMap("key", Collections.singletonMap("key", null))); 118 | assertEquals( 119 | testMap, 120 | FieldNameSanitizer.replaceInvalidKeys(testMap) 121 | ); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/MockTime.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
 */

package com.wepay.kafka.connect.bigquery.utils;

/**
 * Test-only implementation of the connector's {@link Time} abstraction, backed by
 * Kafka's {@code org.apache.kafka.common.utils.MockTime} so tests can control the
 * clock instead of sleeping in real time.
 */
public class MockTime extends org.apache.kafka.common.utils.MockTime implements Time {
}
--------------------------------------------------------------------------------
/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/PartitionedTableIdTest.java:
--------------------------------------------------------------------------------
/*
 * Copyright 2024 Copyright 2022 Aiven Oy and
 * bigquery-connector-for-apache-kafka project contributors
 *
 * This software contains code derived from the Confluent BigQuery
 * Kafka Connector, Copyright Confluent, Inc, which in turn
 * contains code derived from the WePay BigQuery Kafka Connector,
 * Copyright WePay, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertEquals; 27 | 28 | import com.google.cloud.bigquery.TableId; 29 | import java.time.LocalDate; 30 | import org.junit.jupiter.api.Test; 31 | 32 | public class PartitionedTableIdTest { 33 | 34 | @Test 35 | public void testBasicBuilder() { 36 | final String dataset = "dataset"; 37 | final String table = "table"; 38 | 39 | final PartitionedTableId tableId = new PartitionedTableId.Builder(dataset, table).build(); 40 | 41 | assertEquals(dataset, tableId.getDataset()); 42 | assertEquals(table, tableId.getBaseTableName()); 43 | assertEquals(table, tableId.getFullTableName()); 44 | 45 | TableId expectedTableId = TableId.of(dataset, table); 46 | assertEquals(expectedTableId, tableId.getBaseTableId()); 47 | assertEquals(expectedTableId, tableId.getFullTableId()); 48 | } 49 | 50 | @Test 51 | public void testTableIdBuilder() { 52 | final String project = "project"; 53 | final String dataset = "dataset"; 54 | final String table = "table"; 55 | final TableId tableId = TableId.of(project, dataset, table); 56 | 57 | final PartitionedTableId partitionedTableId = new PartitionedTableId.Builder(tableId).build(); 58 | 59 | assertEquals(project, partitionedTableId.getProject()); 60 | assertEquals(dataset, partitionedTableId.getDataset()); 61 | assertEquals(table, partitionedTableId.getBaseTableName()); 62 | assertEquals(table, partitionedTableId.getFullTableName()); 63 | 64 | assertEquals(tableId, partitionedTableId.getBaseTableId()); 65 | assertEquals(tableId, partitionedTableId.getFullTableId()); 66 | } 67 | 68 | @Test 69 | public void testWithPartition() { 70 | final String dataset = "dataset"; 71 | final String table = "table"; 72 | final LocalDate partitionDate = LocalDate.of(2016, 9, 21); 73 | 74 | final PartitionedTableId partitionedTableId = 75 | new PartitionedTableId.Builder(dataset, table).setDayPartition(partitionDate).build(); 76 | 77 | final String 
expectedPartition = "20160921"; 78 | 79 | assertEquals(dataset, partitionedTableId.getDataset()); 80 | assertEquals(table, partitionedTableId.getBaseTableName()); 81 | assertEquals(table + "$" + expectedPartition, partitionedTableId.getFullTableName()); 82 | 83 | final TableId expectedBaseTableId = TableId.of(dataset, table); 84 | final TableId expectedFullTableId = TableId.of(dataset, table + "$" + expectedPartition); 85 | 86 | assertEquals(expectedBaseTableId, partitionedTableId.getBaseTableId()); 87 | assertEquals(expectedFullTableId, partitionedTableId.getFullTableId()); 88 | } 89 | 90 | @Test 91 | public void testWithEpochTimePartition() { 92 | final String dataset = "dataset"; 93 | final String table = "table"; 94 | 95 | final long utcTime = 1509007584334L; 96 | 97 | final PartitionedTableId partitionedTableId = 98 | new PartitionedTableId.Builder(dataset, table).setDayPartition(utcTime).build(); 99 | 100 | final String expectedPartition = "20171026"; 101 | 102 | assertEquals(dataset, partitionedTableId.getDataset()); 103 | assertEquals(table, partitionedTableId.getBaseTableName()); 104 | assertEquals(table + "$" + expectedPartition, partitionedTableId.getFullTableName()); 105 | 106 | final TableId expectedBaseTableId = TableId.of(dataset, table); 107 | final TableId expectedFullTableId = TableId.of(dataset, table + "$" + expectedPartition); 108 | 109 | assertEquals(expectedBaseTableId, partitionedTableId.getBaseTableId()); 110 | assertEquals(expectedFullTableId, partitionedTableId.getFullTableId()); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/storage/BigQueryBuilderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the 
Confluent BigQuery
 * Kafka Connector, Copyright Confluent, Inc, which in turn
 * contains code derived from the WePay BigQuery Kafka Connector,
 * Copyright WePay, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.wepay.kafka.connect.bigquery.write.storage;

import static org.junit.jupiter.api.Assertions.assertEquals;

import com.google.cloud.bigquery.BigQuery;
import com.wepay.kafka.connect.bigquery.GcpClientBuilder;
import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;

public class BigQueryBuilderTest {

  /**
   * The {@link BigQuery} client produced by the builder should carry the project id
   * configured via {@link BigQuerySinkConfig#PROJECT_CONFIG}.
   */
  @Test
  public void testBigQueryBuild() {
    // Parameterized map instead of the raw type.
    Map<String, String> properties = new HashMap<>();
    properties.put(BigQuerySinkConfig.PROJECT_CONFIG, "abcd");
    properties.put(BigQuerySinkConfig.DEFAULT_DATASET_CONFIG, "dummy_dataset");
    BigQuerySinkConfig config = new BigQuerySinkConfig(properties);

    BigQuery client = new GcpClientBuilder.BigQueryBuilder()
        .withConfig(config)
        .build();

    // Expected value first, per JUnit convention.
    assertEquals("abcd", client.getOptions().getProjectId());
  }

}
--------------------------------------------------------------------------------
/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/storage/BigQueryWriteSettingsBuilderTest.java:
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.write.storage; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertEquals; 27 | 28 | import com.google.cloud.bigquery.storage.v1.BigQueryWriteSettings; 29 | import com.wepay.kafka.connect.bigquery.GcpClientBuilder; 30 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; 31 | import java.util.HashMap; 32 | import java.util.Map; 33 | import org.junit.jupiter.api.Test; 34 | 35 | public class BigQueryWriteSettingsBuilderTest { 36 | 37 | @Test 38 | public void testBigQueryWriteSettingsBuild() { 39 | Map properties = new HashMap<>(); 40 | properties.put(BigQuerySinkConfig.PROJECT_CONFIG, "abcd"); 41 | properties.put(BigQuerySinkConfig.KEY_SOURCE_CONFIG, GcpClientBuilder.KeySource.FILE.name()); 42 | properties.put(BigQuerySinkConfig.DEFAULT_DATASET_CONFIG, "dummy_dataset"); 43 | BigQuerySinkConfig config = new BigQuerySinkConfig(properties); 44 | 45 | BigQueryWriteSettings actualSettings = new GcpClientBuilder.BigQueryWriteSettingsBuilder() 46 | .withConfig(config) 47 | .build(); 48 | 49 | assertEquals(actualSettings.getQuotaProjectId(), "abcd"); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/storage/GcsBuilderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.write.storage; 25 | 26 | import com.google.cloud.storage.Storage; 27 | import com.wepay.kafka.connect.bigquery.GcpClientBuilder; 28 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; 29 | import org.junit.jupiter.api.Test; 30 | 31 | import java.util.HashMap; 32 | import java.util.Map; 33 | 34 | import static org.junit.jupiter.api.Assertions.assertEquals; 35 | 36 | public class GcsBuilderTest { 37 | 38 | @Test 39 | public void testStorageBuild() { 40 | Map properties = new HashMap<>(); 41 | properties.put(BigQuerySinkConfig.PROJECT_CONFIG, "abcd"); 42 | properties.put(BigQuerySinkConfig.DEFAULT_DATASET_CONFIG, "dummy_dataset"); 43 | BigQuerySinkConfig config = new BigQuerySinkConfig(properties); 44 | 45 | Storage actualSettings = new GcpClientBuilder.GcsBuilder() 46 | .withConfig(config) 47 | .build(); 48 | 49 | assertEquals(actualSettings.getOptions().getProjectId(), "abcd"); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/storage/StorageApiBatchModeHandlerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains 
code derived from the WePay BigQuery Kafka Connector,
 * Copyright WePay, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.wepay.kafka.connect.bigquery.write.storage;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import com.google.cloud.bigquery.storage.v1.TableName;
import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@link StorageApiBatchModeHandler}, using a mocked
 * {@link StorageWriteApiBatchApplicationStream} to verify delegation.
 */
public class StorageApiBatchModeHandlerTest {
  StorageWriteApiBatchApplicationStream mockedStreamApi = mock(StorageWriteApiBatchApplicationStream.class);
  BigQuerySinkTaskConfig mockedConfig = mock(BigQuerySinkTaskConfig.class);
  // Parameterized instead of the raw Map; TopicPartition and OffsetAndMetadata are
  // imported for exactly this purpose.
  Map<TopicPartition, OffsetAndMetadata> offsetInfo = new HashMap<>();
  StorageApiBatchModeHandler batchModeHandler = new StorageApiBatchModeHandler(
      mockedStreamApi,
      mockedConfig
  );
  // NOTE(review): the row element type is declared outside this file, so the raw
  // List is kept; it only flows through the mocked call sites below.
  List rows = new ArrayList<>();

  @BeforeEach
  public void setup() {
    // Minimal config stubs so the handler can resolve fully-qualified table names.
    when(mockedConfig.getString(BigQuerySinkTaskConfig.PROJECT_CONFIG)).thenReturn("p");
    when(mockedConfig.getString(BigQuerySinkTaskConfig.DEFAULT_DATASET_CONFIG)).thenReturn("d1");
    when(mockedConfig.getBoolean(BigQuerySinkTaskConfig.SANITIZE_TOPICS_CONFIG)).thenReturn(false);
    when(mockedConfig.getList(BigQuerySinkTaskConfig.TOPICS_CONFIG)).thenReturn(
        Arrays.asList("topic1", "topic2")
    );
    when(mockedStreamApi.maybeCreateStream(any(), any())).thenReturn(true);
    when(mockedStreamApi.updateOffsetsOnStream(any(), any())).thenReturn("s1_app_stream");
    when(mockedStreamApi.getCommitableOffsets()).thenReturn(offsetInfo);
  }

  // Smoke test: refreshing streams with valid stubs must not throw.
  @Test
  public void testCommitStreams() {
    batchModeHandler.refreshStreams();
  }

  // The handler should pass the fully-qualified table name through to the
  // application stream and return the stream name it reports.
  @Test
  public void testUpdateOffsetsOnStream() {
    String actualStreamName = batchModeHandler.updateOffsetsOnStream(
        TableName.of("p", "d1", "topic1").toString(), rows);

    assertEquals("s1_app_stream", actualStreamName);
    verify(mockedStreamApi, times(1))
        .updateOffsetsOnStream("projects/p/datasets/d1/tables/topic1", rows);
  }

  // Committable offsets are sourced directly from the application stream.
  @Test
  public void testGetCommitableOffsets() {
    batchModeHandler.getCommitableOffsets();
    verify(mockedStreamApi, times(1)).getCommitableOffsets();
  }
}
--------------------------------------------------------------------------------
/kcbq-connector/src/test/resources/integration_test_cases/gcs-load/data.json:
--------------------------------------------------------------------------------
{"row":1,"null_prim":null,"boolean_prim":false,"int_prim":4242,"long_prim":42424242424242,"float_prim":42.42,"double_prim":42424242.42424242,"string_prim":"forty-two","bytes_prim":"\u0000\u000f\u001e\u002d\u003c\u004b\u005a\u0069\u0078"}
{"row":2,"null_prim":{"int":5},"boolean_prim":true,"int_prim":4354,"long_prim":435443544354,"float_prim":43.54,"double_prim":435443.544354,"string_prim":"forty-three","bytes_prim":"\u0000\u000f\u001e\u002d\u003c\u004b\u005a\u0069\u0078"} 3 | {"row":3,"null_prim":{"int":8},"boolean_prim":false,"int_prim":1993,"long_prim":199319931993,"float_prim":19.93,"double_prim":199319.931993,"string_prim":"nineteen","bytes_prim":"\u0000\u000f\u001e\u002d\u003c\u004b\u005a\u0069\u0078"} 4 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/gcs-load/schema.json: -------------------------------------------------------------------------------- 1 | { "type": "record", 2 | "name": "gcsLoad", 3 | "namespace": "com.wepay.kafka.connect.bigquery", 4 | "fields": 5 | [ 6 | { "name": "row", "type": "int" }, 7 | { "name": "null_prim", "type": ["null", "int"] }, 8 | { "name": "boolean_prim", "type": "boolean" }, 9 | { "name": "int_prim", "type": "int" }, 10 | { "name": "long_prim", "type": "long" }, 11 | { "name": "float_prim", "type": "float" }, 12 | { "name": "double_prim", "type": "double" }, 13 | { "name": "string_prim", "type": "string" }, 14 | { "name": "bytes_prim", "type": "bytes" } 15 | ] 16 | } -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/logical-types/data.json: -------------------------------------------------------------------------------- 1 | {"row":1,"timestamp_test":0,"date_test":0} 2 | {"row":2,"timestamp_test":42000000,"date_test":4200} 3 | {"row":3,"timestamp_test":1468275102000,"date_test":16993} 4 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/logical-types/schema.json: -------------------------------------------------------------------------------- 1 | { "type": "record", 2 | "name": "logicals", 3 | 
"namespace": "com.wepay.kafka.connect.bigquery", 4 | "fields": 5 | [ 6 | { "name": "row", "type": "int" }, 7 | { "name": "timestamp_test", 8 | "type": 9 | { "type": "long", 10 | "connect.name": "org.apache.kafka.connect.data.Timestamp" 11 | } 12 | }, 13 | { "name": "date_test", 14 | "type": 15 | { 16 | "type": "int", 17 | "connect.name": "org.apache.kafka.connect.data.Date" 18 | } 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/matryoshka-dolls/data.json: -------------------------------------------------------------------------------- 1 | {"row":1,"middle":{"middle_array":[42.0, 42.42, 42.4242],"inner":{"inner_int":42,"inner_string":"42"}},"inner":{"inner_int":-42,"inner_string": "-42"}} 2 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/matryoshka-dolls/schema.json: -------------------------------------------------------------------------------- 1 | { "type": "record", 2 | "name": "outer_doll", 3 | "namespace": "com.wepay.kafka.connect.bigquery", 4 | "fields": [ 5 | { "name": "row", "type": "int" }, 6 | { "name": "middle", "type": 7 | { "type": "record", 8 | "name": "middle_doll", 9 | "fields": [ 10 | { "name": "middle_array", 11 | "type": { 12 | "type": "array", 13 | "items": "float" 14 | } 15 | }, 16 | { "name": "inner", 17 | "type": { 18 | "type": "record", 19 | "name": "inner_doll", 20 | "fields": [ 21 | { "name": "inner_int", 22 | "type": "int" 23 | }, 24 | { "name": "inner_string", 25 | "type": "string" 26 | } 27 | ] 28 | } 29 | } 30 | ] 31 | } 32 | }, 33 | { 34 | "name": "inner", 35 | "type": "com.wepay.kafka.connect.bigquery.inner_doll" 36 | } 37 | ] 38 | } 39 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/nulls/data.json: 
-------------------------------------------------------------------------------- 1 | {"row":1,"f1":"Required string","f2":null,"f3":{"int":42},"f4":{"boolean":false}} 2 | {"row":2,"f1":"Required string","f2":{"string":"Optional string"},"f3":{"int":89},"f4":null} 3 | {"row":3,"f1":"Required string","f2":null,"f3":null,"f4":{"boolean":true}} 4 | {"row":4,"f1":"Required string","f2":{"string":"Optional string"},"f3":null,"f4":null} 5 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/nulls/schema.json: -------------------------------------------------------------------------------- 1 | {"type":"record", 2 | "name":"myrecord", 3 | "fields":[ 4 | {"name":"row","type":"int"}, 5 | {"name":"f1","type":"string"}, 6 | {"name":"f2","type":["null","string"]}, 7 | {"name":"f3","type":["null","int"]}, 8 | {"name":"f4","type":["null","boolean"]}] 9 | } 10 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/primitives/data.json: -------------------------------------------------------------------------------- 1 | {"row":1,"null_prim":null,"boolean_prim":false,"int_prim":4242,"long_prim":42424242424242,"float_prim":42.42,"double_prim":42424242.42424242,"string_prim":"forty-two","bytes_prim":"\u0000\u000f\u001e\u002d\u003c\u004b\u005a\u0069\u0078"} 2 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/primitives/schema.json: -------------------------------------------------------------------------------- 1 | { "type": "record", 2 | "name": "primitives", 3 | "namespace": "com.wepay.kafka.connect.bigquery", 4 | "fields": 5 | [ 6 | { "name": "row", "type": "int" }, 7 | { "name": "null_prim", "type": ["null", "int"] }, 8 | { "name": "boolean_prim", "type": "boolean" }, 9 | { "name": "int_prim", "type": "int" }, 10 | { 
"name": "long_prim", "type": "long" }, 11 | { "name": "float_prim", "type": "float" }, 12 | { "name": "double_prim", "type": "double" }, 13 | { "name": "string_prim", "type": "string" }, 14 | { "name": "bytes_prim", "type": "bytes" } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2024 Copyright 2022 Aiven Oy and 3 | # bigquery-connector-for-apache-kafka project contributors 4 | # 5 | # This software contains code derived from the Confluent BigQuery 6 | # Kafka Connector, Copyright Confluent, Inc, which in turn 7 | # contains code derived from the WePay BigQuery Kafka Connector, 8 | # Copyright WePay, Inc. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, 17 | # software distributed under the License is distributed on an 18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | # KIND, either express or implied. See the License for the 20 | # specific language governing permissions and limitations 21 | # under the License. 22 | # 23 | 24 | log4j.rootLogger=INFO, stdout 25 | 26 | # Send the logs to the console. 
27 | # 28 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 29 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 30 | 31 | connect.log.pattern=[%d] %p %X{connector.context}%m (%c:%L)%n 32 | log4j.appender.stdout.layout.ConversionPattern=${connect.log.pattern} 33 | log4j.appender.connectAppender.layout.ConversionPattern=${connect.log.pattern} 34 | 35 | # These are used in the log4j properties file that ships by default with Connect 36 | log4j.logger.org.apache.zookeeper=ERROR 37 | log4j.logger.org.reflections=ERROR 38 | 39 | log4j.logger.com.wepay.kafka.connect.bigquery=DEBUG 40 | 41 | # We see a lot of WARN-level messages from this class when a table is created by the connector and 42 | # then written to shortly after. No need for that much noise during routine tests 43 | log4j.logger.com.wepay.kafka.connect.bigquery.write.batch.TableWriter=ERROR 44 | # Logs a message at INFO on every http request 45 | log4j.logger.org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster=WARN 46 | log4j.logger.com.wepay.kafka.connect.bigquery.integration.BigQueryErrorResponsesIT=DEBUG 47 | --------------------------------------------------------------------------------