├── .github └── workflows │ ├── build_site.yml │ ├── create_release.yml │ ├── manual.yml │ ├── nightly.yml │ ├── prs_and_commits.yml │ └── release_pr_workflow.yml ├── .gitignore ├── LICENSE.md ├── README.md ├── config ├── checkstyle │ └── suppressions.xml └── copyright │ └── custom-header-styles.xml ├── docs ├── pom.xml ├── sink-connector-config-options.rst └── src │ └── site │ ├── custom │ └── project-info-reports.properties │ ├── markdown │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── RELEASE_NOTES.md │ ├── SECURITY.md │ ├── configuration.md.vm │ ├── designNotes.md.vm │ ├── index.md.vm │ ├── writeapiBestPractices.md │ ├── writeapiPartitionDecorator.md │ └── writeapiUnknownFields.md │ └── site.xml ├── kcbq-api ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── wepay │ └── kafka │ └── connect │ └── bigquery │ └── api │ ├── KafkaSchemaRecordType.java │ └── SchemaRetriever.java ├── kcbq-connector ├── pom.xml └── src │ ├── main │ ├── assembly │ │ ├── release-tar.xml │ │ └── release-zip.xml │ ├── java │ │ ├── com │ │ │ └── wepay │ │ │ │ └── kafka │ │ │ │ └── connect │ │ │ │ └── bigquery │ │ │ │ ├── BigQuerySinkConnector.java │ │ │ │ ├── BigQuerySinkTask.java │ │ │ │ ├── ErrantRecordHandler.java │ │ │ │ ├── GcpClientBuilder.java │ │ │ │ ├── GcsToBqLoadRunnable.java │ │ │ │ ├── MergeQueries.java │ │ │ │ ├── RecordTableResolver.java │ │ │ │ ├── SchemaManager.java │ │ │ │ ├── config │ │ │ │ ├── BigQuerySinkConfig.java │ │ │ │ ├── BigQuerySinkTaskConfig.java │ │ │ │ ├── CredentialsValidator.java │ │ │ │ ├── GcsBucketValidator.java │ │ │ │ ├── MultiPropertyValidator.java │ │ │ │ ├── PartitioningModeValidator.java │ │ │ │ ├── PartitioningTypeValidator.java │ │ │ │ ├── StorageWriteApiValidator.java │ │ │ │ └── UpsertDeleteValidator.java │ │ │ │ ├── convert │ │ │ │ ├── BigQueryRecordConverter.java │ │ │ │ ├── BigQuerySchemaConverter.java │ │ │ │ ├── KafkaDataBuilder.java │ │ │ │ ├── RecordConverter.java │ │ │ │ ├── SchemaConverter.java │ │ │ │ └── logicaltype │ │ │ 
│ │ ├── DebeziumLogicalConverters.java │ │ │ │ │ ├── KafkaLogicalConverters.java │ │ │ │ │ ├── LogicalConverterRegistry.java │ │ │ │ │ └── LogicalTypeConverter.java │ │ │ │ ├── exception │ │ │ │ ├── BigQueryConnectException.java │ │ │ │ ├── BigQueryErrorResponses.java │ │ │ │ ├── BigQueryStorageWriteApiConnectException.java │ │ │ │ ├── BigQueryStorageWriteApiErrorResponses.java │ │ │ │ ├── ConversionConnectException.java │ │ │ │ ├── ExpectedInterruptException.java │ │ │ │ └── GcsConnectException.java │ │ │ │ ├── retrieve │ │ │ │ └── IdentitySchemaRetriever.java │ │ │ │ ├── utils │ │ │ │ ├── FieldNameSanitizer.java │ │ │ │ ├── GsonUtils.java │ │ │ │ ├── PartitionedTableId.java │ │ │ │ ├── SinkRecordConverter.java │ │ │ │ ├── SleepUtils.java │ │ │ │ ├── TableNameUtils.java │ │ │ │ └── Time.java │ │ │ │ └── write │ │ │ │ ├── RecordBatches.java │ │ │ │ ├── batch │ │ │ │ ├── CountDownRunnable.java │ │ │ │ ├── GcsBatchTableWriter.java │ │ │ │ ├── KcbqThreadPoolExecutor.java │ │ │ │ ├── MergeBatches.java │ │ │ │ ├── TableWriter.java │ │ │ │ └── TableWriterBuilder.java │ │ │ │ ├── row │ │ │ │ ├── AdaptiveBigQueryWriter.java │ │ │ │ ├── BigQueryWriter.java │ │ │ │ ├── GcsToBqWriter.java │ │ │ │ ├── SimpleBigQueryWriter.java │ │ │ │ └── UpsertDeleteBigQueryWriter.java │ │ │ │ └── storage │ │ │ │ ├── ApplicationStream.java │ │ │ │ ├── ConvertedRecord.java │ │ │ │ ├── JsonStreamWriterFactory.java │ │ │ │ ├── StorageApiBatchModeHandler.java │ │ │ │ ├── StorageWriteApiBase.java │ │ │ │ ├── StorageWriteApiBatchApplicationStream.java │ │ │ │ ├── StorageWriteApiDefaultStream.java │ │ │ │ ├── StorageWriteApiRetryHandler.java │ │ │ │ ├── StorageWriteApiWriter.java │ │ │ │ ├── StreamState.java │ │ │ │ └── StreamWriter.java │ │ └── io │ │ │ └── aiven │ │ │ └── kafka │ │ │ └── utils │ │ │ ├── ConfigKeyBuilder.java │ │ │ ├── ExtendedConfigKey.java │ │ │ └── VersionInfo.java │ └── resources │ │ └── META-INF │ │ └── services │ │ └── org.apache.kafka.connect.sink.SinkConnector │ └── test │ 
├── java │ └── com │ │ └── wepay │ │ └── kafka │ │ └── connect │ │ └── bigquery │ │ ├── BigQuerySinkConnectorTest.java │ │ ├── BigQuerySinkTaskTest.java │ │ ├── BigQueryStorageApiBatchSinkTaskTest.java │ │ ├── BigQueryStorageApiSinkTaskTest.java │ │ ├── ErrantRecordHandlerTest.java │ │ ├── GcpClientBuilderProjectTest.java │ │ ├── GcsToBqLoadRunnableTest.java │ │ ├── MergeQueriesTest.java │ │ ├── RecordTableResolverTest.java │ │ ├── SchemaManagerTest.java │ │ ├── SinkPropertiesFactory.java │ │ ├── SinkTaskPropertiesFactory.java │ │ ├── config │ │ ├── BigQuerySinkConfigTest.java │ │ ├── CredentialsValidatorTest.java │ │ ├── GcsBucketValidatorTest.java │ │ ├── MultiPropertyValidatorTest.java │ │ ├── PartitioningModeValidatorTest.java │ │ ├── PartitioningTypeValidatorTest.java │ │ └── StorageWriteApiValidatorTest.java │ │ ├── convert │ │ ├── BigQueryRecordConverterTest.java │ │ ├── BigQuerySchemaConverterTest.java │ │ ├── KafkaDataConverterTest.java │ │ └── logicaltype │ │ │ ├── DebeziumLogicalConvertersTest.java │ │ │ └── KafkaLogicalConvertersTest.java │ │ ├── exception │ │ ├── BigQueryErrorResponsesTest.java │ │ ├── BigQueryStorageWriteApiConnectExceptionTest.java │ │ └── BigQueryStorageWriteApiErrorResponsesTest.java │ │ ├── integration │ │ ├── ApplicationStreamIT.java │ │ ├── BaseConnectorIT.java │ │ ├── BigQueryErrantRecordHandlerIT.java │ │ ├── BigQueryErrorResponsesIT.java │ │ ├── BigQuerySinkConnectorIT.java │ │ ├── GcpClientBuilderIT.java │ │ ├── GcsBatchSchemaEvolutionIT.java │ │ ├── StorageWriteApiBatchBigQuerySinkConnectorIT.java │ │ ├── StorageWriteApiBigQuerySinkConnectorIT.java │ │ ├── TimePartitioningIT.java │ │ ├── UpsertDeleteBigQuerySinkConnectorIT.java │ │ ├── UpsertDeleteBigQuerySinkConnectorWithSRIT.java │ │ ├── VersionTestIT.java │ │ └── utils │ │ │ ├── BigQueryTestUtils.java │ │ │ ├── BucketClearer.java │ │ │ ├── SchemaRegistryTestUtils.java │ │ │ ├── TableClearer.java │ │ │ ├── TestCaseLogger.java │ │ │ └── TimePartitioningTestUtils.java │ │ 
├── utils │ │ ├── FieldNameSanitizerTest.java │ │ ├── MockTime.java │ │ └── PartitionedTableIdTest.java │ │ └── write │ │ ├── batch │ │ └── GcsBatchTableWriterTest.java │ │ ├── row │ │ ├── BigQueryWriterTest.java │ │ └── GcsToBqWriterTest.java │ │ └── storage │ │ ├── BigQueryBuilderTest.java │ │ ├── BigQueryWriteSettingsBuilderTest.java │ │ ├── GcsBuilderTest.java │ │ ├── StorageApiBatchModeHandlerTest.java │ │ ├── StorageWriteApiBatchApplicationStreamTest.java │ │ ├── StorageWriteApiDefaultStreamTest.java │ │ └── StorageWriteApiWriterTest.java │ └── resources │ ├── integration_test_cases │ ├── gcs-load │ │ ├── data.json │ │ └── schema.json │ ├── logical-types │ │ ├── data.json │ │ └── schema.json │ ├── matryoshka-dolls │ │ ├── data.json │ │ └── schema.json │ ├── nulls │ │ ├── data.json │ │ └── schema.json │ └── primitives │ │ ├── data.json │ │ └── schema.json │ └── log4j.properties ├── pom.xml ├── scripts └── release_detail.sh └── tools ├── pom.xml └── src ├── main ├── java │ ├── com │ │ └── wepay │ │ │ └── kafka │ │ │ └── connect │ │ │ └── bigquery │ │ │ └── config │ │ │ └── BigQueryConfigDefBean.java │ └── io │ │ └── aiven │ │ └── kafka │ │ └── config │ │ └── tools │ │ ├── BaseConfigDefBean.java │ │ ├── ConfigKeyBean.java │ │ └── ExtendedConfigKeyBean.java └── resources │ └── META-INF │ └── maven │ └── site-tools.xml └── test └── java └── io └── aiven └── kafka └── config └── tools ├── ConfigDefBeanTests.java └── ExtendedConfigKeyBeanTest.java /.github/workflows/build_site.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2024 Copyright 2022 Aiven Oy and 3 | # bigquery-connector-for-apache-kafka project contributors 4 | # 5 | # This software contains code derived from the Confluent BigQuery 6 | # Kafka Connector, Copyright Confluent, Inc, which in turn 7 | # contains code derived from the WePay BigQuery Kafka Connector, 8 | # Copyright WePay, Inc. 
9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, 17 | # software distributed under the License is distributed on an 18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | # KIND, either express or implied. See the License for the 20 | # specific language governing permissions and limitations 21 | # under the License. 22 | # 23 | 24 | 25 | # Simple workflow to build the site and deploy it. 26 | name: Build site and deploy 27 | 28 | on: 29 | # Runs on pushes targeting the default branch 30 | push: 31 | branches: ["main"] 32 | 33 | # Allows you to run this workflow manually from the Actions tab 34 | workflow_dispatch: 35 | 36 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 37 | permissions: 38 | contents: read 39 | pages: write 40 | id-token: write 41 | 42 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 43 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
44 | concurrency: 45 | group: "pages" 46 | cancel-in-progress: false 47 | 48 | jobs: 49 | # Build job 50 | build: 51 | runs-on: ubuntu-latest 52 | steps: 53 | - name: Checkout 54 | uses: actions/checkout@v4 55 | 56 | - name: Build connector 57 | run: mvn install -DskipITs 58 | 59 | - name: Build site tools 60 | run: mvn -f tools 61 | 62 | - name: Build site 63 | run: mvn -f docs 64 | 65 | - name: Upload artifact 66 | uses: actions/upload-pages-artifact@v3 67 | with: 68 | path: ./docs/target/site 69 | 70 | # Deployment job 71 | deploy: 72 | environment: 73 | name: github-pages 74 | url: ${{ steps.deployment.outputs.page_url }} 75 | runs-on: ubuntu-latest 76 | needs: build 77 | steps: 78 | - name: Deploy to GitHub Pages 79 | id: deployment 80 | uses: actions/deploy-pages@v4 81 | -------------------------------------------------------------------------------- /.github/workflows/create_release.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2024 Copyright 2022 Aiven Oy and 3 | # bigquery-connector-for-apache-kafka project contributors 4 | # 5 | # This software contains code derived from the Confluent BigQuery 6 | # Kafka Connector, Copyright Confluent, Inc, which in turn 7 | # contains code derived from the WePay BigQuery Kafka Connector, 8 | # Copyright WePay, Inc. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, 17 | # software distributed under the License is distributed on an 18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | # KIND, either express or implied. See the License for the 20 | # specific language governing permissions and limitations 21 | # under the License. 
22 | # 23 | 24 | name: Create release 25 | 26 | on: 27 | workflow_dispatch: 28 | inputs: 29 | commit_hash: 30 | description: "Hash of 'Release version x.y.z' commit" 31 | required: true 32 | 33 | permissions: 34 | contents: write 35 | pull-requests: write 36 | issues: write 37 | 38 | jobs: 39 | build: 40 | name: Create Release 41 | runs-on: ubuntu-latest 42 | steps: 43 | - name: Checkout code 44 | uses: actions/checkout@v2 45 | with: 46 | ref: ${{ github.event.inputs.commit_hash }} 47 | 48 | - name: Check commit title and extract version 49 | run: | 50 | export commit_title=$(git log --pretty=format:%s -1 ${{ github.event.inputs.commit_hash }}) 51 | echo "Commit title: $commit_title" 52 | if [[ $commit_title =~ ^Release\ version\ [0-9]+\.[0-9]+\.[0-9]+(-(alpha|beta|rc[0-9]+))?$ ]]; then 53 | echo "Valid commit title" 54 | else 55 | echo "Invalid commit title" 56 | exit 1 57 | fi 58 | export version=$(echo ${commit_title} | sed s/^Release\ version\ //g) 59 | echo "Will use version ${version}" 60 | echo "version=${version}" >> $GITHUB_ENV 61 | 62 | - name: Set up JDK 17 63 | uses: actions/setup-java@v4 64 | with: 65 | distribution: 'adopt' 66 | java-version: 17 67 | cache: maven 68 | 69 | - name: Build 70 | run: | 71 | mvn -ntp install -DskipTests 72 | mvn -f kcbq-connector clean package assembly:single@release-artifacts -DskipTests 73 | 74 | export tar_file=$(ls ./kcbq-connector/target/ | grep tar) 75 | export zip_file=$(ls ./kcbq-connector/target/ | grep zip) 76 | echo tar_file=${tar_file} >> $GITHUB_ENV 77 | echo zip_file=${zip_file} >> $GITHUB_ENV 78 | 79 | echo tar_path=`realpath ./kcbq-connector/target/${tar_file}` >> $GITHUB_ENV 80 | echo zip_path=`realpath ./kcbq-connector/target/${zip_file}` >> $GITHUB_ENV 81 | 82 | - name: Create tag 83 | run: | 84 | git config --local user.name "GitHub Action" 85 | git config --local user.email "action@github.com" 86 | git tag -a "v${{ env.version }}" -m "Release version ${{ env.version }}" 87 | git push origin "v${{
env.version }}" 88 | 89 | - name: Create release draft 90 | id: create_release 91 | uses: actions/create-release@v1 92 | env: 93 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 94 | with: 95 | tag_name: "v${{ env.version }}" 96 | release_name: "v${{ env.version }}" 97 | commitish: ${{ github.event.inputs.commit_hash }} 98 | body: | 99 | *Fill in* 100 | draft: true 101 | prerelease: false 102 | 103 | - name: Upload tar 104 | uses: actions/upload-release-asset@v1 105 | env: 106 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 107 | with: 108 | upload_url: ${{ steps.create_release.outputs.upload_url }} 109 | asset_path: ${{ env.tar_path }} 110 | asset_name: ${{ env.tar_file }} 111 | asset_content_type: application/tar 112 | 113 | - name: Upload zip 114 | uses: actions/upload-release-asset@v1 115 | env: 116 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 117 | with: 118 | upload_url: ${{ steps.create_release.outputs.upload_url }} 119 | asset_path: ${{ env.zip_path }} 120 | asset_name: ${{ env.zip_file }} 121 | asset_content_type: application/zip 122 | -------------------------------------------------------------------------------- /.github/workflows/manual.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2024 Copyright 2022 Aiven Oy and 3 | # bigquery-connector-for-apache-kafka project contributors 4 | # 5 | # This software contains code derived from the Confluent BigQuery 6 | # Kafka Connector, Copyright Confluent, Inc, which in turn 7 | # contains code derived from the WePay BigQuery Kafka Connector, 8 | # Copyright WePay, Inc. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 
12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, 17 | # software distributed under the License is distributed on an 18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | # KIND, either express or implied. See the License for the 20 | # specific language governing permissions and limitations 21 | # under the License. 22 | # 23 | 24 | # Workflow to check pull requests and new commits to main branches 25 | # This checks the source in the state as if after the merge. 26 | name: Manual build 27 | on: 28 | workflow_dispatch: 29 | workflow_call: 30 | secrets: 31 | GCP_CREDENTIALS: 32 | KCBQ_TEST_PROJECT: 33 | KCBQ_TEST_DATASET: 34 | KCBQ_TEST_BUCKET: 35 | 36 | permissions: 37 | contents: write 38 | pull-requests: write 39 | issues: write 40 | 41 | 42 | # Disallow concurrent runs for the same PR by cancelling in-progress runs 43 | # when new commits are pushed 44 | concurrency: 45 | group: Manual_Build-${{ github.event.pull_request.number || github.ref }} 46 | cancel-in-progress: true 47 | 48 | jobs: 49 | build: 50 | name: Build 51 | runs-on: ubuntu-latest 52 | steps: 53 | - name: Checkout code 54 | uses: actions/checkout@v2 55 | - name: Dump GitHub context 56 | env: 57 | GITHUB_CONTEXT: ${{ toJson(github) }} 58 | run: echo "$GITHUB_CONTEXT" 59 | - name: Set up JDK 17 60 | uses: actions/setup-java@v4 61 | with: 62 | distribution: 'adopt' 63 | java-version: 17 64 | cache: maven 65 | - name: Integration tests (Maven) 66 | env: 67 | # Necessary for client builder integration tests that run with 68 | # default application credentials 69 | CREDENTIALS_JSON: ${{ secrets.GCP_CREDENTIALS }} 70 | GOOGLE_APPLICATION_CREDENTIALS: /tmp/creds.json 71 | KCBQ_TEST_KEYFILE: /tmp/creds.json 72 | KCBQ_TEST_KEY_SOURCE: FILE 73 | KCBQ_TEST_PROJECT: ${{ secrets.KCBQ_TEST_PROJECT }} 74 | KCBQ_TEST_DATASET: ${{ secrets.KCBQ_TEST_DATASET }} 75 | 
KCBQ_TEST_BUCKET: ${{ secrets.KCBQ_TEST_BUCKET }} 76 | run: | 77 | echo "$CREDENTIALS_JSON" > /tmp/creds.json 78 | export KCBQ_TEST_TABLE_SUFFIX=_$(date +%s)_$RANDOM 79 | mvn -ntp -P ci -Dskip.unit.tests=true verify 80 | - name: Upload integration test results (Maven) 81 | if: always() 82 | uses: actions/upload-artifact@v4 83 | with: 84 | path: | 85 | **/target/failsafe-reports/* 86 | name: integration-test-results 87 | retention-days: 1 88 | -------------------------------------------------------------------------------- /.github/workflows/nightly.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2024 Copyright 2022 Aiven Oy and 3 | # bigquery-connector-for-apache-kafka project contributors 4 | # 5 | # This software contains code derived from the Confluent BigQuery 6 | # Kafka Connector, Copyright Confluent, Inc, which in turn 7 | # contains code derived from the WePay BigQuery Kafka Connector, 8 | # Copyright WePay, Inc. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, 17 | # software distributed under the License is distributed on an 18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | # KIND, either express or implied. See the License for the 20 | # specific language governing permissions and limitations 21 | # under the License. 22 | # 23 | 24 | # Workflow to check pull requests and new commits to main branches 25 | # This checks the source in the state as if after the merge. 
26 | name: Nightly Build 27 | on: 28 | workflow_dispatch: 29 | schedule: ## run GMT 1:17 hours 30 | - cron: '17 1 * * *' 31 | workflow_call: 32 | secrets: 33 | GCP_CREDENTIALS: 34 | KCBQ_TEST_PROJECT: 35 | KCBQ_TEST_DATASET: 36 | KCBQ_TEST_BUCKET: 37 | 38 | permissions: 39 | contents: write 40 | pull-requests: write 41 | issues: write 42 | 43 | 44 | # Disallow concurrent runs for the same PR by cancelling in-progress runs 45 | # when new commits are pushed 46 | #concurrency: 47 | # group: Manual_Build-${{ github.event.pull_request.number || github.ref }} 48 | # cancel-in-progress: true 49 | 50 | jobs: 51 | call-workflow-2-in-local-repo: 52 | uses: ./.github/workflows/manual.yml 53 | secrets: 54 | GCP_CREDENTIALS: ${{ secrets.GCP_CREDENTIALS }} 55 | KCBQ_TEST_PROJECT: ${{ secrets.KCBQ_TEST_PROJECT }} 56 | KCBQ_TEST_DATASET: ${{ secrets.KCBQ_TEST_DATASET }} 57 | KCBQ_TEST_BUCKET: ${{ secrets.KCBQ_TEST_BUCKET }} 58 | -------------------------------------------------------------------------------- /.github/workflows/prs_and_commits.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2024 Copyright 2022 Aiven Oy and 3 | # bigquery-connector-for-apache-kafka project contributors 4 | # 5 | # This software contains code derived from the Confluent BigQuery 6 | # Kafka Connector, Copyright Confluent, Inc, which in turn 7 | # contains code derived from the WePay BigQuery Kafka Connector, 8 | # Copyright WePay, Inc. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, 17 | # software distributed under the License is distributed on an 18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | # KIND, either express or implied. See the License for the 20 | # specific language governing permissions and limitations 21 | # under the License.
22 | # 23 | 24 | # Workflow to check pull requests and new commits to main branches 25 | # This checks the source in the state as if after the merge. 26 | name: Pull request checks 27 | on: 28 | pull_request: 29 | branches: [ main ] 30 | push: 31 | branches: [ main ] 32 | 33 | # Disallow concurrent runs for the same PR by cancelling in-progress runs 34 | # when new commits are pushed 35 | concurrency: 36 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 37 | cancel-in-progress: true 38 | 39 | jobs: 40 | build: 41 | name: Build 42 | runs-on: ubuntu-latest 43 | steps: 44 | - name: Checkout code 45 | uses: actions/checkout@v2 46 | - name: Set up JDK 17 47 | uses: actions/setup-java@v4 48 | with: 49 | distribution: 'adopt' 50 | java-version: 17 51 | cache: maven 52 | - name: License header check 53 | run: | 54 | mvn -ntp license:remove license:format 55 | if [[ -n $(git status -s) ]]; then 56 | echo 1>&2 'Some files do not have the correct license header:' 57 | git diff --name-only 1>&2 58 | echo 1>&2 'Please update the license headers for these files by running `mvn license:remove license:format`' 59 | exit 1 60 | fi 61 | - name: Build (Maven) 62 | run: mvn -ntp -P ci --batch-mode clean package -DskipTests 63 | - name: Unit tests (Maven) 64 | run: mvn -ntp -P ci --batch-mode test 65 | - name: "Upload build failure reports" 66 | uses: actions/upload-artifact@v4 67 | if: failure() 68 | with: 69 | name: unit-test-results 70 | path: | 71 | **/target/*-reports/** 72 | retention-days: 1 73 | -------------------------------------------------------------------------------- /.github/workflows/release_pr_workflow.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2024 Copyright 2022 Aiven Oy and 3 | # bigquery-connector-for-apache-kafka project contributors 4 | # 5 | # This software contains code derived from the Confluent BigQuery 6 | # Kafka Connector, Copyright Confluent, Inc, which 
in turn 7 | # contains code derived from the WePay BigQuery Kafka Connector, 8 | # Copyright WePay, Inc. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, 17 | # software distributed under the License is distributed on an 18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | # KIND, either express or implied. See the License for the 20 | # specific language governing permissions and limitations 21 | # under the License. 22 | # 23 | 24 | # The workflow to create PRs with release commits. 25 | name: Create release PR 26 | on: 27 | workflow_dispatch: 28 | inputs: 29 | release_version: 30 | description: "Release version '0.1.2' (without 'v')" 31 | required: true 32 | snapshot_version: 33 | description: "Snapshot version '0.2.0-SNAPSHOT' (without 'v')" 34 | required: true 35 | 36 | permissions: 37 | contents: write 38 | pull-requests: write 39 | issues: write 40 | 41 | jobs: 42 | create_release_pr: 43 | name: Create release PR (job) 44 | runs-on: ubuntu-latest 45 | steps: 46 | - name: Check versions 47 | run: | 48 | echo "Checking release version..." 49 | if echo ${{ github.event.inputs.release_version }} | grep --invert-match '^[0-9]\+\.[0-9]\+\.[0-9]\+\(-\(alpha\|beta\|rc[0-9]\+\)\)\?$' > /dev/null; then 50 | echo "Release version is invalid" 51 | exit 1 52 | fi 53 | 54 | echo "Checking snapshot version..."
55 | if echo ${{ github.event.inputs.snapshot_version }} | grep --invert-match '^[0-9]\+\.[0-9]\+\.[0-9]\+-SNAPSHOT$' > /dev/null; then 56 | echo "Snapshot version is invalid" 57 | exit 1 58 | fi 59 | 60 | - name: Checkout main 61 | uses: actions/checkout@v2 62 | with: 63 | ref: main 64 | fetch-depth: 0 65 | 66 | - name: Set up JDK 8 67 | uses: actions/setup-java@v4 68 | with: 69 | distribution: 'adopt' 70 | java-version: 8 71 | cache: maven 72 | 73 | - name: Create release commits 74 | run: | 75 | git config --local user.name "GitHub Action" 76 | git config --local user.email "action@github.com" 77 | mvn -f tools versions:update-parent -DgenerateBackupPoms=false -DparentVersion=${{ github.event.inputs.release_version }} -DskipResolution=true 78 | mvn -f docs versions:update-parent -DgenerateBackupPoms=false -DparentVersion=${{ github.event.inputs.release_version }} -DskipResolution=true 79 | mvn versions:set -DgenerateBackupPoms=false -DnewVersion=${{ github.event.inputs.release_version }} versions:set-property -Dproperty=latestRelease 80 | git add pom.xml **/pom.xml 81 | git commit -m "Release version ${{ github.event.inputs.release_version }}" 82 | mvn -f tools versions:update-parent -DgenerateBackupPoms=false -DparentVersion=${{ github.event.inputs.snapshot_version }} -DskipResolution=true 83 | mvn -f docs versions:update-parent -DgenerateBackupPoms=false -DparentVersion=${{ github.event.inputs.snapshot_version }} -DskipResolution=true 84 | mvn versions:set -DgenerateBackupPoms=false -DnewVersion=${{ github.event.inputs.snapshot_version }} 85 | git add pom.xml **/pom.xml 86 | git commit -m "Bump version to ${{ github.event.inputs.snapshot_version }}" 87 | 88 | - name: Create Pull Request 89 | uses: peter-evans/create-pull-request@v3 90 | with: 91 | branch: release-${{ github.event.inputs.release_version }} 92 | delete-branch: true 93 | draft: true 94 | title: Release version ${{ github.event.inputs.release_version }} 95 | body: | 96 | Proposed changelog: 97 | - 
*fill in* 98 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | build/ 3 | target/ 4 | *.class 5 | *.jar 6 | *.tar 7 | *.zip 8 | 9 | .gradle 10 | **/.checkstyle 11 | 12 | # Intellij 13 | .idea 14 | *.iml 15 | *.iws 16 | *.ipr 17 | .DS_STORE 18 | 19 | # Eclipse 20 | .classpath 21 | .project 22 | .settings 23 | .metadata 24 | 25 | key.json 26 | 27 | test.conf 28 | kcbq-connector/src/test/resources/test.properties 29 | kcbq-connector/test/docker/connect/properties/ 30 | kcbq-connector/out/ 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kafka Connect BigQuery Connector 2 | 3 | This is an implementation of a sink connector from [Apache Kafka](http://kafka.apache.org) to 4 | [Google BigQuery](https://cloud.google.com/bigquery/), built on top 5 | of [Apache Kafka Connect](https://kafka.apache.org/documentation.html#connect). 6 | 7 | ## Documentation 8 | 9 | The Kafka Connect BigQuery Connector documentation is available online at https://aiven-open.github.io/bigquery-connector-for-apache-kafka/. 10 | The site contains a complete list of the configuration options as well as information about the project. 11 | 12 | ## History 13 | 14 | This connector was [originally developed by WePay](https://github.com/wepay/kafka-connect-bigquery). 15 | In late 2020 the project moved to [Confluent](https://github.com/confluentinc/kafka-connect-bigquery), 16 | with both companies taking on maintenance duties. 17 | In 2024, Aiven created [its own fork](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/) 18 | based off the Confluent project in order to continue maintaining an open source, Apache 2-licensed 19 | version of the connector. 
20 | 21 | ## Configuration 22 | 23 | ### Sample 24 | 25 | A simple example connector configuration that reads records from Kafka with 26 | JSON-encoded values and writes their values to BigQuery: 27 | 28 | ```json 29 | { 30 | "connector.class": "com.wepay.kafka.connect.bigquery.BigQuerySinkConnector", 31 | "topics": "users, clicks, payments", 32 | "tasks.max": "3", 33 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 34 | 35 | "project": "kafka-ingest-testing", 36 | "defaultDataset": "kcbq-example", 37 | "keyfile": "/tmp/bigquery-credentials.json" 38 | } 39 | ``` 40 | 41 | ### Complete docs 42 | See the [configuration documentation](https://aiven-open.github.io/bigquery-connector-for-apache-kafka/configuration.html) for a list of the connector's 43 | configuration properties. 44 | 45 | ## Download 46 | 47 | Download information is available on the [project web site](https://aiven-open.github.io/bigquery-connector-for-apache-kafka). 48 | 49 | ## Building from source 50 | 51 | This project uses the Maven build tool. 52 | 53 | To compile the project without running the integration tests execute `mvn package -DskipITs`. 54 | 55 | To build the documentation execute the following steps: 56 | 57 | ``` 58 | mvn install -DskipITs 59 | mvn -f tools 60 | mvn -f docs 61 | ``` 62 | 63 | Once the documentation is built it can be run by executing `mvn -f docs site:run`. 64 | 65 | ### Integration test setup 66 | 67 | Integration tests require a live BigQuery and Kafka installation. Configuring those components is beyond the scope of this document. 68 | 69 | Once you have the test environment ready, integration specific environment variables must be set. 70 | 71 | #### Local configuration 72 | 73 | - GOOGLE_APPLICATION_CREDENTIALS - the path to a json file that was downloaded when the GCP account key was created.
74 | - KCBQ_TEST_BUCKET - the name of the bucket to use for testing, 75 | - KCBQ_TEST_DATASET - the name of the dataset to use for testing, 76 | - KCBQ_TEST_KEYFILE - same as the GOOGLE_APPLICATION_CREDENTIALS 77 | - KCBQ_TEST_PROJECT - the name of the project to use. 78 | 79 | #### GitHub configuration 80 | 81 | To run the integration tests from a GitHub action the following variables must be set 82 | 83 | - GCP_CREDENTIALS - the contents of a json file that was download when the GCP account key was created. 84 | - KCBQ_TEST_BUCKET - the bucket to use for the tests 85 | - KCBQ_TEST_DATASET - the data set to use for the tests. 86 | - KCBQ_TEST_PROJECT - the project to use for the tests. 87 | -------------------------------------------------------------------------------- /config/checkstyle/suppressions.xml: -------------------------------------------------------------------------------- 1 | 2 | 26 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /config/copyright/custom-header-styles.xml: -------------------------------------------------------------------------------- 1 | 2 | 26 | 27 | 28 | /* 29 | * 30 | */EOL 31 | (\s|\t)*/\*.*$ 32 | .*\*/(\s|\t)*$ 33 | false 34 | true 35 | false 36 | 37 | 38 | /* 39 | * 40 | */ 41 | #!.* 42 | (\s|\t)*/\*.* 43 | .*\*/(\s|\t)*$ 44 | false 45 | true 46 | false 47 | 48 | -------------------------------------------------------------------------------- /docs/src/site/custom/project-info-reports.properties: -------------------------------------------------------------------------------- 1 | 2 | report.team.contributors.intro = The following additional people have contributed to this project through the way of suggestions, patches or documentation.\ 3 |

If you would like to be included in this list please submit a pull request adding your information to the pom.xml and include in the description a link to a previously accepted pull request.

4 | -------------------------------------------------------------------------------- /docs/src/site/markdown/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. 
Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 7. Two reviews are required from the maintainers team before merging new features into main. 39 | 8. Before merging, clean up the commit history for the PR. Each commit should be self-contained with an informative message, since each commit will be added to the history for this project. 40 | 41 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 42 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 43 | 44 | ## Developer Certificate of Origin 45 | 46 | All connectors for Apache Kafka in this repository are open source products released under the Apache 2.0 license (see either [the Apache site](https://www.apache.org/licenses/LICENSE-2.0) or the [LICENSE.txt file](LICENSE.txt)). The Apache 2.0 license allows you to freely use, modify, distribute, and sell your own products that include Apache 2.0 licensed software. 47 | 48 | We respect intellectual property rights of others, and we want to make sure all incoming contributions are correctly attributed and licensed. A Developer Certificate of Origin (DCO) is a lightweight mechanism to do that. 49 | 50 | So we require that, by making a contribution, every contributor certifies that: 51 | ``` 52 | The contribution was created in whole or in part by me and I have the right to submit it under the open source license 53 | indicated in the file 54 | ``` 55 | 56 | ## Finding contributions to work on 57 | Looking at the existing issues is a great way to find something to contribute to. 
As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 58 | 59 | 60 | ## Code of Conduct 61 | This project has adopted the [Contributor Covenant Code of Conduct](CODE_OF_CONDUCT.html). 62 | For more information see the [Code of Conduct FAQ](https://www.contributor-covenant.org/faq/). 63 | 64 | 65 | ## Security issue notifications 66 | If you discover a potential security issue in this project, we ask that you report it according to [Security Policy](SECURITY.html). Please do **not** create a public GitHub issue. 67 | 68 | ## Licensing 69 | 70 | See the [LICENSE](LICENSE.html) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 71 | -------------------------------------------------------------------------------- /docs/src/site/markdown/SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | We release patches for security vulnerabilities. Which versions are eligible for 6 | receiving such patches depends on the [CVSS](https://www.first.org/cvss/) rating. 7 | 8 | ## Reporting a Vulnerability 9 | 10 | Please report (suspected) security vulnerabilities to our **[bug bounty 11 | program](https://bugcrowd.com/aiven-mbb-og)**. You will receive a response from 12 | us within 2 working days. If the issue is confirmed, we will release a patch as 13 | soon as possible depending on impact and complexity. 14 | 15 | ## Qualifying Vulnerabilities 16 | 17 | Any reproducible vulnerability that has a severe effect on the security or 18 | privacy of our users is likely to be in scope for the program. 19 | 20 | We generally **are not** interested in the following issues: 21 | * Social engineering (e.g. 
phishing, vishing, smishing) attacks 22 | * Brute force, DoS, text injection 23 | * Missing best practices such as HTTP security headers (CSP, X-XSS, etc.), 24 | email (SPF/DKIM/DMARC records), SSL/TLS configuration. 25 | * Software version disclosure / Banner identification issues / Descriptive 26 | error messages or headers (e.g. stack traces, application or server errors). 27 | * Clickjacking on pages with no sensitive actions 28 | * Theoretical vulnerabilities where you can't demonstrate a significant 29 | security impact with a proof of concept. 30 | -------------------------------------------------------------------------------- /docs/src/site/markdown/configuration.md.vm: -------------------------------------------------------------------------------- 1 | # BigQuery connector configuration options 2 | 3 | All the configuration options are listed below in alphabetical order. 4 | 5 | #foreach ($configKey in ${extendedConfigDef.configKeys}) 6 | #set($parents = ${extendedConfigDef.parents(${configKey.name})}) 7 | 8 | ${esc.hash}${esc.hash} ${configKey.displayName} 9 | 10 | #if (${configKey.isDeprecated}) 11 | **${configKey.deprecated}** 12 | #end 13 | 14 | - Configuration option: ${configKey.name} 15 | 16 | #if ($stringUtils.isNotEmpty(${configKey.since})) 17 | - Since: ${configKey.since} 18 | #end 19 | 20 | - Default value: ${configKey.getDefaultValue()|"none"} 21 | - Type: $configKey.type 22 | #if (${configKey.validator}) 23 | - Valid values: ${configKey.validator|"no restrictions"} 24 | #end 25 | - Importance: $configKey.importance 26 | #if (!$parents.isEmpty) 27 | - Options that influence if or how this option can be used: 28 | 29 | #foreach ($p in $parents) 30 | - ${p.name} 31 | 32 | #end 33 | 34 | #end 35 | 36 | #if (!${configKey.dependents.isEmpty}) 37 | 38 | - Options that this option influences: 39 | 40 | #foreach ($p in ${configKey.dependents}) 41 | - ${p} 42 | 43 | #end 44 | 45 | #end 46 | 47 | 
${extendedConfigDef.markdownEscape(${configKey.documentation})} 48 | 49 | 50 | #end 51 | -------------------------------------------------------------------------------- /docs/src/site/markdown/designNotes.md.vm: -------------------------------------------------------------------------------- 1 | # Design Notes 2 | 3 | The BigQuery sink connector supports two distinct paths for inserting data into BigQuery. The original BatchLoader path is uses GCS to store intermediate files before writing them to tables in BigQuery. The second path is to use the StorageWriteAPI to stream the data to BigQuery. 4 | 5 | ${esc.hash}${esc.hash} The general flow 6 | 7 | 1. Records come into the connector from Kafka. 8 | 2. They are processed and converted into BigQuery table data. 9 | 3. The table data are written to temporary files in GCS. 10 | 4. The data from the files is written to BigQuery through either: 11 | 1. Batch loading 12 | 2. StorageWriterAPI. 13 | 14 | 15 | ${esc.hash}${esc.hash} Configuration options that are influenced by other options 16 | 17 | #foreach ($configKey in ${extendedConfigDef.configKeys}) 18 | #set($parents = ${extendedConfigDef.parents(${configKey.name})}) 19 | #if (!$parents.isEmpty) 20 | 21 | ${esc.hash}${esc.hash}${esc.hash} ${configKey.name} 22 | 23 | #foreach ($p in $parents) 24 | 25 | - ${p.name} 26 | 27 | #end 28 | 29 | #end 30 | 31 | #end 32 | -------------------------------------------------------------------------------- /docs/src/site/markdown/index.md.vm: -------------------------------------------------------------------------------- 1 | # Kafka Connect BigQuery Connector 2 | 3 | This is an implementation of a sink connector from [Apache Kafka](http://kafka.apache.org) to 4 | [Google BigQuery](https://cloud.google.com/bigquery/), built on top 5 | of [Apache Kafka Connect](https://kafka.apache.org/documentation.html#connect). 
6 | 7 | ${esc.hash}${esc.hash} Download 8 | 9 | The current release is [v${latestRelease}](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/releases/tag/v${latestRelease}) 10 | 11 | We provide the following convenience packages 12 | 13 | - Connector + dependencies [tar](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/releases/download/v${latestRelease}/bigquery-connector-for-apache-kafka-${latestRelease}.tar) [zip](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/releases/download/v${latestRelease}/bigquery-connector-for-apache-kafka-${latestRelease}.zip) | 14 | - Source [tar.gz](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/archive/refs/tags/v${latestRelease}.tar.gz) [zip](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/archive/refs/tags/v${latestRelease}.zip) 15 | 16 | See the [release notes](RELEASE_NOTES.html) for information on all releases. 17 | 18 | The Kafka Connect BigQuery Connector is dependent upon or uses the following: 19 | 20 | - [Apache Kafka Connect](https://kafka.apache.org/documentation.html#connect) 21 | - [Apache Kafka](http://kafka.apache.org) 22 | - [Google BigQuery](https://cloud.google.com/bigquery/) 23 | 24 | 25 | ${esc.hash}${esc.hash} History 26 | 27 | This connector was [originally developed by WePay](https://github.com/wepay/kafka-connect-bigquery). 28 | In late 2020 the project moved to [Confluent](https://github.com/confluentinc/kafka-connect-bigquery), 29 | with both companies taking on maintenance duties. 30 | In 2024, [Aiven](https://aiven.io) created [its own fork](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/) 31 | based off the Confluent project in order to continue maintaining an open source, Apache 2-licensed 32 | version of the connector. 
33 | 34 | ${esc.hash}${esc.hash} Configuration 35 | 36 | ${esc.hash}${esc.hash}${esc.hash} Sample 37 | 38 | An example connector configuration that reads records from Kafka with 39 | JSON-encoded values and writes their values to BigQuery: 40 | 41 | ```json 42 | { 43 | "connector.class": "com.wepay.kafka.connect.bigquery.BigQuerySinkConnector", 44 | "topics": "users, clicks, payments", 45 | "tasks.max": "3", 46 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 47 | 48 | "project": "kafka-ingest-testing", 49 | "defaultDataset": "kcbq-example", 50 | "keyfile": "/tmp/bigquery-credentials.json" 51 | } 52 | ``` 53 | 54 | ${esc.hash}${esc.hash}${esc.hash} Configuration options documentation 55 | 56 | See the [Configuration options](configuration.html) for a list of the connector's configuration properties. 57 | 58 | ${esc.hash}${esc.hash} Building from source 59 | 60 | This project uses the Maven build tool. 61 | 62 | To compile the project without running the integration tests execute `mvn package -DskipITs`. 63 | 64 | To build the documentation execute the following steps: 65 | 66 | ``` 67 | mvn install -DskipITs 68 | mvn -f tools 69 | mvn -f docs 70 | ``` 71 | 72 | Once the documentation is built it can be run by executing `mvn -f docs site:run`. 73 | 74 | 75 | ${esc.hash}${esc.hash}${esc.hash} Integration test setup 76 | 77 | Integration tests require a live BigQuery and Kafka installation. Configuring those components is beyond the scope of this document. 78 | 79 | Once you have the test environment ready, integration-specific environment variables must be set. 80 | 81 | ${esc.hash}${esc.hash}${esc.hash}${esc.hash} Local configuration 82 | 83 | - GOOGLE_APPLICATION_CREDENTIALS - the path to a json file that was downloaded when the GCP account key was created. 
84 | - KCBQ_TEST_BUCKET - the name of the bucket to use for testing, 85 | - KCBQ_TEST_DATASET - the name of the dataset to use for testing, 86 | - KCBQ_TEST_KEYFILE - same as the GOOGLE_APPLICATION_CREDENTIALS 87 | - KCBQ_TEST_PROJECT - the name of the project to use. 88 | 89 | ${esc.hash}${esc.hash}${esc.hash}${esc.hash} GitHub configuration 90 | 91 | To run the integration tests from a GitHub action the following variables must be set 92 | 93 | - GCP_CREDENTIALS - the contents of a json file that was download when the GCP account key was created. 94 | - KCBQ_TEST_BUCKET - the bucket to use for the tests 95 | - KCBQ_TEST_DATASET - the data set to use for the tests. 96 | - KCBQ_TEST_PROJECT - the project to use for the tests. 97 | -------------------------------------------------------------------------------- /docs/src/site/site.xml: -------------------------------------------------------------------------------- 1 | 2 | 26 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 72 | 73 | 74 | org.apache.maven.skins 75 | maven-fluido-skin 76 | 2.0.1 77 | 78 | 79 | 80 | false 81 | true 82 | true 83 | 84 | aiven-open/bigquery-connector-for-apache-kafka 85 | right 86 | orange 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /kcbq-api/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 26 | 29 | 4.0.0 30 | 31 | 32 | com.wepay.kcbq 33 | kcbq-parent 34 | 2.11.0-SNAPSHOT 35 | .. 
36 | 37 | 38 | kcbq-api 39 | kafka-connect-bigquery-api 40 | 41 | 42 | ${project.parent.basedir} 43 | 44 | 45 | 46 | 47 | org.apache.kafka 48 | connect-api 49 | 50 | 51 | 52 | 53 | 54 | 55 | org.apache.maven.plugins 56 | maven-compiler-plugin 57 | 58 | 59 | org.apache.maven.plugins 60 | maven-checkstyle-plugin 61 | 62 | 63 | org.apache.maven.plugins 64 | maven-jar-plugin 65 | 66 | 67 | org.apache.maven.plugins 68 | maven-site-plugin 69 | 70 | true 71 | false 72 | false 73 | 74 | 75 | 76 | org.apache.maven.plugins 77 | maven-project-info-reports-plugin 78 | 79 | true 80 | 81 | 82 | 83 | org.apache.maven.plugins 84 | maven-surefire-plugin 85 | 86 | true 87 | 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/KafkaSchemaRecordType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.api; 25 | 26 | 27 | /** 28 | * Enum class for Kafka schema or record type, either value or key. 29 | */ 30 | public enum KafkaSchemaRecordType { 31 | 32 | VALUE("value"), 33 | KEY("key"); 34 | 35 | private final String str; 36 | 37 | KafkaSchemaRecordType(String str) { 38 | this.str = str; 39 | } 40 | 41 | public String toString() { 42 | return this.str; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/SchemaRetriever.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.api; 25 | 26 | import java.util.Map; 27 | import org.apache.kafka.connect.data.Schema; 28 | import org.apache.kafka.connect.sink.SinkRecord; 29 | 30 | /** 31 | * Interface for retrieving the most up-to-date schemas for a given Sink Record. 
Used in 32 | * automatic table creation and schema updates. 33 | */ 34 | public interface SchemaRetriever { 35 | /** 36 | * Called with all of the configuration settings passed to the connector via its 37 | * {@link org.apache.kafka.connect.sink.SinkConnector#start(Map)} method. 38 | * 39 | * @param properties The configuration settings of the connector. 40 | */ 41 | void configure(Map properties); 42 | 43 | /** 44 | * Retrieve the most current key schema for the given sink record. 45 | * 46 | * @param record The record to retrieve a key schema for. 47 | * @return The key Schema for the given record. 48 | */ 49 | Schema retrieveKeySchema(SinkRecord record); 50 | 51 | /** 52 | * Retrieve the most current value schema for the given sink record. 53 | * 54 | * @param record The record to retrieve a value schema for. 55 | * @return The value Schema for the given record. 56 | */ 57 | Schema retrieveValueSchema(SinkRecord record); 58 | } 59 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/assembly/release-tar.xml: -------------------------------------------------------------------------------- 1 | 25 | 28 | release-tar 29 | 30 | tar 31 | 32 | false 33 | 34 | 35 | / 36 | true 37 | false 38 | runtime 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/assembly/release-zip.xml: -------------------------------------------------------------------------------- 1 | 25 | 28 | release-zip 29 | 30 | zip 31 | 32 | false 33 | 34 | 35 | / 36 | true 37 | false 38 | runtime 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/ErrantRecordHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 
4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery; 25 | 26 | import com.google.cloud.bigquery.BigQueryError; 27 | import java.util.Arrays; 28 | import java.util.List; 29 | import java.util.Map; 30 | import java.util.Set; 31 | import org.apache.kafka.connect.sink.ErrantRecordReporter; 32 | import org.apache.kafka.connect.sink.SinkRecord; 33 | import org.slf4j.Logger; 34 | import org.slf4j.LoggerFactory; 35 | 36 | public class ErrantRecordHandler { 37 | private static final Logger logger = LoggerFactory.getLogger(ErrantRecordHandler.class); 38 | private static final List allowedBigQueryErrorReason = Arrays.asList("invalid"); 39 | private final ErrantRecordReporter errantRecordReporter; 40 | 41 | public ErrantRecordHandler(ErrantRecordReporter errantRecordReporter) { 42 | this.errantRecordReporter = errantRecordReporter; 43 | } 44 | 45 | public void reportErrantRecords(Set records, Exception e) { 46 | if (errantRecordReporter != null) { 47 | logger.debug("Sending {} records to DLQ", records.size()); 48 | for (SinkRecord r : records) { 49 | // Reporting records in async mode 50 | errantRecordReporter.report(r, e); 51 | } 52 | } else 
{ 53 | logger.warn("Cannot send Records to DLQ as ErrantRecordReporter is null"); 54 | } 55 | } 56 | 57 | public void reportErrantRecords(Map rowToError) { 58 | if (errantRecordReporter != null) { 59 | logger.debug("Sending {} records to DLQ", rowToError.size()); 60 | for (Map.Entry rowToErrorEntry : rowToError.entrySet()) { 61 | // Reporting records in async mode 62 | errantRecordReporter.report(rowToErrorEntry.getKey(), rowToErrorEntry.getValue()); 63 | } 64 | } else { 65 | logger.warn("Cannot send Records to DLQ as ErrantRecordReporter is null"); 66 | } 67 | } 68 | 69 | public ErrantRecordReporter getErrantRecordReporter() { 70 | return errantRecordReporter; 71 | } 72 | 73 | public boolean isErrorReasonAllowed(List bqErrorList) { 74 | for (BigQueryError bqError : bqErrorList) { 75 | boolean errorMatch = false; 76 | String bqErrorReason = bqError.getReason(); 77 | for (String allowedBqErrorReason : allowedBigQueryErrorReason) { 78 | if (bqErrorReason.equalsIgnoreCase(allowedBqErrorReason)) { 79 | errorMatch = true; 80 | break; 81 | } 82 | } 83 | if (!errorMatch) { 84 | return false; 85 | } 86 | } 87 | return true; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkTaskConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import java.util.Map; 27 | import org.apache.kafka.common.config.ConfigDef; 28 | 29 | /** 30 | * Class for task-specific configuration properties. 31 | */ 32 | public class BigQuerySinkTaskConfig extends BigQuerySinkConfig { 33 | 34 | public static final String GCS_BQ_TASK_CONFIG = "GCSBQTask"; 35 | public static final String TASK_ID_CONFIG = "taskId"; 36 | public static final ConfigDef.Importance TASK_ID_IMPORTANCE = ConfigDef.Importance.LOW; 37 | private static final ConfigDef.Type GCS_BQ_TASK_TYPE = ConfigDef.Type.BOOLEAN; 38 | private static final boolean GCS_BQ_TASK_DEFAULT = false; 39 | private static final ConfigDef.Importance GCS_BQ_TASK_IMPORTANCE = ConfigDef.Importance.LOW; 40 | private static final ConfigDef.Type TASK_ID_TYPE = ConfigDef.Type.INT; 41 | 42 | /** 43 | * @param properties A Map detailing configuration properties and their respective values. 44 | */ 45 | public BigQuerySinkTaskConfig(Map properties) { 46 | super(config(), properties); 47 | } 48 | 49 | /** 50 | * Return a ConfigDef object used to define this config's fields. 51 | * 52 | * @return A ConfigDef object used to define this config's fields. 
53 | */ 54 | public static ConfigDef config() { 55 | return BigQuerySinkConfig.getConfig() 56 | .defineInternal( 57 | GCS_BQ_TASK_CONFIG, 58 | GCS_BQ_TASK_TYPE, 59 | GCS_BQ_TASK_DEFAULT, 60 | GCS_BQ_TASK_IMPORTANCE 61 | ).defineInternal( 62 | TASK_ID_CONFIG, 63 | TASK_ID_TYPE, 64 | ConfigDef.NO_DEFAULT_VALUE, 65 | TASK_ID_IMPORTANCE 66 | ); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/GcsBucketValidator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.AUTO_CREATE_BUCKET_CONFIG; 27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.ENABLE_BATCH_CONFIG; 28 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.GCS_BUCKET_NAME_CONFIG; 29 | 30 | import com.google.cloud.storage.Bucket; 31 | import com.google.cloud.storage.Storage; 32 | import com.google.common.annotations.VisibleForTesting; 33 | import com.wepay.kafka.connect.bigquery.GcpClientBuilder; 34 | import java.util.Arrays; 35 | import java.util.Collection; 36 | import java.util.Collections; 37 | import java.util.List; 38 | import java.util.Optional; 39 | 40 | public class GcsBucketValidator extends MultiPropertyValidator { 41 | 42 | private static final Collection DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList( 43 | ENABLE_BATCH_CONFIG, AUTO_CREATE_BUCKET_CONFIG 44 | )); 45 | 46 | public GcsBucketValidator() { 47 | super(GCS_BUCKET_NAME_CONFIG); 48 | } 49 | 50 | @Override 51 | protected Collection dependents() { 52 | return DEPENDENTS; 53 | } 54 | 55 | @Override 56 | protected Optional doValidate(BigQuerySinkConfig config) { 57 | Storage gcs; 58 | try { 59 | gcs = new GcpClientBuilder.GcsBuilder() 60 | .withConfig(config) 61 | .build(); 62 | } catch (RuntimeException e) { 63 | return Optional.of(String.format( 64 | "Failed to construct GCS client%s", 65 | e.getMessage() != null ? 
": " + e.getMessage() : "" 66 | )); 67 | } 68 | return doValidate(gcs, config); 69 | } 70 | 71 | @VisibleForTesting 72 | Optional doValidate(Storage gcs, BigQuerySinkConfig config) { 73 | List batchLoadedTopics = config.getList(ENABLE_BATCH_CONFIG); 74 | if (batchLoadedTopics == null || batchLoadedTopics.isEmpty()) { 75 | // Batch loading is disabled; no need to validate the GCS bucket 76 | return Optional.empty(); 77 | } 78 | 79 | String bucketName = config.getString(GCS_BUCKET_NAME_CONFIG); 80 | if (bucketName == null || bucketName.trim().isEmpty()) { 81 | return Optional.of("When GCS batch loading is enabled, a bucket must be provided"); 82 | } 83 | 84 | if (config.getBoolean(AUTO_CREATE_BUCKET_CONFIG)) { 85 | return Optional.empty(); 86 | } 87 | 88 | Bucket bucket = gcs.get(bucketName); 89 | if (bucket == null) { 90 | return Optional.of(String.format( 91 | "Automatic bucket creation is disabled but the GCS bucket %s does not exist. " 92 | + "Please either manually create this table before restarting the connector or enable automatic bucket creation " 93 | + "by the connector", 94 | bucketName 95 | )); 96 | } 97 | 98 | return Optional.empty(); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/MultiPropertyValidator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import java.util.Collection; 27 | import java.util.List; 28 | import java.util.Map; 29 | import java.util.Objects; 30 | import java.util.Optional; 31 | import org.apache.kafka.common.config.ConfigValue; 32 | 33 | public abstract class MultiPropertyValidator { 34 | 35 | private final String propertyName; 36 | 37 | protected MultiPropertyValidator(String propertyName) { 38 | this.propertyName = propertyName; 39 | } 40 | 41 | public String propertyName() { 42 | return propertyName; 43 | } 44 | 45 | public Optional validate(ConfigValue value, ConfigT config, Map valuesByName) { 46 | // Only perform follow-up validation if the property doesn't already have an error associated with it 47 | if (!value.errorMessages().isEmpty()) { 48 | return Optional.empty(); 49 | } 50 | 51 | boolean dependentsAreValid = dependents().stream() 52 | .map(valuesByName::get) 53 | .filter(Objects::nonNull) 54 | .map(ConfigValue::errorMessages) 55 | .allMatch(List::isEmpty); 56 | // Also ensure that all of the other properties that the validation for this one depends on don't already have errors 57 | if (!dependentsAreValid) { 58 | return Optional.empty(); 59 | } 60 | 61 | try { 62 | return doValidate(config); 63 | } catch (RuntimeException e) { 64 | return Optional.of( 65 | "An unexpected error occurred during validation" 66 | + (e.getMessage() != null ? 
": " + e.getMessage() : "") 67 | ); 68 | } 69 | } 70 | 71 | protected abstract Collection dependents(); 72 | 73 | protected abstract Optional doValidate(ConfigT config); 74 | } 75 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/PartitioningModeValidator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG; 27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG; 28 | 29 | import java.util.Arrays; 30 | import java.util.Collection; 31 | import java.util.Collections; 32 | import java.util.Optional; 33 | 34 | public class PartitioningModeValidator extends MultiPropertyValidator { 35 | private static final Collection DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList( 36 | BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG 37 | )); 38 | 39 | public PartitioningModeValidator() { 40 | super(BIGQUERY_PARTITION_DECORATOR_CONFIG); 41 | } 42 | 43 | @Override 44 | protected Collection dependents() { 45 | return DEPENDENTS; 46 | } 47 | 48 | @Override 49 | protected Optional doValidate(BigQuerySinkConfig config) { 50 | if (!config.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)) { 51 | return Optional.empty(); 52 | } 53 | 54 | if (config.getTimestampPartitionFieldName().isPresent()) { 55 | return Optional.of(String.format("Only one partitioning mode may be specified for the connector. 
" 56 | + "Use either %s OR %s.", 57 | BIGQUERY_PARTITION_DECORATOR_CONFIG, 58 | BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG 59 | )); 60 | } else { 61 | return Optional.empty(); 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/PartitioningTypeValidator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG; 27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TABLE_CREATE_CONFIG; 28 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TIME_PARTITIONING_TYPE_CONFIG; 29 | 30 | import com.google.cloud.bigquery.TimePartitioning; 31 | import java.util.Arrays; 32 | import java.util.Collection; 33 | import java.util.Collections; 34 | import java.util.Optional; 35 | 36 | public class PartitioningTypeValidator extends MultiPropertyValidator { 37 | private static final Collection DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList( 38 | BIGQUERY_PARTITION_DECORATOR_CONFIG, TABLE_CREATE_CONFIG 39 | )); 40 | 41 | public PartitioningTypeValidator() { 42 | super(TIME_PARTITIONING_TYPE_CONFIG); 43 | } 44 | 45 | @Override 46 | protected Collection dependents() { 47 | return DEPENDENTS; 48 | } 49 | 50 | @Override 51 | protected Optional doValidate(BigQuerySinkConfig config) { 52 | if (!config.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG) || !config.getBoolean(TABLE_CREATE_CONFIG)) { 53 | return Optional.empty(); 54 | } 55 | 56 | Optional timePartitioningType = config.getTimePartitioningType(); 57 | 58 | if (!Optional.of(TimePartitioning.Type.DAY).equals(timePartitioningType)) { 59 | return Optional.of( 60 | "Tables must be partitioned by DAY when using partition decorator syntax. " 61 | + "Either configure the connector with the DAY time partitioning type, " 62 | + "disable automatic table creation, or disable partition decorator syntax." 
63 | ); 64 | } 65 | 66 | return Optional.empty(); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/UpsertDeleteValidator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.config; 25 | 26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.DELETE_ENABLED_CONFIG; 27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.KAFKA_KEY_FIELD_NAME_CONFIG; 28 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.MERGE_INTERVAL_MS_CONFIG; 29 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.MERGE_RECORDS_THRESHOLD_CONFIG; 30 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.UPSERT_ENABLED_CONFIG; 31 | 32 | import java.util.Arrays; 33 | import java.util.Collection; 34 | import java.util.Collections; 35 | import java.util.Optional; 36 | import org.slf4j.Logger; 37 | import org.slf4j.LoggerFactory; 38 | 39 | public abstract class UpsertDeleteValidator extends MultiPropertyValidator { 40 | private static final Collection DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList( 41 | MERGE_INTERVAL_MS_CONFIG, MERGE_RECORDS_THRESHOLD_CONFIG, KAFKA_KEY_FIELD_NAME_CONFIG 42 | )); 43 | private static final Logger logger = LoggerFactory.getLogger(UpsertDeleteValidator.class); 44 | 45 | private UpsertDeleteValidator(String propertyName) { 46 | super(propertyName); 47 | } 48 | 49 | @Override 50 | protected Collection dependents() { 51 | return DEPENDENTS; 52 | } 53 | 54 | @Override 55 | protected Optional doValidate(BigQuerySinkConfig config) { 56 | if (!modeEnabled(config)) { 57 | return Optional.empty(); 58 | } 59 | 60 | long mergeInterval = config.getLong(MERGE_INTERVAL_MS_CONFIG); 61 | long mergeRecordsThreshold = config.getLong(MERGE_RECORDS_THRESHOLD_CONFIG); 62 | 63 | if (mergeInterval == -1 && mergeRecordsThreshold == -1) { 64 | return Optional.of(String.format( 65 | "%s and %s cannot both be -1", 66 | MERGE_INTERVAL_MS_CONFIG, 67 | MERGE_RECORDS_THRESHOLD_CONFIG 68 | )); 69 | } 70 | 71 | if (mergeInterval != -1 && mergeInterval < 10_000L) { 72 | logger.warn(String.format( 
73 | "%s should not be set to less than 10 seconds. A validation would be introduced in a future release to " 74 | + "this effect.", 75 | MERGE_INTERVAL_MS_CONFIG 76 | )); 77 | } 78 | 79 | if (!config.getKafkaKeyFieldName().isPresent()) { 80 | return Optional.of(String.format( 81 | "%s must be specified when %s is set to true", 82 | KAFKA_KEY_FIELD_NAME_CONFIG, 83 | propertyName() 84 | )); 85 | } 86 | 87 | return Optional.empty(); 88 | } 89 | 90 | /** 91 | * @param config the user-provided configuration 92 | * @return whether the write mode for the validator (i.e., either upsert or delete) is enabled 93 | */ 94 | protected abstract boolean modeEnabled(BigQuerySinkConfig config); 95 | 96 | public static class UpsertValidator extends UpsertDeleteValidator { 97 | public UpsertValidator() { 98 | super(UPSERT_ENABLED_CONFIG); 99 | } 100 | 101 | @Override 102 | protected boolean modeEnabled(BigQuerySinkConfig config) { 103 | return config.getBoolean(UPSERT_ENABLED_CONFIG); 104 | } 105 | } 106 | 107 | public static class DeleteValidator extends UpsertDeleteValidator { 108 | public DeleteValidator() { 109 | super(DELETE_ENABLED_CONFIG); 110 | } 111 | 112 | @Override 113 | protected boolean modeEnabled(BigQuerySinkConfig config) { 114 | return config.getBoolean(DELETE_ENABLED_CONFIG); 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/RecordConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 
9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.convert; 25 | 26 | import com.wepay.kafka.connect.bigquery.api.KafkaSchemaRecordType; 27 | import org.apache.kafka.connect.sink.SinkRecord; 28 | 29 | /** 30 | * Interface for converting from a {@link SinkRecord} to some other kind of record. 31 | * 32 | * @param The type of record to convert to. 33 | */ 34 | public interface RecordConverter { 35 | /** 36 | * @param record The record to convert. 37 | * @param recordType The type of the record to convert, either value or key. 38 | * @return The converted record. 39 | */ 40 | R convertRecord(SinkRecord record, KafkaSchemaRecordType recordType); 41 | 42 | } 43 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/SchemaConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 
9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.convert; 25 | 26 | import org.apache.kafka.connect.data.Schema; 27 | 28 | /** 29 | * Interface for converting from a {@link Schema Kafka Connect Schema} to some other kind of schema. 30 | * 31 | * @param The kind of schema to convert to. 32 | */ 33 | public interface SchemaConverter { 34 | /** 35 | * @param schema The schema to convert. 36 | * @return The converted schema. 37 | */ 38 | S convertSchema(Schema schema); 39 | } 40 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/LogicalConverterRegistry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.convert.logicaltype; 25 | 26 | import java.util.Map; 27 | import java.util.concurrent.ConcurrentHashMap; 28 | 29 | /** 30 | * Registry for finding and accessing {@link LogicalTypeConverter}s. 31 | */ 32 | public class LogicalConverterRegistry { 33 | 34 | private static Map converterMap = new ConcurrentHashMap<>(); 35 | 36 | /** 37 | * Registers the logical type name. Will override existing value if any. 38 | * 39 | * @param logicalTypeName the logical type name to register. 40 | * @param converter the converter for the name. May not be {@code null}. 41 | */ 42 | public static void register(String logicalTypeName, LogicalTypeConverter converter) { 43 | converterMap.put(logicalTypeName, converter); 44 | } 45 | 46 | /** 47 | * Registers the logical type name if it was not previously registered. 48 | * 49 | * @param logicalTypeName the logical type name to register. 50 | * @param converter the converter for the name. May not be {@code null}. 51 | */ 52 | public static void registerIfAbsent(String logicalTypeName, LogicalTypeConverter converter) { 53 | converterMap.putIfAbsent(logicalTypeName, converter); 54 | } 55 | 56 | /** 57 | * Unregisters (removes) the logical type name if it was previously registered. After an {@code unregister} call 58 | * the result of {@link #isRegisteredLogicalType(String)} is guaranteed to be false. 59 | * 60 | * @param logicalTypeName the logical type name to unregister. 
61 | */ 62 | public static void unregister(String logicalTypeName) { 63 | if (logicalTypeName != null) { 64 | converterMap.remove(logicalTypeName); 65 | } 66 | } 67 | 68 | /** 69 | * Gets the converter registered with the logical type name. 70 | * 71 | * @param logicalTypeName the logical type name. May be {@code null}. 72 | * @return the LogicalTypeConverter or {@code null} if none is registered or {@code null} passed for {@code logicalTypeName}. 73 | */ 74 | public static LogicalTypeConverter getConverter(String logicalTypeName) { 75 | return logicalTypeName == null ? null : converterMap.get(logicalTypeName); 76 | } 77 | 78 | /** 79 | * Determines if a converter is registered with the logical type name. 80 | * 81 | * @param typeName the logical type name. 82 | * @return }{@code true} if there is a converter registered, {@code false} otherwise. 83 | */ 84 | public static boolean isRegisteredLogicalType(String typeName) { 85 | return typeName != null && converterMap.containsKey(typeName); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/BigQueryConnectException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.exception; 25 | 26 | import com.google.cloud.bigquery.BigQueryError; 27 | import java.util.List; 28 | import java.util.Map; 29 | import org.apache.kafka.connect.errors.ConnectException; 30 | 31 | /** 32 | * Class for exceptions that occur while interacting with BigQuery, such as login failures, schema 33 | * update failures, and table insertion failures. 34 | */ 35 | public class BigQueryConnectException extends ConnectException { 36 | public BigQueryConnectException(String msg) { 37 | super(msg); 38 | } 39 | 40 | public BigQueryConnectException(String msg, Throwable thr) { 41 | super(msg, thr); 42 | } 43 | 44 | public BigQueryConnectException(Throwable thr) { 45 | super(thr); 46 | } 47 | 48 | public BigQueryConnectException(String tableInfo, Map> errors) { 49 | super(formatInsertAllErrors(tableInfo, errors)); 50 | } 51 | 52 | private static String formatInsertAllErrors(String tableInfo, Map> errorsMap) { 53 | StringBuilder messageBuilder = new StringBuilder(); 54 | messageBuilder.append(String.format("table: %s insertion failed for the following rows:", tableInfo)); 55 | for (Map.Entry> errorsEntry : errorsMap.entrySet()) { 56 | for (BigQueryError error : errorsEntry.getValue()) { 57 | messageBuilder.append(String.format( 58 | "%n\t[row index %d] (location %s, reason: %s): %s", 59 | errorsEntry.getKey(), 60 | error.getLocation(), 61 | error.getReason(), 62 | error.getMessage() 63 | )); 64 | } 65 | } 66 | return messageBuilder.toString(); 67 | } 68 | 69 | 
@Override 70 | public String toString() { 71 | return getCause() != null 72 | ? super.toString() + "\nCaused by: " + getCause().getLocalizedMessage() 73 | : super.toString(); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/BigQueryStorageWriteApiConnectException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.exception; 25 | 26 | 27 | import com.google.cloud.bigquery.storage.v1.RowError; 28 | import java.util.List; 29 | import java.util.Map; 30 | import org.apache.kafka.connect.errors.ConnectException; 31 | 32 | /** 33 | * Exception Class for exceptions that occur while interacting with BigQuery Storage Write API, such as login failures, schema 34 | * update failures, and table insertion failures. 
35 | */ 36 | public class BigQueryStorageWriteApiConnectException extends ConnectException { 37 | 38 | public BigQueryStorageWriteApiConnectException(String message) { 39 | super(message); 40 | } 41 | 42 | public BigQueryStorageWriteApiConnectException(String message, Throwable error) { 43 | super(message, error); 44 | } 45 | 46 | public BigQueryStorageWriteApiConnectException(String tableName, List errors) { 47 | super(formatRowErrors(tableName, errors)); 48 | } 49 | 50 | public BigQueryStorageWriteApiConnectException(String tableName, Map errors) { 51 | super(formatRowErrors(tableName, errors)); 52 | } 53 | 54 | private static String formatRowErrors(String tableName, List errors) { 55 | StringBuilder builder = new StringBuilder(); 56 | builder.append(String.format("Insertion failed at table %s for following rows: ", tableName)); 57 | for (RowError error : errors) { 58 | builder.append(String.format( 59 | "\n [row index %d] (Failure reason : %s) ", 60 | error.getIndex(), 61 | error.getMessage()) 62 | ); 63 | } 64 | return builder.toString(); 65 | } 66 | 67 | private static String formatRowErrors(String tableName, Map errors) { 68 | StringBuilder builder = new StringBuilder(); 69 | builder.append(String.format("Insertion failed at table %s for following rows: ", tableName)); 70 | for (Map.Entry error : errors.entrySet()) { 71 | builder.append(String.format( 72 | "\n [row index %d] (Failure reason : %s) ", 73 | error.getKey(), 74 | error.getValue() 75 | )); 76 | } 77 | return builder.toString(); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/ConversionConnectException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent 
BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.exception; 25 | 26 | import org.apache.kafka.connect.errors.ConnectException; 27 | 28 | /** 29 | * Class for exceptions that occur while converting between Kafka Connect and BigQuery schemas and 30 | * records. 
31 | */ 32 | public class ConversionConnectException extends ConnectException { 33 | public ConversionConnectException(String msg) { 34 | super(msg); 35 | } 36 | 37 | public ConversionConnectException(String msg, Throwable thr) { 38 | super(msg, thr); 39 | } 40 | 41 | public ConversionConnectException(Throwable thr) { 42 | super(thr); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/ExpectedInterruptException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.exception; 25 | 26 | import org.apache.kafka.connect.errors.ConnectException; 27 | 28 | public class ExpectedInterruptException extends ConnectException { 29 | 30 | public ExpectedInterruptException(String message) { 31 | super(message); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/GcsConnectException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.exception; 25 | 26 | import org.apache.kafka.connect.errors.ConnectException; 27 | 28 | /** 29 | * Class for exceptions that occur while interacting with Google Cloud Storage, such as login 30 | * failures. 
31 | */ 32 | public class GcsConnectException extends ConnectException { 33 | public GcsConnectException(String msg) { 34 | super(msg); 35 | } 36 | 37 | public GcsConnectException(String msg, Throwable thr) { 38 | super(msg, thr); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/retrieve/IdentitySchemaRetriever.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.retrieve; 25 | 26 | import com.wepay.kafka.connect.bigquery.api.SchemaRetriever; 27 | import java.util.Map; 28 | import org.apache.kafka.connect.data.Schema; 29 | import org.apache.kafka.connect.sink.SinkRecord; 30 | 31 | /** 32 | * Fetches the key Schema and value Schema from a Sink Record 33 | */ 34 | public class IdentitySchemaRetriever implements SchemaRetriever { 35 | 36 | @Override 37 | public void configure(Map properties) { 38 | } 39 | 40 | @Override 41 | public Schema retrieveKeySchema(SinkRecord record) { 42 | return record.keySchema(); 43 | } 44 | 45 | @Override 46 | public Schema retrieveValueSchema(SinkRecord record) { 47 | return record.valueSchema(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/FieldNameSanitizer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
/**
 * Sanitizes field and table names so they satisfy BigQuery naming rules:
 * names must begin with a letter or underscore and may contain only letters,
 * numbers, and underscores.
 */
public class FieldNameSanitizer {

  // Compiled once: matches every character BigQuery disallows in a name.
  private static final java.util.regex.Pattern INVALID_CHARS =
      java.util.regex.Pattern.compile("[^a-zA-Z0-9_]");

  // Compiled once: matches names whose first character is not a letter or underscore.
  private static final java.util.regex.Pattern INVALID_FIRST_CHAR =
      java.util.regex.Pattern.compile("^[^a-zA-Z_].*");

  /**
   * Replaces all non-letter, non-digit characters with underscores, and prefixes
   * an underscore if the name does not begin with a letter or underscore.
   *
   * @param name the raw name; may not be null
   * @return the sanitized name (an empty input is returned unchanged)
   */
  public static String sanitizeName(String name) {
    String sanitizedName = INVALID_CHARS.matcher(name).replaceAll("_");
    if (INVALID_FIRST_CHAR.matcher(sanitizedName).matches()) {
      sanitizedName = "_" + sanitizedName;
    }
    return sanitizedName;
  }

  /**
   * Recursively sanitizes every key of the given map (and of any nested maps).
   *
   * <p>Note: distinct keys such as "a.b" and "a/b" sanitize to the same value;
   * because the result is a {@link HashMap}, the later entry silently overwrites
   * the earlier one (last write wins) — no exception is thrown.
   *
   * @param map the map whose keys should be sanitized; may not be null
   * @return a new map with sanitized keys; values are unchanged except that
   *         nested maps are themselves recursively sanitized
   */
  @SuppressWarnings("unchecked")
  public static Map<String, Object> replaceInvalidKeys(Map<String, Object> map) {
    Map<String, Object> result = new HashMap<>();
    map.forEach((key, value) -> {
      String sanitizedKey = sanitizeName(key);
      if (value instanceof Map) {
        result.put(sanitizedKey, replaceInvalidKeys((Map<String, Object>) value));
      } else {
        result.put(sanitizedKey, value);
      }
    });
    return result;
  }
}
9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | import com.google.gson.Gson; 27 | import com.google.gson.GsonBuilder; 28 | import com.google.gson.TypeAdapter; 29 | import com.google.gson.stream.JsonReader; 30 | import com.google.gson.stream.JsonToken; 31 | import com.google.gson.stream.JsonWriter; 32 | import java.io.IOException; 33 | import java.nio.ByteBuffer; 34 | import java.util.Base64; 35 | 36 | /** 37 | * Gson utilities for safe JSON handling on Java 9+. 38 | * 39 | *

Exposes a preconfigured {@link Gson} instance that registers a hierarchy adapter for {@link ByteBuffer} 40 | * , ensuring serialization does not rely on illegal reflection into JDK internals (e.g., 41 | * ByteBuffer#hb) which would otherwise throw {@code InaccessibleObjectException} on Java 9+. 42 | */ 43 | public final class GsonUtils { 44 | 45 | /** A ready-to-use Gson that safely serializes ByteBuffer (as Base64 strings). */ 46 | public static final Gson SAFE_GSON = 47 | new GsonBuilder() 48 | // Use hierarchy adapter so HeapByteBuffer/DirectByteBuffer subclasses are covered. 49 | .registerTypeHierarchyAdapter(ByteBuffer.class, new ByteBufferTypeAdapter()) 50 | .create(); 51 | 52 | private GsonUtils() { 53 | // no instances 54 | } 55 | 56 | /** 57 | * Serializes {@link ByteBuffer} values as Base64 strings and deserializes them back. Registered 58 | * as a hierarchy adapter so it handles all ByteBuffer subclasses. 59 | */ 60 | static final class ByteBufferTypeAdapter extends TypeAdapter { 61 | 62 | @Override 63 | public void write(JsonWriter out, ByteBuffer value) throws IOException { 64 | if (value == null) { 65 | out.nullValue(); 66 | return; 67 | } 68 | // Duplicate to avoid mutating the original buffer's position/limit. 
69 | ByteBuffer dup = value.duplicate(); 70 | byte[] bytes = new byte[dup.remaining()]; 71 | dup.get(bytes); 72 | out.value(Base64.getEncoder().encodeToString(bytes)); 73 | } 74 | 75 | @Override 76 | public ByteBuffer read(JsonReader in) throws IOException { 77 | if (in.peek() == JsonToken.NULL) { 78 | in.nextNull(); 79 | return null; 80 | } 81 | byte[] bytes = Base64.getDecoder().decode(in.nextString()); 82 | return ByteBuffer.wrap(bytes); 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/SleepUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | import java.util.concurrent.ThreadLocalRandom; 27 | 28 | public final class SleepUtils { 29 | 30 | public static void waitRandomTime(Time time, long sleepMs, long jitterMs) throws InterruptedException { 31 | time.sleep(sleepMs + ThreadLocalRandom.current().nextLong(jitterMs)); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/TableNameUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | import com.google.cloud.bigquery.TableId; 27 | import com.google.cloud.bigquery.storage.v1.TableName; 28 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; 29 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig; 30 | import java.util.Map; 31 | import org.apache.kafka.connect.errors.ConnectException; 32 | 33 | public class TableNameUtils { 34 | 35 | public static String table(TableId table) { 36 | return String.format("table `%s`.`%s`", table.getDataset(), table.getTable()); 37 | } 38 | 39 | public static TableName tableName(TableId id) { 40 | return TableName.of(id.getProject(), id.getDataset(), id.getTable()); 41 | } 42 | 43 | public static String intTable(TableId table) { 44 | return "intermediate " + table(table); 45 | } 46 | 47 | public static String destTable(TableId table) { 48 | return "destination " + table(table); 49 | } 50 | 51 | public static TableId tableId(TableName name) { 52 | return TableId.of(name.getProject(), name.getDataset(), name.getTable()); 53 | } 54 | 55 | public static PartitionedTableId partitionedTableId(TableName name) { 56 | return new PartitionedTableId.Builder(tableId(name)).build(); 57 | } 58 | 59 | public static String[] getDataSetAndTableName(BigQuerySinkTaskConfig config, String topic) { 60 | String tableName; 61 | Map topic2TableMap = config.getTopic2TableMap().orElse(null); 62 | String dataset = config.getString(BigQuerySinkConfig.DEFAULT_DATASET_CONFIG); 63 | 64 | if (topic2TableMap != null) { 65 | tableName = topic2TableMap.getOrDefault(topic, topic); 66 | } else { 67 | String[] smtReplacement = topic.split(":"); 68 | 69 | if (smtReplacement.length == 2) { 70 | dataset = smtReplacement[0]; 71 | tableName = smtReplacement[1]; 72 | } else if (smtReplacement.length == 1) { 73 | tableName = smtReplacement[0]; 74 | } else { 75 | throw new ConnectException(String.format( 76 | "Incorrect regex replacement format in topic 
name '%s'. " 77 | + "SMT replacement should either produce the : format " 78 | + "or just the format.", 79 | topic 80 | )); 81 | } 82 | if (config.getBoolean(BigQuerySinkConfig.SANITIZE_TOPICS_CONFIG)) { 83 | tableName = FieldNameSanitizer.sanitizeName(tableName); 84 | } 85 | } 86 | 87 | return new String[]{dataset, tableName}; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/Time.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | /** 27 | * Largely adapted from the 28 | * Kafka Time interface, 29 | * which is not public API and therefore cannot be relied upon as a dependency. 
/**
 * Largely adapted from the Kafka Time interface, which is not public API and
 * therefore cannot be relied upon as a dependency. Abstracting the clock and
 * sleeping behind this interface makes time-dependent code testable.
 */
public interface Time {

  /**
   * Blocks the calling thread for the given duration.
   *
   * @param durationMs how long to sleep, in milliseconds
   * @throws InterruptedException if the thread is interrupted while sleeping
   */
  void sleep(long durationMs) throws InterruptedException;

  /**
   * @return the current wall-clock time, in milliseconds since the Unix epoch
   */
  long milliseconds();

  /** Production implementation backed by {@link Thread#sleep(long)} and the system clock. */
  Time SYSTEM = new Time() {
    @Override
    public void sleep(long durationMs) throws InterruptedException {
      Thread.sleep(durationMs);
    }

    @Override
    public long milliseconds() {
      return System.currentTimeMillis();
    }
  };

}
/**
 * Tracks a list of records as a sequence of batches, allowing the batch size to
 * be halved (e.g., after a "request too large" failure) and the cursor advanced
 * as batches are successfully written.
 *
 * @param <E> the record type
 */
public class RecordBatches<E> {

  private final List<E> records;

  // Index of the first record in the current batch.
  private int batchStart;
  // Number of records per batch; starts at the full list size.
  private int batchSize;

  /**
   * @param records the records to batch; may not be null
   */
  public RecordBatches(List<E> records) {
    this.records = records;
    this.batchStart = 0;
    this.batchSize = records.size();
  }

  /**
   * @return a view of the current batch (may be shorter than the batch size at
   *         the tail of the list); only valid while {@link #completed()} is false
   */
  public List<E> currentBatch() {
    int size = Math.min(records.size() - batchStart, batchSize);
    return records.subList(batchStart, batchStart + size);
  }

  /** Moves the cursor past the current batch. */
  public void advanceToNextBatch() {
    batchStart += batchSize;
  }

  /**
   * Halves the batch size, typically in response to an oversized-request error.
   *
   * @throws IllegalStateException if the batch size is already at its minimum of 1
   */
  public void reduceBatchSize() {
    if (batchSize <= 1) {
      throw new IllegalStateException("Cannot reduce batch size any further");
    }
    batchSize /= 2;
  }

  /** @return true once every record has been covered by an advanced-past batch */
  public boolean completed() {
    return batchStart >= records.size();
  }

}
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.write.batch; 25 | 26 | import java.util.concurrent.CountDownLatch; 27 | import org.apache.kafka.connect.errors.ConnectException; 28 | 29 | /** 30 | * A Runnable that counts down, and then waits for the countdown to be finished. 31 | */ 32 | public class CountDownRunnable implements Runnable { 33 | 34 | private CountDownLatch countDownLatch; 35 | 36 | public CountDownRunnable(CountDownLatch countDownLatch) { 37 | this.countDownLatch = countDownLatch; 38 | } 39 | 40 | @Override 41 | public void run() { 42 | countDownLatch.countDown(); 43 | try { 44 | /* 45 | * Hog this thread until ALL threads are finished counting down. This is needed so that 46 | * this thread doesn't start processing another countdown. If countdown tasks are holding onto 47 | * all the threads, then we know that nothing that went in before the countdown is still 48 | * processing. 
49 | */ 50 | countDownLatch.await(); 51 | } catch (InterruptedException err) { 52 | throw new ConnectException("Thread interrupted while waiting for countdown.", err); 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/TableWriterBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.write.batch; 25 | 26 | import com.google.cloud.bigquery.TableId; 27 | import org.apache.kafka.connect.sink.SinkRecord; 28 | 29 | /** 30 | * Interface for building a {@link TableWriter} or TableWriterGCS. 31 | */ 32 | public interface TableWriterBuilder { 33 | 34 | /** 35 | * Add a record to the builder. 36 | * 37 | * @param sinkRecord the row to add. 38 | * @param table the table the row will be written to. 
39 | */ 40 | void addRow(SinkRecord sinkRecord, TableId table); 41 | 42 | /** 43 | * Create a {@link TableWriter} from this builder. 44 | * 45 | * @return a TableWriter containing the given writer, table, topic, and all added rows. 46 | */ 47 | Runnable build(); 48 | } 49 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/storage/ConvertedRecord.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.write.storage; 25 | 26 | import org.apache.kafka.connect.sink.SinkRecord; 27 | import org.json.JSONObject; 28 | 29 | public class ConvertedRecord { 30 | 31 | private final SinkRecord original; 32 | private final JSONObject converted; 33 | 34 | public ConvertedRecord(SinkRecord original, JSONObject converted) { 35 | this.original = original; 36 | this.converted = converted; 37 | } 38 | 39 | public SinkRecord original() { 40 | return original; 41 | } 42 | 43 | public JSONObject converted() { 44 | return converted; 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/storage/JsonStreamWriterFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
/**
 * A functional interface for creating {@link JsonStreamWriter} instances.
 */
@FunctionalInterface
public interface JsonStreamWriterFactory {
  /**
   * Creates a writer for the given stream or table.
   *
   * @param streamOrTableName the name of the stream or table to write to
   * @return a new {@link JsonStreamWriter}
   * @throws Descriptors.DescriptorValidationException if the table's schema descriptor is invalid
   * @throws IOException if the underlying connection cannot be established
   * @throws InterruptedException if the calling thread is interrupted during creation
   */
  JsonStreamWriter create(String streamOrTableName) throws Descriptors.DescriptorValidationException,
      IOException, InterruptedException;
}
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.write.storage; 25 | 26 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig; 27 | import java.util.List; 28 | import java.util.Map; 29 | import org.apache.kafka.clients.consumer.OffsetAndMetadata; 30 | import org.apache.kafka.common.TopicPartition; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | /** 35 | * Handles all operations related to Batch Storage Write API 36 | */ 37 | public class StorageApiBatchModeHandler { 38 | 39 | private static final Logger logger = LoggerFactory.getLogger(StorageApiBatchModeHandler.class); 40 | private final StorageWriteApiBatchApplicationStream streamApi; 41 | 42 | public StorageApiBatchModeHandler(StorageWriteApiBatchApplicationStream streamApi, BigQuerySinkTaskConfig config) { 43 | this.streamApi = streamApi; 44 | } 45 | 46 | /** 47 | * Used by the scheduler to commit all eligible streams and create new active 48 | * streams. 49 | */ 50 | public void refreshStreams() { 51 | logger.trace("Storage Write API commit stream attempt by scheduler"); 52 | streamApi.refreshStreams(); 53 | } 54 | 55 | /** 56 | * Saves the offsets assigned to a particular stream on a table. This is required to commit offsets sequentially 57 | * even if the execution takes place in parallel at different times. 58 | * 59 | * @param tableName Name of tha table in project/dataset/tablename format 60 | * @param rows Records which would be written to table {tableName} sent to define schema if table creation is 61 | * attempted 62 | * @return Returns the streamName on which offsets are updated 63 | */ 64 | public String updateOffsetsOnStream( 65 | String tableName, 66 | List rows) { 67 | logger.trace("Updating offsets on current stream of table {}", tableName); 68 | return this.streamApi.updateOffsetsOnStream(tableName, rows); 69 | } 70 | 71 | /** 72 | * Gets offsets which are committed on BigQuery table. 
73 | * 74 | * @return Returns Map of topic, partition, offset mapping 75 | */ 76 | public Map getCommitableOffsets() { 77 | logger.trace("Getting list of commitable offsets for batch mode"); 78 | return this.streamApi.getCommitableOffsets(); 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/storage/StreamState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
/**
 * Enums for Stream states. Declaration order is preserved; do not reorder, in
 * case ordinals are relied upon elsewhere.
 */
public enum StreamState {
  /** Newly created stream. */
  CREATED,
  /** Stream currently receiving appended rows. */
  APPEND,
  /** Stream has been finalised. */
  FINALISED,
  /** Stream's contents have been committed. */
  COMMITTED,
  /** Stream is no longer active. */
  INACTIVE
}
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.write.storage; 25 | 26 | import com.google.api.core.ApiFuture; 27 | import com.google.cloud.bigquery.storage.v1.AppendRowsResponse; 28 | import com.google.protobuf.Descriptors; 29 | import java.io.IOException; 30 | import org.json.JSONArray; 31 | 32 | public interface StreamWriter { 33 | 34 | /** 35 | * Write the provided rows 36 | * 37 | * @param rows the rows to write; may not be null 38 | * @return the response from BigQuery for the write attempt 39 | */ 40 | ApiFuture appendRows( 41 | JSONArray rows 42 | ) throws Descriptors.DescriptorValidationException, IOException; 43 | 44 | /** 45 | * Invoked if the underlying stream appears to be closed. Implementing classes 46 | * should respond by re-initialize the underlying stream. 47 | */ 48 | void refresh(); 49 | 50 | /** 51 | * Invoked when all rows have either been written to BigQuery or intentionally 52 | * discarded (e.g., reported to an {@link com.wepay.kafka.connect.bigquery.ErrantRecordHandler}). 53 | */ 54 | void onSuccess(); 55 | 56 | String streamName(); 57 | 58 | } 59 | -------------------------------------------------------------------------------- /kcbq-connector/src/main/java/io/aiven/kafka/utils/VersionInfo.java: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Licensed to the Apache Software Foundation (ASF) under one * 4 | * or more contributor license agreements. See the NOTICE file * 5 | * distributed with this work for additional information * 6 | * regarding copyright ownership. The ASF licenses this file * 7 | * to you under the Apache License, Version 2.0 (the * 8 | * "License"); you may not use this file except in compliance * 9 | * with the License. 
/**
 * This class was originally developed by the Apache RAT project
 * A formatter for Package information about a class.
 *
 * @see Package
 */
public final class VersionInfo {
  /**
   * The package whose implementation/specification attributes are reported.
   * May be null when no package information is available for the class
   * (e.g., classes in the unnamed package under some class loaders).
   */
  private final Package pkg;

  // Returns the value, or the default when the value is null (missing manifest info).
  private String orDefault(final String value, final String defaultValue) {
    return value == null ? defaultValue : value;
  }

  /**
   * Constructor that uses the VersionInfo package for information.
   */
  public VersionInfo() {
    this(VersionInfo.class);
  }

  /**
   * Constructor for a specific class.
   *
   * @param clazz the class to get the Package information from.
   */
  public VersionInfo(final Class<?> clazz) {
    pkg = clazz.getPackage();
  }

  /**
   * Default string representation of the implementation information from the package.
   *
   * @return The string representation.
   */
  @Override
  public String toString() {
    return String.format("%s %s (%s)", getTitle(), getVersion(), getVendor());
  }

  /**
   * Gets the implementation version of the package. Will return "VERSION-NUMBER" if
   * package information is not available.
   *
   * @return the implementation version.
   */
  public String getVersion() {
    return orDefault(pkg == null ? null : pkg.getImplementationVersion(), "VERSION-NUMBER");
  }

  /**
   * Gets the implementation vendor of the package. Will return "VENDOR-NAME" if
   * package information is not available.
   *
   * @return the implementation vendor
   */
  public String getVendor() {
    return orDefault(pkg == null ? null : pkg.getImplementationVendor(), "VENDOR-NAME");
  }

  /**
   * Gets the implementation title of the package. Will return "TITLE" if
   * package information is not available.
   *
   * @return the implementation title
   */
  public String getTitle() {
    return orDefault(pkg == null ? null : pkg.getImplementationTitle(), "TITLE");
  }

  /**
   * Gets the specification version of the package. Will return "SPEC-VERSION" if
   * package information is not available.
   *
   * @return the specification version.
   */
  public String getSpecVersion() {
    return orDefault(pkg == null ? null : pkg.getSpecificationVersion(), "SPEC-VERSION");
  }

  /**
   * Gets the specification vendor of the package. Will return "SPEC-VENDOR" if
   * package information is not available.
   *
   * @return the specification vendor
   */
  public String getSpecVendor() {
    return orDefault(pkg == null ? null : pkg.getSpecificationVendor(), "SPEC-VENDOR");
  }

  /**
   * Gets the specification title of the package. Will return "SPEC-TITLE" if
   * package information is not available.
   *
   * @return the specification title
   */
  public String getSpecTitle() {
    return orDefault(pkg == null ? null : pkg.getSpecificationTitle(), "SPEC-TITLE");
  }

  /** Prints the implementation and specification info for this class's package. */
  public static void main(String[] args) {
    VersionInfo versionInfo = new VersionInfo();
    System.out.println(versionInfo);
    System.out.format("Spec: %s %s %s%n", versionInfo.getSpecTitle(), versionInfo.getSpecVersion(), versionInfo.getSpecVendor());
  }
}
See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertEquals; 27 | import static org.junit.jupiter.api.Assertions.assertNotNull; 28 | import static org.junit.jupiter.api.Assertions.assertNotSame; 29 | 30 | import com.wepay.kafka.connect.bigquery.api.SchemaRetriever; 31 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig; 32 | import java.util.HashMap; 33 | import java.util.List; 34 | import java.util.Map; 35 | import org.apache.kafka.connect.data.Schema; 36 | import org.apache.kafka.connect.sink.SinkRecord; 37 | import org.junit.jupiter.api.BeforeAll; 38 | import org.junit.jupiter.api.Test; 39 | 40 | public class BigQuerySinkConnectorTest { 41 | private static SinkPropertiesFactory propertiesFactory; 42 | 43 | @BeforeAll 44 | public static void initializePropertiesFactory() { 45 | propertiesFactory = new SinkPropertiesFactory(); 46 | } 47 | 48 | @Test 49 | public void testTaskClass() { 50 | assertEquals(BigQuerySinkTask.class, new BigQuerySinkConnector().taskClass()); 51 | } 52 | 53 | @Test 54 | public void testTaskConfigs() { 55 | Map properties = propertiesFactory.getProperties(); 56 | 57 | BigQuerySinkConnector testConnector = new BigQuerySinkConnector(); 58 | 59 | testConnector.start(properties); 60 | 61 | for (int i : new int[]{1, 2, 10, 100}) { 62 | Map expectedProperties = new HashMap<>(properties); 63 | List> taskConfigs = testConnector.taskConfigs(i); 64 | assertEquals(i, taskConfigs.size()); 65 | for (int j = 0; j < i; j++) { 66 | expectedProperties.put(BigQuerySinkTaskConfig.TASK_ID_CONFIG, Integer.toString(j)); 67 | assertEquals( 68 | expectedProperties, 69 | taskConfigs.get(j), 70 | "Connector properties should match task configs" 71 | ); 72 | assertNotSame( 73 | properties, 74 | taskConfigs.get(j), 75 | "Properties should not be referentially equal to 
task config" 76 | ); 77 | // A little overboard, sure, but since it's only in the ballpark of 10,000 iterations this 78 | // should be fine 79 | for (int k = j + 1; k < i; k++) { 80 | assertNotSame( 81 | taskConfigs.get(j), 82 | taskConfigs.get(k), 83 | "Task configs should not be referentially equal to each other" 84 | ); 85 | } 86 | } 87 | } 88 | } 89 | 90 | @Test 91 | public void testConfig() { 92 | assertNotNull(new BigQuerySinkConnector().config()); 93 | } 94 | 95 | @Test 96 | public void testVersion() { 97 | assertNotNull(new BigQuerySinkConnector().version()); 98 | } 99 | 100 | // Doesn't do anything at the moment, but having this here will encourage tests to be written if 101 | // the stop() method ever does anything significant 102 | @Test 103 | public void testStop() { 104 | new BigQuerySinkConnector().stop(); 105 | } 106 | 107 | // Would just use Mockito, but can't provide the name of an anonymous class to the config file 108 | public static class MockSchemaRetriever implements SchemaRetriever { 109 | @Override 110 | public void configure(Map properties) { 111 | // Shouldn't be called 112 | } 113 | 114 | @Override 115 | public Schema retrieveKeySchema(SinkRecord record) { 116 | return null; 117 | } 118 | 119 | @Override 120 | public Schema retrieveValueSchema(SinkRecord record) { 121 | return null; 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/ErrantRecordHandlerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 
/*
 * Copyright 2024 Copyright 2022 Aiven Oy and
 * bigquery-connector-for-apache-kafka project contributors
 *
 * This software contains code derived from the Confluent BigQuery
 * Kafka Connector, Copyright Confluent, Inc, which in turn
 * contains code derived from the WePay BigQuery Kafka Connector,
 * Copyright WePay, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.wepay.kafka.connect.bigquery;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.google.cloud.bigquery.BigQueryError;
import java.util.ArrayList;
import java.util.List;
import org.junit.jupiter.api.Test;

/** Unit tests for {@link ErrantRecordHandler}'s BigQuery error-reason filtering. */
public class ErrantRecordHandlerTest {

  @Test
  public void shouldReturnTrueOnAllowedBigQueryReason() {
    ErrantRecordHandler errantRecordHandler = new ErrantRecordHandler(null);
    List<BigQueryError> bqErrorList = new ArrayList<>();
    bqErrorList.add(new BigQueryError("invalid", "location", "message", "info"));

    // should allow sending records to dlq for bigquery reason:invalid (present in
    // allowedBigQueryErrorReason list)
    boolean reasonAllowed = errantRecordHandler.isErrorReasonAllowed(bqErrorList);
    assertTrue(reasonAllowed);
  }

  @Test
  public void shouldReturnFalseOnNonAllowedReason() {
    ErrantRecordHandler errantRecordHandler = new ErrantRecordHandler(null);
    List<BigQueryError> bqErrorList = new ArrayList<>();
    bqErrorList.add(new BigQueryError("backendError", "location", "message", "info"));

    // Should not allow sending records to dlq for reason not present in
    // allowedBigQueryErrorReason list
    boolean reasonAllowed = errantRecordHandler.isErrorReasonAllowed(bqErrorList);
    assertFalse(reasonAllowed);
  }
}
36 | */ 37 | public Map getProperties() { 38 | Map properties = new HashMap<>(); 39 | 40 | properties.put(BigQuerySinkConfig.TABLE_CREATE_CONFIG, "false"); 41 | properties.put(BigQuerySinkConfig.TOPICS_CONFIG, "kcbq-test"); 42 | properties.put(BigQuerySinkConfig.PROJECT_CONFIG, "test-project"); 43 | properties.put(BigQuerySinkConfig.DEFAULT_DATASET_CONFIG, "kcbq-test"); 44 | 45 | properties.put(BigQuerySinkConfig.KEYFILE_CONFIG, "key.json"); 46 | 47 | properties.put(BigQuerySinkConfig.SANITIZE_TOPICS_CONFIG, "false"); 48 | properties.put(BigQuerySinkConfig.AVRO_DATA_CACHE_SIZE_CONFIG, "10"); 49 | 50 | properties.put(BigQuerySinkConfig.ALLOW_NEW_BIGQUERY_FIELDS_CONFIG, "false"); 51 | properties.put(BigQuerySinkConfig.ALLOW_BIGQUERY_REQUIRED_FIELD_RELAXATION_CONFIG, "false"); 52 | properties.put(BigQuerySinkConfig.USE_STORAGE_WRITE_API_CONFIG, "false"); 53 | return properties; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/SinkTaskPropertiesFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery; 25 | 26 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig; 27 | import java.util.Map; 28 | 29 | public class SinkTaskPropertiesFactory extends SinkPropertiesFactory { 30 | 31 | @Override 32 | public Map getProperties() { 33 | Map properties = super.getProperties(); 34 | 35 | properties.put(BigQuerySinkTaskConfig.TASK_ID_CONFIG, "1"); 36 | 37 | return properties; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/config/CredentialsValidatorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
/*
 * Copyright 2024 Copyright 2022 Aiven Oy and
 * bigquery-connector-for-apache-kafka project contributors
 *
 * This software contains code derived from the Confluent BigQuery
 * Kafka Connector, Copyright Confluent, Inc, which in turn
 * contains code derived from the WePay BigQuery Kafka Connector,
 * Copyright WePay, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.wepay.kafka.connect.bigquery.config;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.wepay.kafka.connect.bigquery.GcpClientBuilder;
import java.util.Optional;
import org.junit.jupiter.api.Test;

/** Unit tests for the credentials validators used during config validation. */
public class CredentialsValidatorTest {

  /** When no key is configured, every credentials validator passes without contacting GCP. */
  @Test
  public void testNoCredentialsSkipsValidation() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getKey()).thenReturn(null);

    assertEquals(
        Optional.empty(),
        new CredentialsValidator.BigQueryCredentialsValidator().doValidate(sinkConfig)
    );
    assertEquals(
        Optional.empty(),
        new CredentialsValidator.GcsCredentialsValidator().doValidate(sinkConfig)
    );
    assertEquals(
        Optional.empty(),
        new CredentialsValidator.BigQueryStorageWriteApiCredentialsValidator().doValidate(sinkConfig)
    );
  }

  /** A client-builder failure must surface as a non-empty validation error for each validator. */
  @Test
  public void testFailureToConstructClient() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getKey()).thenReturn("key");

    @SuppressWarnings("unchecked")
    GcpClientBuilder failingBuilder = mock(GcpClientBuilder.class);
    when(failingBuilder.withConfig(eq(sinkConfig))).thenReturn(failingBuilder);
    when(failingBuilder.build()).thenThrow(new RuntimeException("Provided credentials are invalid"));

    assertNotEquals(
        Optional.empty(),
        new CredentialsValidator.BigQueryCredentialsValidator().doValidate(sinkConfig)
    );
    assertNotEquals(
        Optional.empty(),
        new CredentialsValidator.GcsCredentialsValidator().doValidate(sinkConfig)
    );
    assertNotEquals(
        Optional.empty(),
        new CredentialsValidator.BigQueryStorageWriteApiCredentialsValidator().doValidate(sinkConfig)
    );
  }

  /** Supplying a key together with application-default credentials is a configuration error. */
  @Test
  public void testKeyShouldNotBeProvidedIfUsingApplicationDefaultCredentials() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getKey()).thenReturn("key");
    when(sinkConfig.getKeySource()).thenReturn(GcpClientBuilder.KeySource.APPLICATION_DEFAULT);

    assertTrue(
        new CredentialsValidator.BigQueryCredentialsValidator().doValidate(sinkConfig)
            .get().contains("should not be provided")
    );
    assertTrue(
        new CredentialsValidator.BigQueryStorageWriteApiCredentialsValidator().doValidate(sinkConfig)
            .get().contains("should not be provided")
    );
  }
}
/*
 * Copyright 2024 Copyright 2022 Aiven Oy and
 * bigquery-connector-for-apache-kafka project contributors
 *
 * This software contains code derived from the Confluent BigQuery
 * Kafka Connector, Copyright Confluent, Inc, which in turn
 * contains code derived from the WePay BigQuery Kafka Connector,
 * Copyright WePay, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.wepay.kafka.connect.bigquery.config;

import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.ENABLE_BATCH_CONFIG;
import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.GCS_BUCKET_NAME_CONFIG;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.google.cloud.storage.Bucket;
import com.google.cloud.storage.Storage;
import java.util.Collections;
import java.util.Optional;
import org.junit.jupiter.api.Test;

/** Unit tests for {@link GcsBucketValidator} covering the batch-loading bucket rules. */
public class GcsBucketValidatorTest {

  private final Storage gcs = mock(Storage.class);

  /** Batch loading disabled (null topic list) means the bucket is never checked. */
  @Test
  public void testNullBatchLoadingSkipsValidation() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getList(ENABLE_BATCH_CONFIG)).thenReturn(null);

    assertEquals(
        Optional.empty(),
        new GcsBucketValidator().doValidate(gcs, sinkConfig)
    );
  }

  /** An empty batch-loading topic list likewise skips bucket validation. */
  @Test
  public void testEmptyBatchLoadingSkipsValidation() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.emptyList());

    assertEquals(
        Optional.empty(),
        new GcsBucketValidator().doValidate(gcs, sinkConfig)
    );
  }

  /** Batch loading with no bucket configured must fail validation. */
  @Test
  public void testNullBucketWithBatchLoading() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.singletonList("t1"));
    when(sinkConfig.getString(GCS_BUCKET_NAME_CONFIG)).thenReturn(null);

    assertNotEquals(
        Optional.empty(),
        new GcsBucketValidator().doValidate(gcs, sinkConfig)
    );
  }

  /** A blank (whitespace-only) bucket name is treated the same as a missing one. */
  @Test
  public void testBlankBucketWithBatchLoading() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.singletonList("t1"));
    when(sinkConfig.getString(GCS_BUCKET_NAME_CONFIG)).thenReturn(" \t ");

    assertNotEquals(
        Optional.empty(),
        new GcsBucketValidator().doValidate(gcs, sinkConfig)
    );
  }

  /** A configured bucket that exists in GCS passes validation. */
  @Test
  public void testValidBucketWithBatchLoading() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    final String bucketName = "gee_cs";
    when(sinkConfig.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.singletonList("t1"));
    when(sinkConfig.getString(GCS_BUCKET_NAME_CONFIG)).thenReturn(bucketName);

    Bucket bucket = mock(Bucket.class);
    when(gcs.get(eq(bucketName))).thenReturn(bucket);

    assertEquals(
        Optional.empty(),
        new GcsBucketValidator().doValidate(gcs, sinkConfig)
    );
  }

  /** A bucket that does not exist (and cannot be auto-created) must fail validation. */
  @Test
  public void testMissingBucketAndBucketCreationDisabledWithBatchLoading() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    final String bucketName = "gee_cs";
    when(sinkConfig.getList(ENABLE_BATCH_CONFIG)).thenReturn(Collections.singletonList("t1"));
    when(sinkConfig.getString(GCS_BUCKET_NAME_CONFIG)).thenReturn(bucketName);

    when(gcs.get(eq(bucketName))).thenReturn(null);

    assertNotEquals(
        Optional.empty(),
        new GcsBucketValidator().doValidate(gcs, sinkConfig)
    );
  }
}
package com.wepay.kafka.connect.bigquery.config;

import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.Optional;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link PartitioningModeValidator}: decorator syntax and an
 * explicit timestamp partition field are mutually exclusive.
 */
public class PartitioningModeValidatorTest {

  /** With decorator syntax off, the validator has nothing to check. */
  @Test
  public void testDisabledDecoratorSyntaxSkipsValidation() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(false);

    assertEquals(
        Optional.empty(),
        new PartitioningModeValidator().doValidate(sinkConfig)
    );
  }

  /** Decorator syntax alone (no timestamp partition field) is a valid configuration. */
  @Test
  public void testDecoratorSyntaxWithoutTimestampPartitionFieldName() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(true);
    when(sinkConfig.getTimestampPartitionFieldName()).thenReturn(Optional.empty());

    assertEquals(
        Optional.empty(),
        new PartitioningModeValidator().doValidate(sinkConfig)
    );
  }

  /** Combining decorator syntax with a timestamp partition field must be rejected. */
  @Test
  public void testDecoratorSyntaxWithTimestampPartitionFieldName() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(true);
    when(sinkConfig.getTimestampPartitionFieldName()).thenReturn(Optional.of("f1"));

    assertNotEquals(
        Optional.empty(),
        new PartitioningModeValidator().doValidate(sinkConfig)
    );
  }

  /** A timestamp partition field without decorator syntax is a valid configuration. */
  @Test
  public void testTimestampPartitionFieldNameWithoutDecoratorSyntax() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(false);
    when(sinkConfig.getTimestampPartitionFieldName()).thenReturn(Optional.of("f1"));

    assertEquals(
        Optional.empty(),
        new PartitioningModeValidator().doValidate(sinkConfig)
    );
  }
}
package com.wepay.kafka.connect.bigquery.config;

import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG;
import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TABLE_CREATE_CONFIG;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.google.cloud.bigquery.TimePartitioning;
import java.util.Optional;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link PartitioningTypeValidator}: partition-decorator syntax
 * with automatic table creation requires DAY time-partitioning.
 */
public class PartitioningTypeValidatorTest {

  /** With decorator syntax off, the partitioning type is never examined. */
  @Test
  public void testDisabledDecoratorSyntaxSkipsValidation() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(false);
    when(sinkConfig.getBoolean(TABLE_CREATE_CONFIG)).thenReturn(true);

    assertEquals(
        Optional.empty(),
        new PartitioningTypeValidator().doValidate(sinkConfig)
    );
  }

  /** With automatic table creation off, the partitioning type is never examined. */
  @Test
  public void testDisabledTableCreationSkipsValidation() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(true);
    when(sinkConfig.getBoolean(TABLE_CREATE_CONFIG)).thenReturn(false);

    assertEquals(
        Optional.empty(),
        new PartitioningTypeValidator().doValidate(sinkConfig)
    );
  }

  /** Every non-DAY partitioning type must be rejected when decorators and table creation are on. */
  @Test
  public void testNonDayTimePartitioningWithTableCreationAndDecoratorSyntax() {
    // TODO: consider a @ParameterizedTest with @EnumSource once junit-jupiter-params is on the
    // test classpath; for now a plain loop over the enum values keeps the dependency footprint small
    for (TimePartitioning.Type timePartitioningType : TimePartitioning.Type.values()) {
      if (TimePartitioning.Type.DAY.equals(timePartitioningType)) {
        continue;
      }

      BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
      when(sinkConfig.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(true);
      when(sinkConfig.getBoolean(TABLE_CREATE_CONFIG)).thenReturn(true);
      when(sinkConfig.getTimePartitioningType()).thenReturn(Optional.of(timePartitioningType));

      assertNotEquals(
          Optional.empty(),
          new PartitioningTypeValidator().doValidate(sinkConfig)
      );
    }
  }

  /** DAY partitioning is the one type allowed alongside decorators and table creation. */
  @Test
  public void testDayTimePartitioningWithTableCreationAndDecoratorSyntax() {
    BigQuerySinkConfig sinkConfig = mock(BigQuerySinkConfig.class);
    when(sinkConfig.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)).thenReturn(true);
    when(sinkConfig.getBoolean(TABLE_CREATE_CONFIG)).thenReturn(true);
    when(sinkConfig.getTimePartitioningType()).thenReturn(Optional.of(TimePartitioning.Type.DAY));

    assertEquals(
        Optional.empty(),
        new PartitioningTypeValidator().doValidate(sinkConfig)
    );
  }
}
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.exception; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertFalse; 27 | import static org.junit.jupiter.api.Assertions.assertTrue; 28 | 29 | import com.google.cloud.bigquery.BigQueryException; 30 | import org.junit.jupiter.api.Test; 31 | 32 | public class BigQueryErrorResponsesTest { 33 | 34 | @Test 35 | public void testIsAuthenticationError() { 36 | BigQueryException error = new BigQueryException(0, "......401.....Unauthorized error....."); 37 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 38 | 39 | error = new BigQueryException(0, "......401.....Unauthorized error...invalid_grant.."); 40 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 41 | 42 | error = new BigQueryException(0, "......400........invalid_grant.."); 43 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 44 | 45 | error = new BigQueryException(0, "......400.....invalid_request.."); 46 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 47 | 48 | error = new BigQueryException(0, "......400.....invalid_client.."); 49 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 50 | 51 | error = new BigQueryException(0, "......400.....unauthorized_client.."); 52 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 53 | 54 | error = new BigQueryException(0, "......400.....unsupported_grant_type.."); 55 | assertTrue(BigQueryErrorResponses.isAuthenticationError(error)); 56 | 57 | error = new 
BigQueryException(0, "......403..Access denied error....."); 58 | assertFalse(BigQueryErrorResponses.isAuthenticationError(error)); 59 | 60 | error = new BigQueryException(0, "......500...Internal Server Error..."); 61 | assertFalse(BigQueryErrorResponses.isAuthenticationError(error)); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/exception/BigQueryStorageWriteApiConnectExceptionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.exception; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertEquals; 27 | 28 | import com.google.cloud.bigquery.storage.v1.RowError; 29 | import java.util.ArrayList; 30 | import java.util.HashMap; 31 | import java.util.List; 32 | import java.util.Map; 33 | import org.junit.jupiter.api.Test; 34 | 35 | public class BigQueryStorageWriteApiConnectExceptionTest { 36 | 37 | @Test 38 | public void testFormatRowErrorBigQueryStorageWriteApi() { 39 | String expectedMessage = "Insertion failed at table abc for following rows: \n " + 40 | "[row index 0] (Failure reason : f1 is not valid) "; 41 | List errors = new ArrayList<>(); 42 | errors.add(RowError.newBuilder().setIndex(0).setMessage("f1 is not valid").build()); 43 | BigQueryStorageWriteApiConnectException exception = new BigQueryStorageWriteApiConnectException("abc", errors); 44 | assertEquals(expectedMessage, exception.getMessage()); 45 | } 46 | 47 | @Test 48 | public void testFormatAppendSerializationErrorBigQueryStorageWriteApi() { 49 | String expectedMessage = "Insertion failed at table abc for following rows: \n " + 50 | "[row index 0] (Failure reason : f1 is not valid) \n [row index 1] (Failure reason : f2 is not valid) "; 51 | Map errors = new HashMap<>(); 52 | errors.put(0, "f1 is not valid"); 53 | errors.put(1, "f2 is not valid"); 54 | BigQueryStorageWriteApiConnectException exception = new BigQueryStorageWriteApiConnectException("abc", errors); 55 | assertEquals(expectedMessage, exception.getMessage()); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/integration/StorageWriteApiBatchBigQuerySinkConnectorIT.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This 
software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.integration; 25 | 26 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; 27 | import java.util.Map; 28 | import org.junit.jupiter.api.Tag; 29 | 30 | @Tag("integration") 31 | public class StorageWriteApiBatchBigQuerySinkConnectorIT extends StorageWriteApiBigQuerySinkConnectorIT { 32 | 33 | @Override 34 | protected Map configs(String topic) { 35 | Map result = super.configs(topic); 36 | result.put(BigQuerySinkConfig.ENABLE_BATCH_MODE_CONFIG, "true"); 37 | result.put(BigQuerySinkConfig.COMMIT_INTERVAL_SEC_CONFIG, "15"); 38 | return result; 39 | } 40 | 41 | @Override 42 | protected String topic(String basename) { 43 | return super.topic(basename + "-batch-mode"); 44 | } 45 | 46 | @Override 47 | protected boolean isBatchMode() { 48 | return true; 49 | }} 50 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/integration/VersionTestIT.java: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright 2024 Copyright 2022 Aiven Oy and 4 | * 
bigquery-connector-for-apache-kafka project contributors 5 | * 6 | * This software contains code derived from the Confluent BigQuery 7 | * Kafka Connector, Copyright Confluent, Inc, which in turn 8 | * contains code derived from the WePay BigQuery Kafka Connector, 9 | * Copyright WePay, Inc. 10 | * 11 | * Licensed under the Apache License, Version 2.0 (the "License"); 12 | * you may not use this file except in compliance with the License. 13 | * You may obtain a copy of the License at 14 | * 15 | * http://www.apache.org/licenses/LICENSE-2.0 16 | * 17 | * Unless required by applicable law or agreed to in writing, 18 | * software distributed under the License is distributed on an 19 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 20 | * KIND, either express or implied. See the License for the 21 | * specific language governing permissions and limitations 22 | * under the License. 23 | */ 24 | 25 | package com.wepay.kafka.connect.bigquery.integration; 26 | 27 | import io.aiven.kafka.utils.VersionInfo; 28 | import org.junit.jupiter.api.Test; 29 | 30 | import static org.junit.jupiter.api.Assertions.assertEquals; 31 | 32 | public class VersionTestIT { 33 | @Test 34 | void versionTest() throws Exception { 35 | VersionInfo versionInfo = new VersionInfo(); 36 | assertEquals("Aiven", versionInfo.getVendor()); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/integration/utils/BigQueryTestUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 
9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.integration.utils; 25 | 26 | import com.google.cloud.bigquery.BigQuery; 27 | import com.google.cloud.bigquery.BigQueryException; 28 | import com.google.cloud.bigquery.Schema; 29 | import com.google.cloud.bigquery.StandardTableDefinition; 30 | import com.google.cloud.bigquery.TableId; 31 | import com.google.cloud.bigquery.TableInfo; 32 | import com.google.cloud.bigquery.TimePartitioning; 33 | import org.slf4j.Logger; 34 | import org.slf4j.LoggerFactory; 35 | 36 | public class BigQueryTestUtils { 37 | 38 | private static final Logger logger = LoggerFactory.getLogger(BigQueryTestUtils.class); 39 | 40 | public static void createPartitionedTable(BigQuery bigQuery, String datasetName, String tableName, 41 | Schema schema) { 42 | try { 43 | TableId tableId = TableId.of(datasetName, tableName); 44 | 45 | TimePartitioning partitioning = 46 | TimePartitioning.newBuilder(TimePartitioning.Type.DAY) 47 | .build(); 48 | 49 | StandardTableDefinition tableDefinition = 50 | StandardTableDefinition.newBuilder() 51 | .setSchema(schema) 52 | .setTimePartitioning(partitioning) 53 | .build(); 54 | TableInfo tableInfo = TableInfo.newBuilder(tableId, tableDefinition).build(); 55 | 56 | bigQuery.create(tableInfo); 57 | logger.info("Partitioned table {} created successfully", tableName); 58 | } catch (BigQueryException e) { 59 | 
logger.error("Failed to create partitioned table {} in dataset {}", tableName, datasetName); 60 | throw e; 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/integration/utils/BucketClearer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.integration.utils; 25 | 26 | import com.google.api.gax.paging.Page; 27 | import com.google.cloud.storage.Blob; 28 | import com.google.cloud.storage.Bucket; 29 | import com.google.cloud.storage.Storage; 30 | import com.wepay.kafka.connect.bigquery.GcpClientBuilder; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | public class BucketClearer { 35 | 36 | private static final Logger logger = LoggerFactory.getLogger(BucketClearer.class); 37 | 38 | /** 39 | * Clear out a GCS bucket. 
Useful in integration testing to provide a clean slate before creating 40 | * a connector and writing to that bucket. 41 | * 42 | * @param key The GCP credentials to use (can be a filename or a raw JSON string). 43 | * @param project The GCP project the bucket belongs to. 44 | * @param bucketName The bucket to clear. 45 | * @param folderName The folder to clear (can be empty or null). 46 | * @param keySource The key source. If "FILE", then the {@code key} parameter will be treated as a 47 | * filename; if "JSON", then {@code key} will be treated as a raw JSON string. 48 | */ 49 | public static void clearBucket(String key, String project, String bucketName, String folderName, String keySource) { 50 | Storage gcs = new GcpClientBuilder.GcsBuilder() 51 | .withKeySource(GcpClientBuilder.KeySource.valueOf(keySource)) 52 | .withKey(key) 53 | .withProject(project) 54 | .build(); 55 | Bucket bucket = gcs.get(bucketName); 56 | if (bucket != null) { 57 | logger.info("Deleting objects in the {} folder for bucket {}", 58 | humanReadableFolderName(folderName), bucketName); 59 | for (Blob blob : listBlobs(bucket, folderName)) { 60 | gcs.delete(blob.getBlobId()); 61 | } 62 | bucket.delete(); 63 | logger.info("Bucket {} deleted successfully", bucketName); 64 | } else { 65 | logger.info("Bucket {} does not exist", bucketName); 66 | } 67 | } 68 | 69 | private static String humanReadableFolderName(String folderName) { 70 | return folderName == null || folderName.isEmpty() 71 | ? "root" 72 | : "'" + folderName + "'"; 73 | } 74 | 75 | private static Iterable listBlobs(Bucket bucket, String folderName) { 76 | Page blobListing = folderName == null || folderName.isEmpty() 77 | ? 
bucket.list() 78 | : bucket.list(Storage.BlobListOption.prefix(folderName)); 79 | return blobListing.iterateAll(); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/integration/utils/TableClearer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.integration.utils; 25 | 26 | import static com.wepay.kafka.connect.bigquery.utils.TableNameUtils.table; 27 | 28 | import com.google.cloud.bigquery.BigQuery; 29 | import com.google.cloud.bigquery.TableId; 30 | import com.wepay.kafka.connect.bigquery.utils.FieldNameSanitizer; 31 | import java.util.Arrays; 32 | import java.util.Collection; 33 | import org.slf4j.Logger; 34 | import org.slf4j.LoggerFactory; 35 | 36 | public class TableClearer { 37 | private static final Logger logger = LoggerFactory.getLogger(TableClearer.class); 38 | 39 | /** 40 | * Clear out one or more BigQuery tables. Useful in integration testing to provide a clean slate 41 | * before creating a connector and writing to those tables. 42 | * 43 | * @param bigQuery The BigQuery client to use when sending table deletion requests. 44 | * @param dataset The dataset that the to-be-cleared tables belong to. 45 | * @param tables The tables to clear. 46 | */ 47 | public static void clearTables(BigQuery bigQuery, String dataset, Collection tables) { 48 | for (String tableName : tables) { 49 | TableId table = TableId.of(dataset, FieldNameSanitizer.sanitizeName(tableName)); 50 | if (bigQuery.delete(table)) { 51 | logger.info("{} deleted successfully", table(table)); 52 | } else { 53 | logger.info("{} does not exist", table(table)); 54 | } 55 | } 56 | } 57 | 58 | /** 59 | * Clear out one or more BigQuery tables. Useful in integration testing to provide a clean slate 60 | * before creating a connector and writing to those tables. 61 | * 62 | * @param bigQuery The BigQuery client to use when sending table deletion requests. 63 | * @param dataset The dataset that the to-be-cleared tables belong to. 64 | * @param tables The tables to clear. 65 | */ 66 | public static void clearTables(BigQuery bigQuery, String dataset, String... 
tables) { 67 | clearTables(bigQuery, dataset, Arrays.asList(tables)); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/integration/utils/TestCaseLogger.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.integration.utils; 25 | 26 | import org.junit.jupiter.api.extension.AfterEachCallback; 27 | import org.junit.jupiter.api.extension.BeforeEachCallback; 28 | import org.junit.jupiter.api.extension.ExtensionContext; 29 | import org.slf4j.Logger; 30 | import org.slf4j.LoggerFactory; 31 | 32 | public class TestCaseLogger implements BeforeEachCallback, AfterEachCallback { 33 | private static final Logger logger = LoggerFactory.getLogger(TestCaseLogger.class); 34 | 35 | @Override 36 | public void beforeEach(ExtensionContext extensionContext) throws Exception { 37 | logger.info("Starting test {}", extensionContext.getDisplayName()); 38 | } 39 | 40 | @Override 41 | public void afterEach(ExtensionContext extensionContext) throws Exception { 42 | logger.info("Finished test {}", extensionContext.getDisplayName()); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/FieldNameSanitizerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertEquals; 27 | import static org.junit.jupiter.api.Assertions.assertTrue; 28 | 29 | import java.util.Collections; 30 | import java.util.HashMap; 31 | import java.util.Map; 32 | import org.junit.jupiter.api.BeforeEach; 33 | import org.junit.jupiter.api.Test; 34 | 35 | public class FieldNameSanitizerTest { 36 | private Map testMap; 37 | 38 | @BeforeEach 39 | public void setUp() { 40 | testMap = new HashMap() {{ 41 | put("A.1", new HashMap() {{ 42 | put("_B1", 1); 43 | put("B.2", "hello.B-2"); 44 | }}); 45 | put("A-2", new HashMap() {{ 46 | put("=/B.3", "hello B3"); 47 | put("B./4", "hello B4"); 48 | put("2A/", "hello B5"); 49 | put("3A/", "hello B6"); 50 | }}); 51 | put("Foo", "Simple Value"); 52 | put("Foo_1", "Simple Value 1"); 53 | put("Foo-2", "Simple Value 2"); 54 | }}; 55 | } 56 | 57 | @Test 58 | public void testInvalidSymbol() { 59 | Map sanitizedMap = FieldNameSanitizer.replaceInvalidKeys(testMap); 60 | assertTrue(sanitizedMap.containsKey("A_1")); 61 | assertTrue(sanitizedMap.containsKey("A_2")); 62 | 63 | Map nestedMap1 = (Map) sanitizedMap.get("A_1"); 64 | // Validate changed keys. 65 | assertTrue(nestedMap1.containsKey("B_2")); 66 | assertTrue(nestedMap1.containsKey("_B1")); 67 | 68 | // Validate unchanged values. 69 | assertEquals(nestedMap1.get("B_2"), "hello.B-2"); 70 | assertEquals(nestedMap1.get("_B1"), 1); 71 | 72 | // Validate map size. 
73 | assertEquals(2, nestedMap1.size()); 74 | 75 | Map nestedMap2 = (Map) sanitizedMap.get("A_2"); 76 | // Validate changed keys. 77 | assertTrue(nestedMap2.containsKey("__B_3")); 78 | assertTrue(nestedMap2.containsKey("B__4")); 79 | assertTrue(nestedMap2.containsKey("_2A_")); 80 | assertTrue(nestedMap2.containsKey("_3A_")); 81 | 82 | // Validate unchanged values. 83 | assertEquals(nestedMap2.get("__B_3"), "hello B3"); 84 | assertEquals(nestedMap2.get("B__4"), "hello B4"); 85 | assertEquals(nestedMap2.get("_2A_"), "hello B5"); 86 | assertEquals(nestedMap2.get("_3A_"), "hello B6"); 87 | 88 | // Validate map size. 89 | assertEquals(4, nestedMap2.size()); 90 | 91 | // Validate keys shall be unchanged. 92 | assertTrue(sanitizedMap.containsKey("Foo")); 93 | assertTrue(sanitizedMap.containsKey("Foo_1")); 94 | 95 | // Validate key shall be changed. 96 | assertTrue(sanitizedMap.containsKey("Foo_2")); 97 | 98 | // Validate map size. 99 | assertEquals(5, sanitizedMap.size()); 100 | } 101 | 102 | /** 103 | * Verifies that null values are acceptable while sanitizing keys. 
104 | */ 105 | @Test 106 | public void testNullValue() { 107 | assertEquals( 108 | Collections.singletonMap("abc", null), 109 | FieldNameSanitizer.replaceInvalidKeys(Collections.singletonMap("abc", null))); 110 | } 111 | 112 | @Test 113 | public void testDeeplyNestedNullValues() { 114 | testMap = new HashMap<>(); 115 | testMap.put("top", null); 116 | testMap.put("middle", Collections.singletonMap("key", null)); 117 | testMap.put("bottom", Collections.singletonMap("key", Collections.singletonMap("key", null))); 118 | assertEquals( 119 | testMap, 120 | FieldNameSanitizer.replaceInvalidKeys(testMap) 121 | ); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/MockTime.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | public class MockTime extends org.apache.kafka.common.utils.MockTime implements Time { 27 | } 28 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/utils/PartitionedTableIdTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.utils; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertEquals; 27 | 28 | import com.google.cloud.bigquery.TableId; 29 | import java.time.LocalDate; 30 | import org.junit.jupiter.api.Test; 31 | 32 | public class PartitionedTableIdTest { 33 | 34 | @Test 35 | public void testBasicBuilder() { 36 | final String dataset = "dataset"; 37 | final String table = "table"; 38 | 39 | final PartitionedTableId tableId = new PartitionedTableId.Builder(dataset, table).build(); 40 | 41 | assertEquals(dataset, tableId.getDataset()); 42 | assertEquals(table, tableId.getBaseTableName()); 43 | assertEquals(table, tableId.getFullTableName()); 44 | 45 | TableId expectedTableId = TableId.of(dataset, table); 46 | assertEquals(expectedTableId, tableId.getBaseTableId()); 47 | assertEquals(expectedTableId, tableId.getFullTableId()); 48 | } 49 | 50 | @Test 51 | public void testTableIdBuilder() { 52 | final String project = "project"; 53 | final String dataset = "dataset"; 54 | final String table = "table"; 55 | final TableId tableId = TableId.of(project, dataset, table); 56 | 57 | final PartitionedTableId partitionedTableId = new PartitionedTableId.Builder(tableId).build(); 58 | 59 | assertEquals(project, partitionedTableId.getProject()); 60 | assertEquals(dataset, partitionedTableId.getDataset()); 61 | assertEquals(table, partitionedTableId.getBaseTableName()); 62 | assertEquals(table, partitionedTableId.getFullTableName()); 63 | 64 | assertEquals(tableId, partitionedTableId.getBaseTableId()); 65 | assertEquals(tableId, partitionedTableId.getFullTableId()); 66 | } 67 | 68 | @Test 69 | public void testWithPartition() { 70 | final String dataset = "dataset"; 71 | final String table = "table"; 72 | final LocalDate partitionDate = LocalDate.of(2016, 9, 21); 73 | 74 | final PartitionedTableId partitionedTableId = 75 | new PartitionedTableId.Builder(dataset, table).setDayPartition(partitionDate).build(); 76 | 77 | final String 
expectedPartition = "20160921"; 78 | 79 | assertEquals(dataset, partitionedTableId.getDataset()); 80 | assertEquals(table, partitionedTableId.getBaseTableName()); 81 | assertEquals(table + "$" + expectedPartition, partitionedTableId.getFullTableName()); 82 | 83 | final TableId expectedBaseTableId = TableId.of(dataset, table); 84 | final TableId expectedFullTableId = TableId.of(dataset, table + "$" + expectedPartition); 85 | 86 | assertEquals(expectedBaseTableId, partitionedTableId.getBaseTableId()); 87 | assertEquals(expectedFullTableId, partitionedTableId.getFullTableId()); 88 | } 89 | 90 | @Test 91 | public void testWithEpochTimePartition() { 92 | final String dataset = "dataset"; 93 | final String table = "table"; 94 | 95 | final long utcTime = 1509007584334L; 96 | 97 | final PartitionedTableId partitionedTableId = 98 | new PartitionedTableId.Builder(dataset, table).setDayPartition(utcTime).build(); 99 | 100 | final String expectedPartition = "20171026"; 101 | 102 | assertEquals(dataset, partitionedTableId.getDataset()); 103 | assertEquals(table, partitionedTableId.getBaseTableName()); 104 | assertEquals(table + "$" + expectedPartition, partitionedTableId.getFullTableName()); 105 | 106 | final TableId expectedBaseTableId = TableId.of(dataset, table); 107 | final TableId expectedFullTableId = TableId.of(dataset, table + "$" + expectedPartition); 108 | 109 | assertEquals(expectedBaseTableId, partitionedTableId.getBaseTableId()); 110 | assertEquals(expectedFullTableId, partitionedTableId.getFullTableId()); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/storage/BigQueryBuilderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the 
Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.write.storage; 25 | 26 | import com.google.cloud.bigquery.BigQuery; 27 | import com.wepay.kafka.connect.bigquery.GcpClientBuilder; 28 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; 29 | import org.junit.jupiter.api.Test; 30 | 31 | import java.util.HashMap; 32 | import java.util.Map; 33 | 34 | import static org.junit.jupiter.api.Assertions.assertEquals; 35 | 36 | public class BigQueryBuilderTest { 37 | 38 | @Test 39 | public void testBigQueryBuild() { 40 | Map properties = new HashMap<>(); 41 | properties.put(BigQuerySinkConfig.PROJECT_CONFIG, "abcd"); 42 | properties.put(BigQuerySinkConfig.DEFAULT_DATASET_CONFIG, "dummy_dataset"); 43 | BigQuerySinkConfig config = new BigQuerySinkConfig(properties); 44 | 45 | BigQuery actualSettings = new GcpClientBuilder.BigQueryBuilder() 46 | .withConfig(config) 47 | .build(); 48 | 49 | assertEquals(actualSettings.getOptions().getProjectId(), "abcd"); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/storage/BigQueryWriteSettingsBuilderTest.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, 17 | * software distributed under the License is distributed on an 18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | * KIND, either express or implied. See the License for the 20 | * specific language governing permissions and limitations 21 | * under the License. 
22 | */ 23 | 24 | package com.wepay.kafka.connect.bigquery.write.storage; 25 | 26 | import static org.junit.jupiter.api.Assertions.assertEquals; 27 | 28 | import com.google.cloud.bigquery.storage.v1.BigQueryWriteSettings; 29 | import com.wepay.kafka.connect.bigquery.GcpClientBuilder; 30 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig; 31 | import java.util.HashMap; 32 | import java.util.Map; 33 | import org.junit.jupiter.api.Test; 34 | 35 | public class BigQueryWriteSettingsBuilderTest { 36 | 37 | @Test 38 | public void testBigQueryWriteSettingsBuild() { 39 | Map properties = new HashMap<>(); 40 | properties.put(BigQuerySinkConfig.PROJECT_CONFIG, "abcd"); 41 | properties.put(BigQuerySinkConfig.KEY_SOURCE_CONFIG, GcpClientBuilder.KeySource.FILE.name()); 42 | properties.put(BigQuerySinkConfig.DEFAULT_DATASET_CONFIG, "dummy_dataset"); 43 | BigQuerySinkConfig config = new BigQuerySinkConfig(properties); 44 | 45 | BigQueryWriteSettings actualSettings = new GcpClientBuilder.BigQueryWriteSettingsBuilder() 46 | .withConfig(config) 47 | .build(); 48 | 49 | assertEquals(actualSettings.getQuotaProjectId(), "abcd"); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/write/storage/GcsBuilderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Copyright 2022 Aiven Oy and 3 | * bigquery-connector-for-apache-kafka project contributors 4 | * 5 | * This software contains code derived from the Confluent BigQuery 6 | * Kafka Connector, Copyright Confluent, Inc, which in turn 7 | * contains code derived from the WePay BigQuery Kafka Connector, 8 | * Copyright WePay, Inc. 9 | * 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 
package com.wepay.kafka.connect.bigquery.write.storage;

import static org.junit.jupiter.api.Assertions.assertEquals;

import com.google.cloud.storage.Storage;
import com.wepay.kafka.connect.bigquery.GcpClientBuilder;
import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@link GcpClientBuilder.GcsBuilder}.
 */
public class GcsBuilderTest {

  /**
   * Verifies that the project id on the built {@link Storage} client's options
   * comes from the connector's {@code project} configuration.
   */
  @Test
  public void testStorageBuild() {
    // Minimal valid sink configuration; only the project id matters for this assertion.
    Map<String, String> properties = new HashMap<>();
    properties.put(BigQuerySinkConfig.PROJECT_CONFIG, "abcd");
    properties.put(BigQuerySinkConfig.DEFAULT_DATASET_CONFIG, "dummy_dataset");
    BigQuerySinkConfig config = new BigQuerySinkConfig(properties);

    Storage actualSettings = new GcpClientBuilder.GcsBuilder()
        .withConfig(config)
        .build();

    // JUnit convention: expected value first, then the actual value.
    assertEquals("abcd", actualSettings.getOptions().getProjectId());
  }

}
package com.wepay.kafka.connect.bigquery.write.storage;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import com.google.cloud.bigquery.storage.v1.TableName;
import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@link StorageApiBatchModeHandler}, which coordinates batch-mode
 * application streams on top of {@link StorageWriteApiBatchApplicationStream}.
 */
public class StorageApiBatchModeHandlerTest {
  StorageWriteApiBatchApplicationStream mockedStreamApi =
      mock(StorageWriteApiBatchApplicationStream.class);
  BigQuerySinkTaskConfig mockedConfig = mock(BigQuerySinkTaskConfig.class);
  // NOTE(review): type parameters reconstructed from the consumer-offset API used in
  // setup(); confirm against StorageWriteApiBatchApplicationStream#getCommitableOffsets.
  Map<TopicPartition, OffsetAndMetadata> offsetInfo = new HashMap<>();
  StorageApiBatchModeHandler batchModeHandler = new StorageApiBatchModeHandler(
      mockedStreamApi,
      mockedConfig
  );
  // Empty row batch; sufficient since the underlying stream API is mocked.
  List<ConvertedRecord> rows = new ArrayList<>();

  /**
   * Stubs a minimal task configuration (project "p", dataset "d1", two topics)
   * and canned responses from the mocked batch application stream.
   */
  @BeforeEach
  public void setup() {
    when(mockedConfig.getString(BigQuerySinkTaskConfig.PROJECT_CONFIG)).thenReturn("p");
    when(mockedConfig.getString(BigQuerySinkTaskConfig.DEFAULT_DATASET_CONFIG)).thenReturn("d1");
    when(mockedConfig.getBoolean(BigQuerySinkTaskConfig.SANITIZE_TOPICS_CONFIG)).thenReturn(false);
    when(mockedConfig.getList(BigQuerySinkTaskConfig.TOPICS_CONFIG)).thenReturn(
        Arrays.asList("topic1", "topic2")
    );
    when(mockedStreamApi.maybeCreateStream(any(), any())).thenReturn(true);
    when(mockedStreamApi.updateOffsetsOnStream(any(), any())).thenReturn("s1_app_stream");
    when(mockedStreamApi.getCommitableOffsets()).thenReturn(offsetInfo);
  }

  /**
   * Smoke test: refreshing streams against the mocked API must not throw.
   */
  @Test
  public void testCommitStreams() {
    batchModeHandler.refreshStreams();
  }

  /**
   * Offset updates must be delegated to the stream API with the fully-qualified
   * table name, and the stream name it returns must be passed through.
   */
  @Test
  public void testUpdateOffsetsOnStream() {
    String actualStreamName = batchModeHandler.updateOffsetsOnStream(
        TableName.of("p", "d1", "topic1").toString(), rows);

    assertEquals("s1_app_stream", actualStreamName);
    verify(mockedStreamApi, times(1))
        .updateOffsetsOnStream("projects/p/datasets/d1/tables/topic1", rows);
  }

  /**
   * Fetching committable offsets must be delegated to the stream API.
   */
  @Test
  public void testGetCommitableOffsets() {
    batchModeHandler.getCommitableOffsets();
    verify(mockedStreamApi, times(1)).getCommitableOffsets();
  }
}
{"row":2,"null_prim":{"int":5},"boolean_prim":true,"int_prim":4354,"long_prim":435443544354,"float_prim":43.54,"double_prim":435443.544354,"string_prim":"forty-three","bytes_prim":"\u0000\u000f\u001e\u002d\u003c\u004b\u005a\u0069\u0078"} 3 | {"row":3,"null_prim":{"int":8},"boolean_prim":false,"int_prim":1993,"long_prim":199319931993,"float_prim":19.93,"double_prim":199319.931993,"string_prim":"nineteen","bytes_prim":"\u0000\u000f\u001e\u002d\u003c\u004b\u005a\u0069\u0078"} 4 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/gcs-load/schema.json: -------------------------------------------------------------------------------- 1 | { "type": "record", 2 | "name": "gcsLoad", 3 | "namespace": "com.wepay.kafka.connect.bigquery", 4 | "fields": 5 | [ 6 | { "name": "row", "type": "int" }, 7 | { "name": "null_prim", "type": ["null", "int"] }, 8 | { "name": "boolean_prim", "type": "boolean" }, 9 | { "name": "int_prim", "type": "int" }, 10 | { "name": "long_prim", "type": "long" }, 11 | { "name": "float_prim", "type": "float" }, 12 | { "name": "double_prim", "type": "double" }, 13 | { "name": "string_prim", "type": "string" }, 14 | { "name": "bytes_prim", "type": "bytes" } 15 | ] 16 | } -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/logical-types/data.json: -------------------------------------------------------------------------------- 1 | {"row":1,"timestamp_test":0,"date_test":0} 2 | {"row":2,"timestamp_test":42000000,"date_test":4200} 3 | {"row":3,"timestamp_test":1468275102000,"date_test":16993} 4 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/logical-types/schema.json: -------------------------------------------------------------------------------- 1 | { "type": "record", 2 | "name": "logicals", 3 | 
"namespace": "com.wepay.kafka.connect.bigquery", 4 | "fields": 5 | [ 6 | { "name": "row", "type": "int" }, 7 | { "name": "timestamp_test", 8 | "type": 9 | { "type": "long", 10 | "connect.name": "org.apache.kafka.connect.data.Timestamp" 11 | } 12 | }, 13 | { "name": "date_test", 14 | "type": 15 | { 16 | "type": "int", 17 | "connect.name": "org.apache.kafka.connect.data.Date" 18 | } 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/matryoshka-dolls/data.json: -------------------------------------------------------------------------------- 1 | {"row":1,"middle":{"middle_array":[42.0, 42.42, 42.4242],"inner":{"inner_int":42,"inner_string":"42"}},"inner":{"inner_int":-42,"inner_string": "-42"}} 2 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/matryoshka-dolls/schema.json: -------------------------------------------------------------------------------- 1 | { "type": "record", 2 | "name": "outer_doll", 3 | "namespace": "com.wepay.kafka.connect.bigquery", 4 | "fields": [ 5 | { "name": "row", "type": "int" }, 6 | { "name": "middle", "type": 7 | { "type": "record", 8 | "name": "middle_doll", 9 | "fields": [ 10 | { "name": "middle_array", 11 | "type": { 12 | "type": "array", 13 | "items": "float" 14 | } 15 | }, 16 | { "name": "inner", 17 | "type": { 18 | "type": "record", 19 | "name": "inner_doll", 20 | "fields": [ 21 | { "name": "inner_int", 22 | "type": "int" 23 | }, 24 | { "name": "inner_string", 25 | "type": "string" 26 | } 27 | ] 28 | } 29 | } 30 | ] 31 | } 32 | }, 33 | { 34 | "name": "inner", 35 | "type": "com.wepay.kafka.connect.bigquery.inner_doll" 36 | } 37 | ] 38 | } 39 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/nulls/data.json: 
-------------------------------------------------------------------------------- 1 | {"row":1,"f1":"Required string","f2":null,"f3":{"int":42},"f4":{"boolean":false}} 2 | {"row":2,"f1":"Required string","f2":{"string":"Optional string"},"f3":{"int":89},"f4":null} 3 | {"row":3,"f1":"Required string","f2":null,"f3":null,"f4":{"boolean":true}} 4 | {"row":4,"f1":"Required string","f2":{"string":"Optional string"},"f3":null,"f4":null} 5 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/nulls/schema.json: -------------------------------------------------------------------------------- 1 | {"type":"record", 2 | "name":"myrecord", 3 | "fields":[ 4 | {"name":"row","type":"int"}, 5 | {"name":"f1","type":"string"}, 6 | {"name":"f2","type":["null","string"]}, 7 | {"name":"f3","type":["null","int"]}, 8 | {"name":"f4","type":["null","boolean"]}] 9 | } 10 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/primitives/data.json: -------------------------------------------------------------------------------- 1 | {"row":1,"null_prim":null,"boolean_prim":false,"int_prim":4242,"long_prim":42424242424242,"float_prim":42.42,"double_prim":42424242.42424242,"string_prim":"forty-two","bytes_prim":"\u0000\u000f\u001e\u002d\u003c\u004b\u005a\u0069\u0078"} 2 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/integration_test_cases/primitives/schema.json: -------------------------------------------------------------------------------- 1 | { "type": "record", 2 | "name": "primitives", 3 | "namespace": "com.wepay.kafka.connect.bigquery", 4 | "fields": 5 | [ 6 | { "name": "row", "type": "int" }, 7 | { "name": "null_prim", "type": ["null", "int"] }, 8 | { "name": "boolean_prim", "type": "boolean" }, 9 | { "name": "int_prim", "type": "int" }, 10 | { 
"name": "long_prim", "type": "long" }, 11 | { "name": "float_prim", "type": "float" }, 12 | { "name": "double_prim", "type": "double" }, 13 | { "name": "string_prim", "type": "string" }, 14 | { "name": "bytes_prim", "type": "bytes" } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /kcbq-connector/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2024 Copyright 2022 Aiven Oy and 3 | # bigquery-connector-for-apache-kafka project contributors 4 | # 5 | # This software contains code derived from the Confluent BigQuery 6 | # Kafka Connector, Copyright Confluent, Inc, which in turn 7 | # contains code derived from the WePay BigQuery Kafka Connector, 8 | # Copyright WePay, Inc. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, 17 | # software distributed under the License is distributed on an 18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19 | # KIND, either express or implied. See the License for the 20 | # specific language governing permissions and limitations 21 | # under the License. 22 | # 23 | 24 | log4j.rootLogger=INFO, stdout 25 | 26 | # Send the logs to the console. 
27 | # 28 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 29 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 30 | 31 | connect.log.pattern=[%d] %p %X{connector.context}%m (%c:%L)%n 32 | log4j.appender.stdout.layout.ConversionPattern=${connect.log.pattern} 33 | log4j.appender.connectAppender.layout.ConversionPattern=${connect.log.pattern} 34 | 35 | # These are used in the log4j properties file that ships by default with Connect 36 | log4j.logger.org.apache.zookeeper=ERROR 37 | log4j.logger.org.reflections=ERROR 38 | 39 | log4j.logger.com.wepay.kafka.connect.bigquery=DEBUG 40 | 41 | # We see a lot of WARN-level messages from this class when a table is created by the connector and 42 | # then written to shortly after. No need for that much noise during routine tests 43 | log4j.logger.com.wepay.kafka.connect.bigquery.write.batch.TableWriter=ERROR 44 | # Logs a message at INFO on every http request 45 | log4j.logger.org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster=WARN 46 | log4j.logger.com.wepay.kafka.connect.bigquery.integration.BigQueryErrorResponsesIT=DEBUG 47 | -------------------------------------------------------------------------------- /scripts/release_detail.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2024 Copyright 2022 Aiven Oy and 4 | # bigquery-connector-for-apache-kafka project contributors 5 | # 6 | # This software contains code derived from the Confluent BigQuery 7 | # Kafka Connector, Copyright Confluent, Inc, which in turn 8 | # contains code derived from the WePay BigQuery Kafka Connector, 9 | # Copyright WePay, Inc. 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 
#!/bin/bash
#
# Copyright 2024 Copyright 2022 Aiven Oy and
# bigquery-connector-for-apache-kafka project contributors
#
# This software contains code derived from the Confluent BigQuery
# Kafka Connector, Copyright Confluent, Inc, which in turn
# contains code derived from the WePay BigQuery Kafka Connector,
# Copyright WePay, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Emits release-note detail for the version given as $1: every commit subject
# and co-author between the last released tag (from pom.xml) and HEAD.

git fetch origin

# Quote "$1": an unquoted empty/absent argument makes `[ -z ]` misbehave.
if [ -z "$1" ]
then
  echo "Must provide final version" >&2
  exit 1
fi

# The previous release version is recorded as <latestRelease> in pom.xml.
startTag=$(grep latestRelease pom.xml | cut -f2 -d">" | cut -f1 -d"<")
endTag="$1"

start=$(git rev-parse "v${startTag}")
end=$(git rev-parse HEAD)
commits="${start}...${end}"

printf "## v%s\n### What's changed\n" "${endTag}"
git log --format=' - %s' "${commits}"

printf "\n\n### Co-authored by\n"
git log --format=' - %an' "${commits}" | sort -u

# Pass the tags as printf arguments rather than interpolating them into the
# format string, so tag text can never be misread as format directives.
printf "\n\n### Full Changelog\nhttps://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/compare/v%s...v%s\n\n" "${startTag}" "${endTag}"
package com.wepay.kafka.connect.bigquery.config;

import io.aiven.kafka.config.tools.BaseConfigDefBean;
import io.aiven.kafka.config.tools.ExtendedConfigKeyBean;
import org.apache.velocity.tools.config.DefaultKey;
import org.apache.velocity.tools.config.ValidScope;

/**
 * A {@link BaseConfigDefBean} that exposes the connector's configuration definition
 * ({@link BigQuerySinkConfig#getConfig}) to Velocity templates, wrapping each config
 * key in an {@link ExtendedConfigKeyBean}.
 *
 * <p>Registered in the Velocity tool context under the default key
 * {@code extendedConfigDef}, valid in application scope; instantiated reflectively
 * by Velocity, hence {@code @SuppressWarnings("unused")}.</p>
 */
@SuppressWarnings("unused")
@DefaultKey("extendedConfigDef")
@ValidScope({"application"})
public class BigQueryConfigDefBean extends BaseConfigDefBean {
  /**
   * Constructor. Supplies the sink connector's ConfigDef and the bean factory
   * used to wrap each of its keys.
   */
  public BigQueryConfigDefBean() {
    super(BigQuerySinkConfig.getConfig(), ExtendedConfigKeyBean::new);
  }
}
package io.aiven.kafka.config.tools;

import io.aiven.kafka.utils.ExtendedConfigKey;
import org.apache.kafka.common.config.ConfigDef;


/**
 * Defines the variables that are available for the Velocity template to access a
 * {@link ConfigDef.ConfigKey} object, including the extra metadata carried by an
 * {@link ExtendedConfigKey} when the wrapped key is one.
 *
 * @see <a href="https://velocity.apache.org/">Apache Velocity</a>
 */
public class ExtendedConfigKeyBean extends ConfigKeyBean {
  /** Whether the wrapped key is an {@link ExtendedConfigKey} (extended metadata available). */
  private final boolean extendedFlag;

  /**
   * Constructor.
   *
   * @param key
   *          the Key to wrap.
   */
  public ExtendedConfigKeyBean(final ConfigDef.ConfigKey key) {
    super(key);
    this.extendedFlag = (key instanceof ExtendedConfigKey);
  }

  /**
   * Reports whether the wrapped key carries extended metadata.
   *
   * @return true if the wrapped key is an {@link ExtendedConfigKey}.
   */
  public final boolean isExtendedFlag() {
    return extendedFlag;
  }

  /** Views the wrapped key as an ExtendedConfigKey, or null for a plain ConfigKey. */
  private ExtendedConfigKey asExtended() {
    return extendedFlag ? (ExtendedConfigKey) key : null;
  }

  /**
   * Returns the {@code since} text recorded on the extended key.
   *
   * @return the since value, or null when the wrapped key is a plain ConfigKey.
   */
  public final String since() {
    return extendedFlag ? asExtended().since : null;
  }

  /**
   * Reports whether the option is marked deprecated.
   *
   * @return true only when the wrapped key is extended and deprecated.
   */
  @SuppressWarnings("unused")
  public final boolean isDeprecated() {
    return extendedFlag && asExtended().isDeprecated();
  }

  /**
   * Returns the deprecation details recorded on the extended key.
   *
   * @return the deprecation info, or null when the wrapped key is a plain ConfigKey.
   */
  public final ExtendedConfigKey.DeprecatedInfo deprecated() {
    return extendedFlag ? asExtended().deprecated : null;
  }

}
package io.aiven.kafka.config.tools;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@link BaseConfigDefBean} navigation (parents, dependents, config keys)
 * against the BigQuery sink connector's ConfigDef.
 */
public class ConfigDefBeanTests {

  private final BaseConfigDefBean underTest =
      new BaseConfigDefBean(BigQuerySinkConfig.getConfig(), ConfigKeyBean::new) {
      };

  /**
   * parents() must list every option that other options depend on; filtering by a
   * specific option must list only that option's own parents.
   */
  @Test
  public void testParents() {
    List<String> expected = Arrays.asList(
        BigQuerySinkConfig.UPSERT_ENABLED_CONFIG,
        BigQuerySinkConfig.DELETE_ENABLED_CONFIG,
        BigQuerySinkConfig.USE_STORAGE_WRITE_API_CONFIG,
        BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG);
    List parents = underTest.parents();
    Collections.sort(expected);
    assertEquals(expected, ((List<ConfigKeyBean>) parents).stream()
        .map(ConfigKeyBean::getName).collect(Collectors.toList()));

    parents = underTest.parents(BigQuerySinkConfig.USE_STORAGE_WRITE_API_CONFIG);

    assertEquals(2, parents.size());
    expected = Arrays.asList(
        BigQuerySinkConfig.UPSERT_ENABLED_CONFIG,
        BigQuerySinkConfig.DELETE_ENABLED_CONFIG);
    Collections.sort(expected);
    assertEquals(expected, ((List<ConfigKeyBean>) parents).stream()
        .map(ConfigKeyBean::getName).collect(Collectors.toList()));
  }

  /**
   * dependents() must list every option that depends on another option.
   */
  @Test
  public void testDependents() {
    List<String> expected = Arrays.asList(
        BigQuerySinkConfig.MERGE_INTERVAL_MS_CONFIG,
        BigQuerySinkConfig.INTERMEDIATE_TABLE_SUFFIX_CONFIG,
        BigQuerySinkConfig.USE_STORAGE_WRITE_API_CONFIG,
        BigQuerySinkConfig.ENABLE_BATCH_MODE_CONFIG,
        BigQuerySinkConfig.COMMIT_INTERVAL_SEC_CONFIG,
        BigQuerySinkConfig.KAFKA_KEY_FIELD_NAME_CONFIG);
    List deps = underTest.dependents();
    Collections.sort(expected);
    assertEquals(expected, ((List<ConfigKeyBean>) deps).stream()
        .map(ConfigKeyBean::getName).collect(Collectors.toList()));
  }

  /**
   * configKeys() must expose a non-empty list of wrapped keys.
   */
  @Test
  public void testConfigKeys() {
    List opts = underTest.configKeys();
    assertNotNull(opts);
    // assertFalse states the intent directly (was assertTrue(!...isEmpty())).
    assertFalse(opts.isEmpty());
  }

}
22 | */ 23 | 24 | 25 | package io.aiven.kafka.config.tools; 26 | 27 | import io.aiven.kafka.utils.ConfigKeyBuilder; 28 | import io.aiven.kafka.utils.ExtendedConfigKey; 29 | import org.apache.kafka.common.config.ConfigDef; 30 | import org.junit.jupiter.api.Test; 31 | 32 | import static org.junit.jupiter.api.Assertions.assertEquals; 33 | import static org.junit.jupiter.api.Assertions.assertFalse; 34 | import static org.junit.jupiter.api.Assertions.assertNotNull; 35 | import static org.junit.jupiter.api.Assertions.assertNull; 36 | import static org.junit.jupiter.api.Assertions.assertTrue; 37 | 38 | public class ExtendedConfigKeyBeanTest { 39 | 40 | @Test 41 | void testExtendedConfigKey() { 42 | ExtendedConfigKey extendedConfigKey = ExtendedConfigKey.builder("testOpt").deprecatedInfo(ExtendedConfigKey.DeprecatedInfo.builder()).build(); 43 | ExtendedConfigKeyBean underTest = new ExtendedConfigKeyBean(extendedConfigKey); 44 | assertTrue(underTest.isExtendedFlag()); 45 | assertNotNull(underTest.deprecated()); 46 | assertNull(underTest.since()); 47 | 48 | extendedConfigKey = ExtendedConfigKey.builder("testOpt").deprecatedInfo(ExtendedConfigKey.DeprecatedInfo.builder()).since("Then").build(); 49 | underTest = new ExtendedConfigKeyBean(extendedConfigKey); 50 | assertTrue(underTest.isExtendedFlag()); 51 | assertNotNull(underTest.deprecated()); 52 | assertEquals("Then", underTest.since()); 53 | 54 | } 55 | 56 | @Test 57 | void testConfigKey() { 58 | ConfigDef.ConfigKey configKey = new ConfigKeyBuilder<>("testOpt").build(); 59 | ExtendedConfigKeyBean underTest = new ExtendedConfigKeyBean(configKey); 60 | assertFalse(underTest.isExtendedFlag()); 61 | assertNull(underTest.deprecated()); 62 | assertNull(underTest.since()); 63 | } 64 | 65 | } 66 | --------------------------------------------------------------------------------