├── .github
└── workflows
│ ├── build_site.yml
│ ├── create_release.yml
│ ├── manual.yml
│ ├── nightly.yml
│ ├── prs_and_commits.yml
│ └── release_pr_workflow.yml
├── .gitignore
├── LICENSE.md
├── README.md
├── config
├── checkstyle
│ └── suppressions.xml
└── copyright
│ └── custom-header-styles.xml
├── docs
├── pom.xml
├── sink-connector-config-options.rst
└── src
│ └── site
│ ├── custom
│ └── project-info-reports.properties
│ ├── markdown
│ ├── CODE_OF_CONDUCT.md
│ ├── CONTRIBUTING.md
│ ├── RELEASE_NOTES.md
│ ├── SECURITY.md
│ ├── configuration.md.vm
│ ├── designNotes.md.vm
│ ├── index.md.vm
│ ├── writeapiBestPractices.md
│ ├── writeapiPartitionDecorator.md
│ └── writeapiUnknownFields.md
│ └── site.xml
├── kcbq-api
├── pom.xml
└── src
│ └── main
│ └── java
│ └── com
│ └── wepay
│ └── kafka
│ └── connect
│ └── bigquery
│ └── api
│ ├── KafkaSchemaRecordType.java
│ └── SchemaRetriever.java
├── kcbq-connector
├── pom.xml
└── src
│ ├── main
│ ├── assembly
│ │ ├── release-tar.xml
│ │ └── release-zip.xml
│ ├── java
│ │ ├── com
│ │ │ └── wepay
│ │ │ │ └── kafka
│ │ │ │ └── connect
│ │ │ │ └── bigquery
│ │ │ │ ├── BigQuerySinkConnector.java
│ │ │ │ ├── BigQuerySinkTask.java
│ │ │ │ ├── ErrantRecordHandler.java
│ │ │ │ ├── GcpClientBuilder.java
│ │ │ │ ├── GcsToBqLoadRunnable.java
│ │ │ │ ├── MergeQueries.java
│ │ │ │ ├── RecordTableResolver.java
│ │ │ │ ├── SchemaManager.java
│ │ │ │ ├── config
│ │ │ │ ├── BigQuerySinkConfig.java
│ │ │ │ ├── BigQuerySinkTaskConfig.java
│ │ │ │ ├── CredentialsValidator.java
│ │ │ │ ├── GcsBucketValidator.java
│ │ │ │ ├── MultiPropertyValidator.java
│ │ │ │ ├── PartitioningModeValidator.java
│ │ │ │ ├── PartitioningTypeValidator.java
│ │ │ │ ├── StorageWriteApiValidator.java
│ │ │ │ └── UpsertDeleteValidator.java
│ │ │ │ ├── convert
│ │ │ │ ├── BigQueryRecordConverter.java
│ │ │ │ ├── BigQuerySchemaConverter.java
│ │ │ │ ├── KafkaDataBuilder.java
│ │ │ │ ├── RecordConverter.java
│ │ │ │ ├── SchemaConverter.java
│ │ │ │ └── logicaltype
│ │ │ │ │ ├── DebeziumLogicalConverters.java
│ │ │ │ │ ├── KafkaLogicalConverters.java
│ │ │ │ │ ├── LogicalConverterRegistry.java
│ │ │ │ │ └── LogicalTypeConverter.java
│ │ │ │ ├── exception
│ │ │ │ ├── BigQueryConnectException.java
│ │ │ │ ├── BigQueryErrorResponses.java
│ │ │ │ ├── BigQueryStorageWriteApiConnectException.java
│ │ │ │ ├── BigQueryStorageWriteApiErrorResponses.java
│ │ │ │ ├── ConversionConnectException.java
│ │ │ │ ├── ExpectedInterruptException.java
│ │ │ │ └── GcsConnectException.java
│ │ │ │ ├── retrieve
│ │ │ │ └── IdentitySchemaRetriever.java
│ │ │ │ ├── utils
│ │ │ │ ├── FieldNameSanitizer.java
│ │ │ │ ├── GsonUtils.java
│ │ │ │ ├── PartitionedTableId.java
│ │ │ │ ├── SinkRecordConverter.java
│ │ │ │ ├── SleepUtils.java
│ │ │ │ ├── TableNameUtils.java
│ │ │ │ └── Time.java
│ │ │ │ └── write
│ │ │ │ ├── RecordBatches.java
│ │ │ │ ├── batch
│ │ │ │ ├── CountDownRunnable.java
│ │ │ │ ├── GcsBatchTableWriter.java
│ │ │ │ ├── KcbqThreadPoolExecutor.java
│ │ │ │ ├── MergeBatches.java
│ │ │ │ ├── TableWriter.java
│ │ │ │ └── TableWriterBuilder.java
│ │ │ │ ├── row
│ │ │ │ ├── AdaptiveBigQueryWriter.java
│ │ │ │ ├── BigQueryWriter.java
│ │ │ │ ├── GcsToBqWriter.java
│ │ │ │ ├── SimpleBigQueryWriter.java
│ │ │ │ └── UpsertDeleteBigQueryWriter.java
│ │ │ │ └── storage
│ │ │ │ ├── ApplicationStream.java
│ │ │ │ ├── ConvertedRecord.java
│ │ │ │ ├── JsonStreamWriterFactory.java
│ │ │ │ ├── StorageApiBatchModeHandler.java
│ │ │ │ ├── StorageWriteApiBase.java
│ │ │ │ ├── StorageWriteApiBatchApplicationStream.java
│ │ │ │ ├── StorageWriteApiDefaultStream.java
│ │ │ │ ├── StorageWriteApiRetryHandler.java
│ │ │ │ ├── StorageWriteApiWriter.java
│ │ │ │ ├── StreamState.java
│ │ │ │ └── StreamWriter.java
│ │ └── io
│ │ │ └── aiven
│ │ │ └── kafka
│ │ │ └── utils
│ │ │ ├── ConfigKeyBuilder.java
│ │ │ ├── ExtendedConfigKey.java
│ │ │ └── VersionInfo.java
│ └── resources
│ │ └── META-INF
│ │ └── services
│ │ └── org.apache.kafka.connect.sink.SinkConnector
│ └── test
│ ├── java
│ └── com
│ │ └── wepay
│ │ └── kafka
│ │ └── connect
│ │ └── bigquery
│ │ ├── BigQuerySinkConnectorTest.java
│ │ ├── BigQuerySinkTaskTest.java
│ │ ├── BigQueryStorageApiBatchSinkTaskTest.java
│ │ ├── BigQueryStorageApiSinkTaskTest.java
│ │ ├── ErrantRecordHandlerTest.java
│ │ ├── GcpClientBuilderProjectTest.java
│ │ ├── GcsToBqLoadRunnableTest.java
│ │ ├── MergeQueriesTest.java
│ │ ├── RecordTableResolverTest.java
│ │ ├── SchemaManagerTest.java
│ │ ├── SinkPropertiesFactory.java
│ │ ├── SinkTaskPropertiesFactory.java
│ │ ├── config
│ │ ├── BigQuerySinkConfigTest.java
│ │ ├── CredentialsValidatorTest.java
│ │ ├── GcsBucketValidatorTest.java
│ │ ├── MultiPropertyValidatorTest.java
│ │ ├── PartitioningModeValidatorTest.java
│ │ ├── PartitioningTypeValidatorTest.java
│ │ └── StorageWriteApiValidatorTest.java
│ │ ├── convert
│ │ ├── BigQueryRecordConverterTest.java
│ │ ├── BigQuerySchemaConverterTest.java
│ │ ├── KafkaDataConverterTest.java
│ │ └── logicaltype
│ │ │ ├── DebeziumLogicalConvertersTest.java
│ │ │ └── KafkaLogicalConvertersTest.java
│ │ ├── exception
│ │ ├── BigQueryErrorResponsesTest.java
│ │ ├── BigQueryStorageWriteApiConnectExceptionTest.java
│ │ └── BigQueryStorageWriteApiErrorResponsesTest.java
│ │ ├── integration
│ │ ├── ApplicationStreamIT.java
│ │ ├── BaseConnectorIT.java
│ │ ├── BigQueryErrantRecordHandlerIT.java
│ │ ├── BigQueryErrorResponsesIT.java
│ │ ├── BigQuerySinkConnectorIT.java
│ │ ├── GcpClientBuilderIT.java
│ │ ├── GcsBatchSchemaEvolutionIT.java
│ │ ├── StorageWriteApiBatchBigQuerySinkConnectorIT.java
│ │ ├── StorageWriteApiBigQuerySinkConnectorIT.java
│ │ ├── TimePartitioningIT.java
│ │ ├── UpsertDeleteBigQuerySinkConnectorIT.java
│ │ ├── UpsertDeleteBigQuerySinkConnectorWithSRIT.java
│ │ ├── VersionTestIT.java
│ │ └── utils
│ │ │ ├── BigQueryTestUtils.java
│ │ │ ├── BucketClearer.java
│ │ │ ├── SchemaRegistryTestUtils.java
│ │ │ ├── TableClearer.java
│ │ │ ├── TestCaseLogger.java
│ │ │ └── TimePartitioningTestUtils.java
│ │ ├── utils
│ │ ├── FieldNameSanitizerTest.java
│ │ ├── MockTime.java
│ │ └── PartitionedTableIdTest.java
│ │ └── write
│ │ ├── batch
│ │ └── GcsBatchTableWriterTest.java
│ │ ├── row
│ │ ├── BigQueryWriterTest.java
│ │ └── GcsToBqWriterTest.java
│ │ └── storage
│ │ ├── BigQueryBuilderTest.java
│ │ ├── BigQueryWriteSettingsBuilderTest.java
│ │ ├── GcsBuilderTest.java
│ │ ├── StorageApiBatchModeHandlerTest.java
│ │ ├── StorageWriteApiBatchApplicationStreamTest.java
│ │ ├── StorageWriteApiDefaultStreamTest.java
│ │ └── StorageWriteApiWriterTest.java
│ └── resources
│ ├── integration_test_cases
│ ├── gcs-load
│ │ ├── data.json
│ │ └── schema.json
│ ├── logical-types
│ │ ├── data.json
│ │ └── schema.json
│ ├── matryoshka-dolls
│ │ ├── data.json
│ │ └── schema.json
│ ├── nulls
│ │ ├── data.json
│ │ └── schema.json
│ └── primitives
│ │ ├── data.json
│ │ └── schema.json
│ └── log4j.properties
├── pom.xml
├── scripts
└── release_detail.sh
└── tools
├── pom.xml
└── src
├── main
├── java
│ ├── com
│ │ └── wepay
│ │ │ └── kafka
│ │ │ └── connect
│ │ │ └── bigquery
│ │ │ └── config
│ │ │ └── BigQueryConfigDefBean.java
│ └── io
│ │ └── aiven
│ │ └── kafka
│ │ └── config
│ │ └── tools
│ │ ├── BaseConfigDefBean.java
│ │ ├── ConfigKeyBean.java
│ │ └── ExtendedConfigKeyBean.java
└── resources
│ └── META-INF
│ └── maven
│ └── site-tools.xml
└── test
└── java
└── io
└── aiven
└── kafka
└── config
└── tools
├── ConfigDefBeanTests.java
└── ExtendedConfigKeyBeanTest.java
/.github/workflows/build_site.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2024 Aiven Oy and
3 | # bigquery-connector-for-apache-kafka project contributors
4 | #
5 | # This software contains code derived from the Confluent BigQuery
6 | # Kafka Connector, Copyright Confluent, Inc, which in turn
7 | # contains code derived from the WePay BigQuery Kafka Connector,
8 | # Copyright WePay, Inc.
9 | #
10 | # Licensed under the Apache License, Version 2.0 (the "License");
11 | # you may not use this file except in compliance with the License.
12 | # You may obtain a copy of the License at
13 | #
14 | # http://www.apache.org/licenses/LICENSE-2.0
15 | #
16 | # Unless required by applicable law or agreed to in writing,
17 | # software distributed under the License is distributed on an
18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | # KIND, either express or implied. See the License for the
20 | # specific language governing permissions and limitations
21 | # under the License.
22 | #
23 |
24 |
25 | # Simple workflow to build the site and deploy it.
26 | name: Build site and deploy
27 |
28 | on:
29 | # Runs on pushes targeting the default branch
30 | push:
31 | branches: ["main"]
32 |
33 | # Allows you to run this workflow manually from the Actions tab
34 | workflow_dispatch:
35 |
36 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
37 | permissions:
38 | contents: read
39 | pages: write
40 | id-token: write
41 |
42 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
43 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
44 | concurrency:
45 | group: "pages"
46 | cancel-in-progress: false
47 |
48 | jobs:
49 | # Build job
50 | build:
51 | runs-on: ubuntu-latest
52 | steps:
53 | - name: Checkout
54 | uses: actions/checkout@v4
55 |
56 | - name: Build connector
57 | run: mvn install -DskipITs
58 |
59 | - name: Build site tools
60 | run: mvn -f tools
61 |
62 | - name: Build site
63 | run: mvn -f docs
64 |
65 | - name: Upload artifact
66 | uses: actions/upload-pages-artifact@v3
67 | with:
68 | path: ./docs/target/site
69 |
70 | # Deployment job
71 | deploy:
72 | environment:
73 | name: github-pages
74 | url: ${{ steps.deployment.outputs.page_url }}
75 | runs-on: ubuntu-latest
76 | needs: build
77 | steps:
78 | - name: Deploy to GitHub Pages
79 | id: deployment
80 | uses: actions/deploy-pages@v4
81 |
--------------------------------------------------------------------------------
/.github/workflows/create_release.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2024 Aiven Oy and
3 | # bigquery-connector-for-apache-kafka project contributors
4 | #
5 | # This software contains code derived from the Confluent BigQuery
6 | # Kafka Connector, Copyright Confluent, Inc, which in turn
7 | # contains code derived from the WePay BigQuery Kafka Connector,
8 | # Copyright WePay, Inc.
9 | #
10 | # Licensed under the Apache License, Version 2.0 (the "License");
11 | # you may not use this file except in compliance with the License.
12 | # You may obtain a copy of the License at
13 | #
14 | # http://www.apache.org/licenses/LICENSE-2.0
15 | #
16 | # Unless required by applicable law or agreed to in writing,
17 | # software distributed under the License is distributed on an
18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | # KIND, either express or implied. See the License for the
20 | # specific language governing permissions and limitations
21 | # under the License.
22 | #
23 |
24 | name: Create release
25 |
26 | on:
27 | workflow_dispatch:
28 | inputs:
29 | commit_hash:
30 | description: "Hash of 'Release version x.y.z' commit"
31 | required: true
32 |
33 | permissions:
34 | contents: write
35 | pull-requests: write
36 | issues: write
37 |
38 | jobs:
39 | build:
40 | name: Create Release
41 | runs-on: ubuntu-latest
42 | steps:
43 | - name: Checkout code
44 | uses: actions/checkout@v2
45 | with:
46 | ref: ${{ github.event.inputs.commit_hash }}
47 |
48 | - name: Check commit title and extract version
49 | run: |
50 | export commit_title=$(git log --pretty=format:%s -1 ${{ github.event.inputs.commit_hash }})
51 | echo "Commit title: $commit_title"
52 | if [[ $commit_title =~ ^Release\ version\ [0-9]+\.[0-9]+\.[0-9]+(-(alpha|beta|rc[0-9]+))?$ ]]; then
53 | echo "Valid commit title"
54 | else
55 | echo "Invalid commit title"
56 | exit 1
57 | fi
58 | export version=$(echo ${commit_title} | sed s/^Release\ version\ //g)
59 | echo "Will use version ${version}"
60 | echo "version=${version}" >> $GITHUB_ENV
61 |
62 |       - name: Set up JDK 17
63 | uses: actions/setup-java@v4
64 | with:
65 | distribution: 'adopt'
66 | java-version: 17
67 | cache: maven
68 |
69 | - name: Build
70 | run: |
71 | mvn -ntp install -DskipTests
72 | mvn -f kcbq-connector clean package assembly:single@release-artifacts -DskipTests
73 |
74 | export tar_file=$(ls ./kcbq-connector/target/ | grep tar)
75 | export zip_file=$(ls ./kcbq-connector/target/ | grep zip)
76 | echo tar_file=${tar_file} >> $GITHUB_ENV
77 | echo zip_file=${zip_file} >> $GITHUB_ENV
78 |
79 | echo tar_path=`realpath ./kcbq-connector/target/${tar_file}` >> $GITHUB_ENV
80 | echo zip_path=`realpath ./kcbq-connector/target/${zip_file}` >> $GITHUB_ENV
81 |
82 | - name: Create tag
83 | run: |
84 | git config --local user.name "GitHub Action"
85 | git config --local user.email "action@github.com"
86 | git tag -a "v${{ env.version }}" -m "Release version ${{ env.version }}"
87 | git push origin "v${{ env.version }}"
88 |
89 | - name: Create release draft
90 | id: create_release
91 | uses: actions/create-release@v1
92 | env:
93 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
94 | with:
95 | tag_name: "v${{ env.version }}"
96 | release_name: "v${{ env.version }}"
97 | commitish: ${{ github.event.inputs.commit_hash }}
98 | body: |
99 | *Fill in*
100 | draft: true
101 | prerelease: false
102 |
103 | - name: Upload tar
104 | uses: actions/upload-release-asset@v1
105 | env:
106 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
107 | with:
108 | upload_url: ${{ steps.create_release.outputs.upload_url }}
109 | asset_path: ${{ env.tar_path }}
110 | asset_name: ${{ env.tar_file }}
111 | asset_content_type: application/tar
112 |
113 | - name: Upload zip
114 | uses: actions/upload-release-asset@v1
115 | env:
116 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
117 | with:
118 | upload_url: ${{ steps.create_release.outputs.upload_url }}
119 | asset_path: ${{ env.zip_path }}
120 | asset_name: ${{ env.zip_file }}
121 | asset_content_type: application/zip
122 |
--------------------------------------------------------------------------------
/.github/workflows/manual.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2024 Aiven Oy and
3 | # bigquery-connector-for-apache-kafka project contributors
4 | #
5 | # This software contains code derived from the Confluent BigQuery
6 | # Kafka Connector, Copyright Confluent, Inc, which in turn
7 | # contains code derived from the WePay BigQuery Kafka Connector,
8 | # Copyright WePay, Inc.
9 | #
10 | # Licensed under the Apache License, Version 2.0 (the "License");
11 | # you may not use this file except in compliance with the License.
12 | # You may obtain a copy of the License at
13 | #
14 | # http://www.apache.org/licenses/LICENSE-2.0
15 | #
16 | # Unless required by applicable law or agreed to in writing,
17 | # software distributed under the License is distributed on an
18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | # KIND, either express or implied. See the License for the
20 | # specific language governing permissions and limitations
21 | # under the License.
22 | #
23 |
24 | # Workflow to run the full build and integration tests on demand,
25 | # either manually from the Actions tab or from another workflow.
26 | name: Manual build
27 | on:
28 | workflow_dispatch:
29 | workflow_call:
30 | secrets:
31 | GCP_CREDENTIALS:
32 | KCBQ_TEST_PROJECT:
33 | KCBQ_TEST_DATASET:
34 | KCBQ_TEST_BUCKET:
35 |
36 | permissions:
37 | contents: write
38 | pull-requests: write
39 | issues: write
40 |
41 |
42 | # Disallow concurrent runs for the same PR by cancelling in-progress runs
43 | # when new commits are pushed
44 | concurrency:
45 | group: Manual_Build-${{ github.event.pull_request.number || github.ref }}
46 | cancel-in-progress: true
47 |
48 | jobs:
49 | build:
50 | name: Build
51 | runs-on: ubuntu-latest
52 | steps:
53 | - name: Checkout code
54 | uses: actions/checkout@v2
55 | - name: Dump GitHub context
56 | env:
57 | GITHUB_CONTEXT: ${{ toJson(github) }}
58 | run: echo "$GITHUB_CONTEXT"
59 | - name: Set up JDK 17
60 | uses: actions/setup-java@v4
61 | with:
62 | distribution: 'adopt'
63 | java-version: 17
64 | cache: maven
65 | - name: Integration tests (Maven)
66 | env:
67 | # Necessary for client builder integration tests that run with
68 | # default application credentials
69 | CREDENTIALS_JSON: ${{ secrets.GCP_CREDENTIALS }}
70 | GOOGLE_APPLICATION_CREDENTIALS: /tmp/creds.json
71 | KCBQ_TEST_KEYFILE: /tmp/creds.json
72 | KCBQ_TEST_KEY_SOURCE: FILE
73 | KCBQ_TEST_PROJECT: ${{ secrets.KCBQ_TEST_PROJECT }}
74 | KCBQ_TEST_DATASET: ${{ secrets.KCBQ_TEST_DATASET }}
75 | KCBQ_TEST_BUCKET: ${{ secrets.KCBQ_TEST_BUCKET }}
76 | run: |
77 | echo "$CREDENTIALS_JSON" > /tmp/creds.json
78 | export KCBQ_TEST_TABLE_SUFFIX=_$(date +%s)_$RANDOM
79 | mvn -ntp -P ci -Dskip.unit.tests=true verify
80 | - name: Upload integration test results (Maven)
81 | if: always()
82 | uses: actions/upload-artifact@v4
83 | with:
84 | path: |
85 | **/target/failsafe-reports/*
86 | name: integration-test-results
87 | retention-days: 1
88 |
--------------------------------------------------------------------------------
/.github/workflows/nightly.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2024 Aiven Oy and
3 | # bigquery-connector-for-apache-kafka project contributors
4 | #
5 | # This software contains code derived from the Confluent BigQuery
6 | # Kafka Connector, Copyright Confluent, Inc, which in turn
7 | # contains code derived from the WePay BigQuery Kafka Connector,
8 | # Copyright WePay, Inc.
9 | #
10 | # Licensed under the Apache License, Version 2.0 (the "License");
11 | # you may not use this file except in compliance with the License.
12 | # You may obtain a copy of the License at
13 | #
14 | # http://www.apache.org/licenses/LICENSE-2.0
15 | #
16 | # Unless required by applicable law or agreed to in writing,
17 | # software distributed under the License is distributed on an
18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | # KIND, either express or implied. See the License for the
20 | # specific language governing permissions and limitations
21 | # under the License.
22 | #
23 |
24 | # Nightly workflow that reruns the full build and integration tests
25 | # against main by calling the manual build workflow.
26 | name: Nightly Build
27 | on:
28 | workflow_dispatch:
29 |   schedule: # runs daily at 01:17 GMT
30 | - cron: '17 1 * * *'
31 | workflow_call:
32 | secrets:
33 | GCP_CREDENTIALS:
34 | KCBQ_TEST_PROJECT:
35 | KCBQ_TEST_DATASET:
36 | KCBQ_TEST_BUCKET:
37 |
38 | permissions:
39 | contents: write
40 | pull-requests: write
41 | issues: write
42 |
43 |
44 | # Disallow concurrent runs for the same PR by cancelling in-progress runs
45 | # when new commits are pushed
46 | #concurrency:
47 | # group: Manual_Build-${{ github.event.pull_request.number || github.ref }}
48 | # cancel-in-progress: true
49 |
50 | jobs:
51 | call-workflow-2-in-local-repo:
52 | uses: ./.github/workflows/manual.yml
53 |     secrets:
54 |       GCP_CREDENTIALS: ${{ secrets.GCP_CREDENTIALS }}
55 |       KCBQ_TEST_PROJECT: ${{ secrets.KCBQ_TEST_PROJECT }}
56 |       KCBQ_TEST_DATASET: ${{ secrets.KCBQ_TEST_DATASET }}
57 |       KCBQ_TEST_BUCKET: ${{ secrets.KCBQ_TEST_BUCKET }}
58 |
--------------------------------------------------------------------------------
/.github/workflows/prs_and_commits.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2024 Aiven Oy and
3 | # bigquery-connector-for-apache-kafka project contributors
4 | #
5 | # This software contains code derived from the Confluent BigQuery
6 | # Kafka Connector, Copyright Confluent, Inc, which in turn
7 | # contains code derived from the WePay BigQuery Kafka Connector,
8 | # Copyright WePay, Inc.
9 | #
10 | # Licensed under the Apache License, Version 2.0 (the "License");
11 | # you may not use this file except in compliance with the License.
12 | # You may obtain a copy of the License at
13 | #
14 | # http://www.apache.org/licenses/LICENSE-2.0
15 | #
16 | # Unless required by applicable law or agreed to in writing,
17 | # software distributed under the License is distributed on an
18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | # KIND, either express or implied. See the License for the
20 | # specific language governing permissions and limitations
21 | # under the License.
22 | #
23 |
24 | # Workflow to check pull requests and new commits to main branches
25 | # This checks the source in the state as if after the merge.
26 | name: Pull request checks
27 | on:
28 | pull_request:
29 | branches: [ main ]
30 | push:
31 | branches: [ main ]
32 |
33 | # Disallow concurrent runs for the same PR by cancelling in-progress runs
34 | # when new commits are pushed
35 | concurrency:
36 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
37 | cancel-in-progress: true
38 |
39 | jobs:
40 | build:
41 | name: Build
42 | runs-on: ubuntu-latest
43 | steps:
44 | - name: Checkout code
45 | uses: actions/checkout@v2
46 | - name: Set up JDK 17
47 | uses: actions/setup-java@v4
48 | with:
49 | distribution: 'adopt'
50 | java-version: 17
51 | cache: maven
52 | - name: License header check
53 | run: |
54 | mvn -ntp license:remove license:format
55 | if [[ -n $(git status -s) ]]; then
56 | echo 1>&2 'Some files do not have the correct license header:'
57 | git diff --name-only 1>&2
58 | echo 1>&2 'Please update the license headers for these files by running `mvn license:remove license:format`'
59 | exit 1
60 | fi
61 | - name: Build (Maven)
62 | run: mvn -ntp -P ci --batch-mode clean package -DskipTests
63 | - name: Unit tests (Maven)
64 | run: mvn -ntp -P ci --batch-mode test
65 | - name: "Upload build failure reports"
66 | uses: actions/upload-artifact@v4
67 | if: failure()
68 | with:
69 | name: unit-test-results
70 | path: |
71 | **/target/*-reports/**
72 | retention-days: 1
73 |
--------------------------------------------------------------------------------
/.github/workflows/release_pr_workflow.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2024 Aiven Oy and
3 | # bigquery-connector-for-apache-kafka project contributors
4 | #
5 | # This software contains code derived from the Confluent BigQuery
6 | # Kafka Connector, Copyright Confluent, Inc, which in turn
7 | # contains code derived from the WePay BigQuery Kafka Connector,
8 | # Copyright WePay, Inc.
9 | #
10 | # Licensed under the Apache License, Version 2.0 (the "License");
11 | # you may not use this file except in compliance with the License.
12 | # You may obtain a copy of the License at
13 | #
14 | # http://www.apache.org/licenses/LICENSE-2.0
15 | #
16 | # Unless required by applicable law or agreed to in writing,
17 | # software distributed under the License is distributed on an
18 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | # KIND, either express or implied. See the License for the
20 | # specific language governing permissions and limitations
21 | # under the License.
22 | #
23 |
24 | # The workflow to create PRs with release commits.
25 | name: Create release PR
26 | on:
27 | workflow_dispatch:
28 | inputs:
29 | release_version:
30 | description: "Release version '0.1.2' (without 'v')"
31 | required: true
32 | snapshot_version:
33 | description: "Snapshot version '0.2.0-SNAPSHOT' (without 'v')"
34 | required: true
35 |
36 | permissions:
37 | contents: write
38 | pull-requests: write
39 | issues: write
40 |
41 | jobs:
42 | create_release_pr:
43 | name: Create release PR (job)
44 | runs-on: ubuntu-latest
45 | steps:
46 | - name: Check versions
47 | run: |
48 | echo "Checking release version..."
49 |           if echo ${{ github.event.inputs.release_version }} | grep --invert-match '^[0-9]\+\.[0-9]\+\.[0-9]\+\(-\(alpha\|beta\|rc[0-9]\+\)\)\?$' > /dev/null; then
50 | echo "Release version is invalid"
51 | exit 1
52 | fi
53 |
54 | echo "Checking snapshot version..."
55 | if echo ${{ github.event.inputs.snapshot_version }} | grep --invert-match '^[0-9]\+\.[0-9]\+\.[0-9]\+-SNAPSHOT$' > /dev/null; then
56 | echo "Snapshot version is invalid"
57 | exit 1
58 | fi
59 |
60 | - name: Checkout main
61 | uses: actions/checkout@v2
62 | with:
63 | ref: main
64 | fetch-depth: 0
65 |
66 | - name: Set up JDK 8
67 | uses: actions/setup-java@v4
68 | with:
69 | distribution: 'adopt'
70 | java-version: 8
71 | cache: maven
72 |
73 | - name: Create release commits
74 | run: |
75 | git config --local user.name "GitHub Action"
76 | git config --local user.email "action@github.com"
77 | mvn -f tools versions:update-parent -DgenerateBackupPoms=false -DparentVersion=${{ github.event.inputs.release_version }} -DskipResolution=true
78 | mvn -f docs versions:update-parent -DgenerateBackupPoms=false -DparentVersion=${{ github.event.inputs.release_version }} -DskipResolution=true
79 | mvn versions:set -DgenerateBackupPoms=false -DnewVersion=${{ github.event.inputs.release_version }} versions:set-property -Dproperty=latestRelease
80 | git add pom.xml **/pom.xml
81 | git commit -m "Release version ${{ github.event.inputs.release_version }}"
82 | mvn -f tools versions:update-parent -DgenerateBackupPoms=false -DparentVersion=${{ github.event.inputs.snapshot_version }} -DskipResolution=true
83 | mvn -f docs versions:update-parent -DgenerateBackupPoms=false -DparentVersion=${{ github.event.inputs.snapshot_version }} -DskipResolution=true
84 | mvn versions:set -DgenerateBackupPoms=false -DnewVersion=${{ github.event.inputs.snapshot_version }}
85 | git add pom.xml **/pom.xml
86 | git commit -m "Bump version to ${{ github.event.inputs.snapshot_version }}"
87 |
88 | - name: Create Pull Request
89 | uses: peter-evans/create-pull-request@v3
90 | with:
91 | branch: release-${{ github.event.inputs.release_version }}
92 | delete-branch: true
93 | draft: true
94 | title: Release version ${{ github.event.inputs.release_version }}
95 | body: |
96 | Proposed changelog:
97 | - *fill in*
98 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bin/
2 | build/
3 | target/
4 | *.class
5 | *.jar
6 | *.tar
7 | *.zip
8 |
9 | .gradle
10 | **/.checkstyle
11 |
12 | # Intellij
13 | .idea
14 | *.iml
15 | *.iws
16 | *.ipr
17 | .DS_STORE
18 |
19 | # Eclipse
20 | .classpath
21 | .project
22 | .settings
23 | .metadata
24 |
25 | key.json
26 |
27 | test.conf
28 | kcbq-connector/src/test/resources/test.properties
29 | kcbq-connector/test/docker/connect/properties/
30 | kcbq-connector/out/
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Kafka Connect BigQuery Connector
2 |
3 | This is an implementation of a sink connector from [Apache Kafka](http://kafka.apache.org) to
4 | [Google BigQuery](https://cloud.google.com/bigquery/), built on top
5 | of [Apache Kafka Connect](https://kafka.apache.org/documentation.html#connect).
6 |
7 | ## Documentation
8 |
9 | The Kafka Connect BigQuery Connector documentation is available online at https://aiven-open.github.io/bigquery-connector-for-apache-kafka/.
10 | The site contains a complete list of the configuration options as well as information about the project.
11 |
12 | ## History
13 |
14 | This connector was [originally developed by WePay](https://github.com/wepay/kafka-connect-bigquery).
15 | In late 2020 the project moved to [Confluent](https://github.com/confluentinc/kafka-connect-bigquery),
16 | with both companies taking on maintenance duties.
17 | In 2024, Aiven created [its own fork](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/)
18 | based off the Confluent project in order to continue maintaining an open source, Apache 2-licensed
19 | version of the connector.
20 |
21 | ## Configuration
22 |
23 | ### Sample
24 |
25 | A simple example connector configuration that reads records with
26 | JSON-encoded values from Kafka and writes their values to BigQuery:
27 |
28 | ```json
29 | {
30 | "connector.class": "com.wepay.kafka.connect.bigquery.BigQuerySinkConnector",
31 | "topics": "users, clicks, payments",
32 | "tasks.max": "3",
33 | "value.converter": "org.apache.kafka.connect.json.JsonConverter",
34 |
35 | "project": "kafka-ingest-testing",
36 | "defaultDataset": "kcbq-example",
37 | "keyfile": "/tmp/bigquery-credentials.json"
38 | }
39 | ```
40 |
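One way to try this sample is to submit it to a Kafka Connect worker over the REST API. A minimal sketch, assuming a worker on the default port and `bigquery-sink` as a placeholder connector name:

```bash
# Save the sample configuration above as bigquery-sink.json, then create
# (or update) the connector; PUT /connectors/{name}/config accepts the
# bare configuration map shown above.
curl -X PUT -H "Content-Type: application/json" \
  --data @bigquery-sink.json \
  http://localhost:8083/connectors/bigquery-sink/config
```
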
41 | ### Complete docs
42 | See the [configuration documentation](https://aiven-open.github.io/bigquery-connector-for-apache-kafka/configuration.html) for a list of the connector's
43 | configuration properties.
44 |
45 | ## Download
46 |
47 | Download information is available on the [project web site](https://aiven-open.github.io/bigquery-connector-for-apache-kafka).
48 |
49 | ## Building from source
50 |
51 | This project uses the Maven build tool.
52 |
53 | To compile the project without running the integration tests, execute `mvn package -DskipITs`.
54 |
55 | To build the documentation, execute the following steps:
56 |
57 | ```
58 | mvn install -DskipITs
59 | mvn -f tools
60 | mvn -f docs
61 | ```
62 |
63 | Once the documentation is built, it can be served locally by executing `mvn -f docs site:run`.
64 |
65 | ### Integration test setup
66 |
67 | Integration tests require a live BigQuery and Kafka installation. Configuring those components is beyond the scope of this document.
68 |
69 | Once you have the test environment ready, integration-specific environment variables must be set.
70 |
71 | #### Local configuration
72 |
73 | - GOOGLE_APPLICATION_CREDENTIALS - the path to the JSON file that was downloaded when the GCP account key was created.
74 | - KCBQ_TEST_BUCKET - the name of the bucket to use for testing.
75 | - KCBQ_TEST_DATASET - the name of the dataset to use for testing.
76 | - KCBQ_TEST_KEYFILE - the same as GOOGLE_APPLICATION_CREDENTIALS.
77 | - KCBQ_TEST_PROJECT - the name of the project to use.
78 |
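A minimal local setup might look like the following sketch; every path and resource name below is a placeholder to replace with your own values.

```bash
# Point both credential variables at the key file downloaded from GCP.
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/gcp-credentials.json
export KCBQ_TEST_KEYFILE="$GOOGLE_APPLICATION_CREDENTIALS"

# Hypothetical project, dataset, and bucket names.
export KCBQ_TEST_PROJECT=my-gcp-project
export KCBQ_TEST_DATASET=kcbq_test_dataset
export KCBQ_TEST_BUCKET=kcbq-test-bucket

# The integration tests are bound to the Maven verify phase.
mvn verify
```
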
79 | #### GitHub configuration
80 |
81 | To run the integration tests from a GitHub action, the following variables must be set:
82 |
83 | - GCP_CREDENTIALS - the contents of the JSON file that was downloaded when the GCP account key was created.
84 | - KCBQ_TEST_BUCKET - the bucket to use for the tests.
85 | - KCBQ_TEST_DATASET - the dataset to use for the tests.
86 | - KCBQ_TEST_PROJECT - the project to use for the tests.
87 |
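These values are stored as repository secrets. As an example sketch using the GitHub CLI (all values below are placeholders):

```bash
gh secret set GCP_CREDENTIALS < /path/to/gcp-credentials.json
gh secret set KCBQ_TEST_BUCKET --body "kcbq-test-bucket"
gh secret set KCBQ_TEST_DATASET --body "kcbq_test_dataset"
gh secret set KCBQ_TEST_PROJECT --body "my-gcp-project"
```
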
--------------------------------------------------------------------------------
/config/checkstyle/suppressions.xml:
--------------------------------------------------------------------------------
1 |
2 |
26 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/config/copyright/custom-header-styles.xml:
--------------------------------------------------------------------------------
1 |
2 |
26 |
27 |
28 | /*
29 | *
30 | */EOL
31 | (\s|\t)*/\*.*$
32 | .*\*/(\s|\t)*$
33 | false
34 | true
35 | false
36 |
37 |
38 | /*
39 | *
40 | */
41 | #!.*
42 | (\s|\t)*/\*.*
43 | .*\*/(\s|\t)*$
44 | false
45 | true
46 | false
47 |
48 |
--------------------------------------------------------------------------------
/docs/src/site/custom/project-info-reports.properties:
--------------------------------------------------------------------------------
1 |
2 | report.team.contributors.intro = The following additional people have contributed to this project through suggestions, patches, or documentation.\
3 | If you would like to be included in this list, please submit a pull request adding your information to the pom.xml and include in the description a link to a previously accepted pull request.
4 |
--------------------------------------------------------------------------------
/docs/src/site/markdown/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing Guidelines
2 |
3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
4 | documentation, we greatly value feedback and contributions from our community.
5 |
6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
7 | information to effectively respond to your bug report or contribution.
8 |
9 |
10 | ## Reporting Bugs/Feature Requests
11 |
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 |
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 |
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 |
22 |
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 |
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 |
30 | To send us a pull request, please:
31 |
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 | 7. Two reviews from the maintainers team are required before new features are merged into main.
39 | 8. Before merging, clean up the commit history for the PR. Each commit should be self-contained with an informative message, since each commit will be added to the history for this project.
40 |
41 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
42 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
43 |
44 | ## Developer Certificate of Origin
45 |
46 | All connectors for Apache Kafka in this repository are open source products released under the Apache 2.0 license (see either [the Apache site](https://www.apache.org/licenses/LICENSE-2.0) or the [LICENSE file](LICENSE.html)). The Apache 2.0 license allows you to freely use, modify, distribute, and sell your own products that include Apache 2.0 licensed software.
47 |
48 | We respect intellectual property rights of others, and we want to make sure all incoming contributions are correctly attributed and licensed. A Developer Certificate of Origin (DCO) is a lightweight mechanism to do that.
49 |
50 | So we require that, by making a contribution, every contributor certifies that:
51 | ```
52 | The contribution was created in whole or in part by me and I have the right to submit it under the open source license
53 | indicated in the file
54 | ```
55 |
56 | ## Finding contributions to work on
57 | Looking at the existing issues is a great way to find something to contribute to. Since our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
58 |
59 |
60 | ## Code of Conduct
61 | This project has adopted the [Contributor Covenant Code of Conduct](CODE_OF_CONDUCT.html).
62 | For more information see the [Code of Conduct FAQ](https://www.contributor-covenant.org/faq/).
63 |
64 |
65 | ## Security issue notifications
66 | If you discover a potential security issue in this project we ask that you report it according to [Security Policy](SECURITY.html). Please do **not** create a public GitHub issue.
67 |
68 | ## Licensing
69 |
70 | See the [LICENSE](LICENSE.html) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
71 |
--------------------------------------------------------------------------------
/docs/src/site/markdown/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | We release patches for security vulnerabilities. Which versions are eligible
6 | to receive such patches depends on the [CVSS](https://www.first.org/cvss/) rating.
7 |
8 | ## Reporting a Vulnerability
9 |
10 | Please report (suspected) security vulnerabilities to our **[bug bounty
11 | program](https://bugcrowd.com/aiven-mbb-og)**. You will receive a response from
12 | us within 2 working days. If the issue is confirmed, we will release a patch as
13 | soon as possible depending on impact and complexity.
14 |
15 | ## Qualifying Vulnerabilities
16 |
17 | Any reproducible vulnerability that has a severe effect on the security or
18 | privacy of our users is likely to be in scope for the program.
19 |
20 | We generally **are not** interested in the following issues:
21 | * Social engineering (e.g. phishing, vishing, smishing) attacks
22 | * Brute force, DoS, text injection
23 | * Missing best practices such as HTTP security headers (CSP, X-XSS, etc.),
24 | email (SPF/DKIM/DMARC records), SSL/TLS configuration.
25 | * Software version disclosure / Banner identification issues / Descriptive
26 | error messages or headers (e.g. stack traces, application or server errors).
27 | * Clickjacking on pages with no sensitive actions
28 | * Theoretical vulnerabilities where you can't demonstrate a significant
29 | security impact with a proof of concept.
30 |
--------------------------------------------------------------------------------
/docs/src/site/markdown/configuration.md.vm:
--------------------------------------------------------------------------------
1 | # BigQuery connector configuration options
2 |
3 | All the configuration options are listed below in alphabetical order.
4 |
5 | #foreach ($configKey in ${extendedConfigDef.configKeys})
6 | #set($parents = ${extendedConfigDef.parents(${configKey.name})})
7 |
8 | ${esc.hash}${esc.hash} ${configKey.displayName}
9 |
10 | #if (${configKey.isDeprecated})
11 | **${configKey.deprecated}**
12 | #end
13 |
14 | - Configuration option: ${configKey.name}
15 |
16 | #if ($stringUtils.isNotEmpty(${configKey.since}))
17 | - Since: ${configKey.since}
18 | #end
19 |
20 | - Default value: ${configKey.getDefaultValue()|"none"}
21 | - Type: $configKey.type
22 | #if (${configKey.validator})
23 | - Valid values: ${configKey.validator|"no restrictions"}
24 | #end
25 | - Importance: $configKey.importance
26 | #if (!$parents.isEmpty)
27 | - Options that influence if or how this option can be used:
28 |
29 | #foreach ($p in $parents)
30 | - ${p.name}
31 |
32 | #end
33 |
34 | #end
35 |
36 | #if (!${configKey.dependents.isEmpty})
37 |
38 | - Options that this option influences:
39 |
40 | #foreach ($p in ${configKey.dependents})
41 | - ${p}
42 |
43 | #end
44 |
45 | #end
46 |
47 | ${extendedConfigDef.markdownEscape(${configKey.documentation})}
48 |
49 |
50 | #end
51 |
--------------------------------------------------------------------------------
/docs/src/site/markdown/designNotes.md.vm:
--------------------------------------------------------------------------------
1 | # Design Notes
2 |
3 | The BigQuery sink connector supports two distinct paths for inserting data into BigQuery. The original batch-load path uses GCS to store intermediate files before writing them to tables in BigQuery. The second path uses the Storage Write API to stream the data to BigQuery.
4 |
5 | ${esc.hash}${esc.hash} The general flow
6 |
7 | 1. Records come into the connector from Kafka.
8 | 2. They are processed and converted into BigQuery table data.
9 | 3. On the batch-load path, the table data are written to temporary files in GCS.
10 | 4. The data is written to BigQuery through either:
11 |     1. batch loading from GCS, or
12 |     2. the Storage Write API (streamed directly).
13 |
14 |
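As a rough sketch of how the path is selected: the streaming path is switched on with a single boolean option, while the batch path is enabled per topic together with an intermediate GCS bucket. The option names below are assumptions; the authoritative list is in the [configuration reference](configuration.html).

```json
{
  "useStorageWriteApi": "true"
}
```

versus, for the batch-load path:

```json
{
  "enableBatchLoad": "users,clicks",
  "gcsBucketName": "my-intermediate-bucket"
}
```
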
15 | ${esc.hash}${esc.hash} Configuration options that are influenced by other options
16 |
17 | #foreach ($configKey in ${extendedConfigDef.configKeys})
18 | #set($parents = ${extendedConfigDef.parents(${configKey.name})})
19 | #if (!$parents.isEmpty)
20 |
21 | ${esc.hash}${esc.hash}${esc.hash} ${configKey.name}
22 |
23 | #foreach ($p in $parents)
24 |
25 | - ${p.name}
26 |
27 | #end
28 |
29 | #end
30 |
31 | #end
32 |
--------------------------------------------------------------------------------
/docs/src/site/markdown/index.md.vm:
--------------------------------------------------------------------------------
1 | # Kafka Connect BigQuery Connector
2 |
3 | This is an implementation of a sink connector from [Apache Kafka](http://kafka.apache.org) to
4 | [Google BigQuery](https://cloud.google.com/bigquery/), built on top
5 | of [Apache Kafka Connect](https://kafka.apache.org/documentation.html#connect).
6 |
7 | ${esc.hash}${esc.hash} Download
8 |
9 | The current release is [v${latestRelease}](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/releases/tag/v${latestRelease})
10 |
11 | We provide the following convenience packages:
12 |
13 | - Connector + dependencies [tar](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/releases/download/v${latestRelease}/bigquery-connector-for-apache-kafka-${latestRelease}.tar) [zip](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/releases/download/v${latestRelease}/bigquery-connector-for-apache-kafka-${latestRelease}.zip)
14 | - Source [tar.gz](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/archive/refs/tags/v${latestRelease}.tar.gz) [zip](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/archive/refs/tags/v${latestRelease}.zip)
15 |
16 | See the [release notes](RELEASE_NOTES.html) for information on all releases.
17 |
18 | The Kafka Connect BigQuery Connector is dependent upon or uses the following:
19 |
20 | - [Apache Kafka Connect](https://kafka.apache.org/documentation.html#connect)
21 | - [Apache Kafka](http://kafka.apache.org)
22 | - [Google BigQuery](https://cloud.google.com/bigquery/)
23 |
24 |
25 | ${esc.hash}${esc.hash} History
26 |
27 | This connector was [originally developed by WePay](https://github.com/wepay/kafka-connect-bigquery).
28 | In late 2020 the project moved to [Confluent](https://github.com/confluentinc/kafka-connect-bigquery),
29 | with both companies taking on maintenance duties.
30 | In 2024, [Aiven](https://aiven.io) created [its own fork](https://github.com/Aiven-Open/bigquery-connector-for-apache-kafka/)
31 | based off the Confluent project in order to continue maintaining an open source, Apache 2-licensed
32 | version of the connector.
33 |
34 | ${esc.hash}${esc.hash} Configuration
35 |
36 | ${esc.hash}${esc.hash}${esc.hash} Sample
37 |
38 | An example connector configuration that reads records with
39 | JSON-encoded values from Kafka and writes their values to BigQuery:
40 |
41 | ```json
42 | {
43 | "connector.class": "com.wepay.kafka.connect.bigquery.BigQuerySinkConnector",
44 | "topics": "users, clicks, payments",
45 | "tasks.max": "3",
46 | "value.converter": "org.apache.kafka.connect.json.JsonConverter",
47 |
48 | "project": "kafka-ingest-testing",
49 | "defaultDataset": "kcbq-example",
50 | "keyfile": "/tmp/bigquery-credentials.json"
51 | }
52 | ```
53 |
54 | ${esc.hash}${esc.hash}${esc.hash} Configuration options documentation
55 |
56 | See the [Configuration options](configuration.html) for a list of the connector's configuration properties.
57 |
58 | ${esc.hash}${esc.hash} Building from source
59 |
60 | This project uses the Maven build tool.
61 |
62 | To compile the project without running the integration tests, execute `mvn package -DskipITs`.
63 |
64 | To build the documentation, execute the following steps:
65 |
66 | ```
67 | mvn install -DskipITs
68 | mvn -f tools
69 | mvn -f docs
70 | ```
71 |
72 | Once the documentation is built, it can be served locally by executing `mvn -f docs site:run`.
73 |
74 |
75 | ${esc.hash}${esc.hash}${esc.hash} Integration test setup
76 |
77 | Integration tests require a live BigQuery and Kafka installation. Configuring those components is beyond the scope of this document.
78 |
79 | Once you have the test environment ready, integration-specific environment variables must be set.
80 |
81 | ${esc.hash}${esc.hash}${esc.hash}${esc.hash} Local configuration
82 |
83 | - GOOGLE_APPLICATION_CREDENTIALS - the path to the JSON file that was downloaded when the GCP account key was created.
84 | - KCBQ_TEST_BUCKET - the name of the bucket to use for testing.
85 | - KCBQ_TEST_DATASET - the name of the dataset to use for testing.
86 | - KCBQ_TEST_KEYFILE - the same as GOOGLE_APPLICATION_CREDENTIALS.
87 | - KCBQ_TEST_PROJECT - the name of the project to use.
88 |
89 | ${esc.hash}${esc.hash}${esc.hash}${esc.hash} GitHub configuration
90 |
91 | To run the integration tests from a GitHub action, the following variables must be set:
92 |
93 | - GCP_CREDENTIALS - the contents of the JSON file that was downloaded when the GCP account key was created.
94 | - KCBQ_TEST_BUCKET - the bucket to use for the tests.
95 | - KCBQ_TEST_DATASET - the dataset to use for the tests.
96 | - KCBQ_TEST_PROJECT - the project to use for the tests.
97 |
--------------------------------------------------------------------------------
/docs/src/site/site.xml:
--------------------------------------------------------------------------------
1 |
2 |
26 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
50 |
51 |
58 |
59 |
68 |
69 |
72 |
73 |
74 | org.apache.maven.skins
75 | maven-fluido-skin
76 | 2.0.1
77 |
78 |
79 |
80 | false
81 | true
82 | true
83 |
84 | aiven-open/bigquery-connector-for-apache-kafka
85 | right
86 | orange
87 |
88 |
89 |
90 |
--------------------------------------------------------------------------------
/kcbq-api/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
26 |
29 | 4.0.0
30 |
31 |
32 | com.wepay.kcbq
33 | kcbq-parent
34 | 2.11.0-SNAPSHOT
35 | ..
36 |
37 |
38 | kcbq-api
39 | kafka-connect-bigquery-api
40 |
41 |
42 | ${project.parent.basedir}
43 |
44 |
45 |
46 |
47 | org.apache.kafka
48 | connect-api
49 |
50 |
51 |
52 |
53 |
54 |
55 | org.apache.maven.plugins
56 | maven-compiler-plugin
57 |
58 |
59 | org.apache.maven.plugins
60 | maven-checkstyle-plugin
61 |
62 |
63 | org.apache.maven.plugins
64 | maven-jar-plugin
65 |
66 |
67 | org.apache.maven.plugins
68 | maven-site-plugin
69 |
70 | true
71 | false
72 | false
73 |
74 |
75 |
76 | org.apache.maven.plugins
77 | maven-project-info-reports-plugin
78 |
79 | true
80 |
81 |
82 |
83 | org.apache.maven.plugins
84 | maven-surefire-plugin
85 |
86 | true
87 |
88 |
89 |
90 |
91 |
--------------------------------------------------------------------------------
/kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/KafkaSchemaRecordType.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2024 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.api;
25 |
26 |
27 | /**
28 | * Enum class for Kafka schema or record type, either value or key.
29 | */
30 | public enum KafkaSchemaRecordType {
31 |
32 | VALUE("value"),
33 | KEY("key");
34 |
35 | private final String str;
36 |
37 | KafkaSchemaRecordType(String str) {
38 | this.str = str;
39 | }
40 |
41 | public String toString() {
42 | return this.str;
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/kcbq-api/src/main/java/com/wepay/kafka/connect/bigquery/api/SchemaRetriever.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2024 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.api;
25 |
26 | import java.util.Map;
27 | import org.apache.kafka.connect.data.Schema;
28 | import org.apache.kafka.connect.sink.SinkRecord;
29 |
30 | /**
31 | * Interface for retrieving the most up-to-date schemas for a given Sink Record. Used in
32 | * automatic table creation and schema updates.
33 | */
34 | public interface SchemaRetriever {
35 | /**
36 | * Called with all of the configuration settings passed to the connector via its
37 | * {@link org.apache.kafka.connect.sink.SinkConnector#start(Map)} method.
38 | *
39 | * @param properties The configuration settings of the connector.
40 | */
41 |   void configure(Map<String, String> properties);
42 |
43 | /**
44 | * Retrieve the most current key schema for the given sink record.
45 | *
46 | * @param record The record to retrieve a key schema for.
47 | * @return The key Schema for the given record.
48 | */
49 | Schema retrieveKeySchema(SinkRecord record);
50 |
51 | /**
52 | * Retrieve the most current value schema for the given sink record.
53 | *
54 | * @param record The record to retrieve a value schema for.
55 | * @return The value Schema for the given record.
56 | */
57 | Schema retrieveValueSchema(SinkRecord record);
58 | }
59 |
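A minimal implementation sketch (the class name is hypothetical; it simply echoes the schemas already attached to each record, in the spirit of the connector's bundled IdentitySchemaRetriever):

```java
package com.wepay.kafka.connect.bigquery.api;

import java.util.Map;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.sink.SinkRecord;

/** Sketch: returns the schemas embedded in the record itself. */
public class EmbeddedSchemaRetriever implements SchemaRetriever {

  @Override
  public void configure(Map<String, String> properties) {
    // Nothing to configure in this sketch.
  }

  @Override
  public Schema retrieveKeySchema(SinkRecord record) {
    return record.keySchema();
  }

  @Override
  public Schema retrieveValueSchema(SinkRecord record) {
    return record.valueSchema();
  }
}
```
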
--------------------------------------------------------------------------------
/kcbq-connector/src/main/assembly/release-tar.xml:
--------------------------------------------------------------------------------
1 |
25 |
28 | release-tar
29 |
30 | tar
31 |
32 | false
33 |
34 |
35 | /
36 | true
37 | false
38 | runtime
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/assembly/release-zip.xml:
--------------------------------------------------------------------------------
1 |
25 |
28 | release-zip
29 |
30 | zip
31 |
32 | false
33 |
34 |
35 | /
36 | true
37 | false
38 | runtime
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/ErrantRecordHandler.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2024 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery;
25 |
26 | import com.google.cloud.bigquery.BigQueryError;
27 | import java.util.Arrays;
28 | import java.util.List;
29 | import java.util.Map;
30 | import java.util.Set;
31 | import org.apache.kafka.connect.sink.ErrantRecordReporter;
32 | import org.apache.kafka.connect.sink.SinkRecord;
33 | import org.slf4j.Logger;
34 | import org.slf4j.LoggerFactory;
35 |
36 | public class ErrantRecordHandler {
37 | private static final Logger logger = LoggerFactory.getLogger(ErrantRecordHandler.class);
38 |   private static final List<String> allowedBigQueryErrorReason = Arrays.asList("invalid");
39 | private final ErrantRecordReporter errantRecordReporter;
40 |
41 | public ErrantRecordHandler(ErrantRecordReporter errantRecordReporter) {
42 | this.errantRecordReporter = errantRecordReporter;
43 | }
44 |
45 |   public void reportErrantRecords(Set<SinkRecord> records, Exception e) {
46 | if (errantRecordReporter != null) {
47 | logger.debug("Sending {} records to DLQ", records.size());
48 | for (SinkRecord r : records) {
49 | // Reporting records in async mode
50 | errantRecordReporter.report(r, e);
51 | }
52 | } else {
53 | logger.warn("Cannot send Records to DLQ as ErrantRecordReporter is null");
54 | }
55 | }
56 |
57 |   public void reportErrantRecords(Map<SinkRecord, Throwable> rowToError) {
58 | if (errantRecordReporter != null) {
59 | logger.debug("Sending {} records to DLQ", rowToError.size());
60 |       for (Map.Entry<SinkRecord, Throwable> rowToErrorEntry : rowToError.entrySet()) {
61 | // Reporting records in async mode
62 | errantRecordReporter.report(rowToErrorEntry.getKey(), rowToErrorEntry.getValue());
63 | }
64 | } else {
65 | logger.warn("Cannot send Records to DLQ as ErrantRecordReporter is null");
66 | }
67 | }
68 |
69 | public ErrantRecordReporter getErrantRecordReporter() {
70 | return errantRecordReporter;
71 | }
72 |
73 |   public boolean isErrorReasonAllowed(List<BigQueryError> bqErrorList) {
74 | for (BigQueryError bqError : bqErrorList) {
75 | boolean errorMatch = false;
76 | String bqErrorReason = bqError.getReason();
77 | for (String allowedBqErrorReason : allowedBigQueryErrorReason) {
78 | if (bqErrorReason.equalsIgnoreCase(allowedBqErrorReason)) {
79 | errorMatch = true;
80 | break;
81 | }
82 | }
83 | if (!errorMatch) {
84 | return false;
85 | }
86 | }
87 | return true;
88 | }
89 | }
90 |
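A usage sketch (not code from this file): a sink task would typically construct the handler from the reporter exposed by its context, which is non-null only when the user has configured a dead letter queue via `errors.deadletterqueue.topic.name`:

```java
// Inside SinkTask#start (sketch); context.errantRecordReporter() returns
// null when no dead letter queue is configured for the connector.
ErrantRecordReporter reporter = context.errantRecordReporter();
ErrantRecordHandler errantRecordHandler = new ErrantRecordHandler(reporter);

// Later, failed records can be routed to the DLQ, e.g.:
// errantRecordHandler.reportErrantRecords(failedRecords, exception);
```
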
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkTaskConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2024 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.config;
25 |
26 | import java.util.Map;
27 | import org.apache.kafka.common.config.ConfigDef;
28 |
29 | /**
30 | * Class for task-specific configuration properties.
31 | */
32 | public class BigQuerySinkTaskConfig extends BigQuerySinkConfig {
33 |
34 | public static final String GCS_BQ_TASK_CONFIG = "GCSBQTask";
35 | public static final String TASK_ID_CONFIG = "taskId";
36 | public static final ConfigDef.Importance TASK_ID_IMPORTANCE = ConfigDef.Importance.LOW;
37 | private static final ConfigDef.Type GCS_BQ_TASK_TYPE = ConfigDef.Type.BOOLEAN;
38 | private static final boolean GCS_BQ_TASK_DEFAULT = false;
39 | private static final ConfigDef.Importance GCS_BQ_TASK_IMPORTANCE = ConfigDef.Importance.LOW;
40 | private static final ConfigDef.Type TASK_ID_TYPE = ConfigDef.Type.INT;
41 |
42 | /**
43 | * @param properties A Map detailing configuration properties and their respective values.
44 | */
45 | public BigQuerySinkTaskConfig(Map<String, String> properties) {
46 | super(config(), properties);
47 | }
48 |
49 | /**
50 | * Return a ConfigDef object used to define this config's fields.
51 | *
52 | * @return A ConfigDef object used to define this config's fields.
53 | */
54 | public static ConfigDef config() {
55 | return BigQuerySinkConfig.getConfig()
56 | .defineInternal(
57 | GCS_BQ_TASK_CONFIG,
58 | GCS_BQ_TASK_TYPE,
59 | GCS_BQ_TASK_DEFAULT,
60 | GCS_BQ_TASK_IMPORTANCE
61 | ).defineInternal(
62 | TASK_ID_CONFIG,
63 | TASK_ID_TYPE,
64 | ConfigDef.NO_DEFAULT_VALUE,
65 | TASK_ID_IMPORTANCE
66 | );
67 | }
68 | }
69 |
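
Construction sketch (illustrative; the "project" and "defaultDataset" keys are assumptions based on the constants referenced elsewhere in this connector, e.g. BigQuerySinkConfig.DEFAULT_DATASET_CONFIG). The connector itself injects the internal taskId property for each task before building this config.

import java.util.HashMap;
import java.util.Map;

class TaskConfigSketch {
  static BigQuerySinkTaskConfig exampleConfig() {
    Map<String, String> props = new HashMap<>();
    props.put("project", "my-gcp-project");    // assumed key for the GCP project
    props.put("defaultDataset", "my_dataset"); // assumed key; see DEFAULT_DATASET_CONFIG
    props.put(BigQuerySinkTaskConfig.TASK_ID_CONFIG, "0"); // internal, injected per task
    return new BigQuerySinkTaskConfig(props);
  }
}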
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/GcsBucketValidator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.config;
25 |
26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.AUTO_CREATE_BUCKET_CONFIG;
27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.ENABLE_BATCH_CONFIG;
28 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.GCS_BUCKET_NAME_CONFIG;
29 |
30 | import com.google.cloud.storage.Bucket;
31 | import com.google.cloud.storage.Storage;
32 | import com.google.common.annotations.VisibleForTesting;
33 | import com.wepay.kafka.connect.bigquery.GcpClientBuilder;
34 | import java.util.Arrays;
35 | import java.util.Collection;
36 | import java.util.Collections;
37 | import java.util.List;
38 | import java.util.Optional;
39 |
40 | public class GcsBucketValidator extends MultiPropertyValidator<BigQuerySinkConfig> {
41 |
42 | private static final Collection<String> DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList(
43 | ENABLE_BATCH_CONFIG, AUTO_CREATE_BUCKET_CONFIG
44 | ));
45 |
46 | public GcsBucketValidator() {
47 | super(GCS_BUCKET_NAME_CONFIG);
48 | }
49 |
50 | @Override
51 | protected Collection<String> dependents() {
52 | return DEPENDENTS;
53 | }
54 |
55 | @Override
56 | protected Optional<String> doValidate(BigQuerySinkConfig config) {
57 | Storage gcs;
58 | try {
59 | gcs = new GcpClientBuilder.GcsBuilder()
60 | .withConfig(config)
61 | .build();
62 | } catch (RuntimeException e) {
63 | return Optional.of(String.format(
64 | "Failed to construct GCS client%s",
65 | e.getMessage() != null ? ": " + e.getMessage() : ""
66 | ));
67 | }
68 | return doValidate(gcs, config);
69 | }
70 |
71 | @VisibleForTesting
72 | Optional<String> doValidate(Storage gcs, BigQuerySinkConfig config) {
73 | List<String> batchLoadedTopics = config.getList(ENABLE_BATCH_CONFIG);
74 | if (batchLoadedTopics == null || batchLoadedTopics.isEmpty()) {
75 | // Batch loading is disabled; no need to validate the GCS bucket
76 | return Optional.empty();
77 | }
78 |
79 | String bucketName = config.getString(GCS_BUCKET_NAME_CONFIG);
80 | if (bucketName == null || bucketName.trim().isEmpty()) {
81 | return Optional.of("When GCS batch loading is enabled, a bucket must be provided");
82 | }
83 |
84 | if (config.getBoolean(AUTO_CREATE_BUCKET_CONFIG)) {
85 | return Optional.empty();
86 | }
87 |
88 | Bucket bucket = gcs.get(bucketName);
89 | if (bucket == null) {
90 | return Optional.of(String.format(
91 | "Automatic bucket creation is disabled but the GCS bucket %s does not exist. "
92 | + "Please either manually create this bucket before restarting the connector or enable automatic bucket creation "
93 | + "by the connector",
94 | bucketName
95 | ));
96 | }
97 |
98 | return Optional.empty();
99 | }
100 | }
101 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/MultiPropertyValidator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.config;
25 |
26 | import java.util.Collection;
27 | import java.util.List;
28 | import java.util.Map;
29 | import java.util.Objects;
30 | import java.util.Optional;
31 | import org.apache.kafka.common.config.ConfigValue;
32 |
33 | public abstract class MultiPropertyValidator<ConfigT> {
34 |
35 | private final String propertyName;
36 |
37 | protected MultiPropertyValidator(String propertyName) {
38 | this.propertyName = propertyName;
39 | }
40 |
41 | public String propertyName() {
42 | return propertyName;
43 | }
44 |
45 | public Optional<String> validate(ConfigValue value, ConfigT config, Map<String, ConfigValue> valuesByName) {
46 | // Only perform follow-up validation if the property doesn't already have an error associated with it
47 | if (!value.errorMessages().isEmpty()) {
48 | return Optional.empty();
49 | }
50 |
51 | boolean dependentsAreValid = dependents().stream()
52 | .map(valuesByName::get)
53 | .filter(Objects::nonNull)
54 | .map(ConfigValue::errorMessages)
55 | .allMatch(List::isEmpty);
56 | // Also ensure that all of the other properties that the validation for this one depends on don't already have errors
57 | if (!dependentsAreValid) {
58 | return Optional.empty();
59 | }
60 |
61 | try {
62 | return doValidate(config);
63 | } catch (RuntimeException e) {
64 | return Optional.of(
65 | "An unexpected error occurred during validation"
66 | + (e.getMessage() != null ? ": " + e.getMessage() : "")
67 | );
68 | }
69 | }
70 |
71 | protected abstract Collection<String> dependents();
72 |
73 | protected abstract Optional<String> doValidate(ConfigT config);
74 | }
75 |
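
A sketch of the contract (FooValidator and its fooEnabled property are hypothetical, for illustration only): subclasses name the property they guard, list the other properties that must already be valid, and return Optional.empty() for success or an error message for failure.

package com.wepay.kafka.connect.bigquery.config;

import java.util.Collection;
import java.util.Collections;
import java.util.Optional;

class FooValidator extends MultiPropertyValidator<BigQuerySinkConfig> {
  FooValidator() {
    super("fooEnabled"); // hypothetical property name
  }

  @Override
  protected Collection<String> dependents() {
    return Collections.emptyList(); // no other properties must validate first
  }

  @Override
  protected Optional<String> doValidate(BigQuerySinkConfig config) {
    // Optional.empty() signals "valid"; a message signals "invalid, and why".
    return Optional.empty();
  }
}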
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/PartitioningModeValidator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.config;
25 |
26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG;
27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG;
28 |
29 | import java.util.Arrays;
30 | import java.util.Collection;
31 | import java.util.Collections;
32 | import java.util.Optional;
33 |
34 | public class PartitioningModeValidator extends MultiPropertyValidator<BigQuerySinkConfig> {
35 | private static final Collection<String> DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList(
36 | BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG
37 | ));
38 |
39 | public PartitioningModeValidator() {
40 | super(BIGQUERY_PARTITION_DECORATOR_CONFIG);
41 | }
42 |
43 | @Override
44 | protected Collection<String> dependents() {
45 | return DEPENDENTS;
46 | }
47 |
48 | @Override
49 | protected Optional<String> doValidate(BigQuerySinkConfig config) {
50 | if (!config.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG)) {
51 | return Optional.empty();
52 | }
53 |
54 | if (config.getTimestampPartitionFieldName().isPresent()) {
55 | return Optional.of(String.format("Only one partitioning mode may be specified for the connector. "
56 | + "Use either %s OR %s.",
57 | BIGQUERY_PARTITION_DECORATOR_CONFIG,
58 | BIGQUERY_TIMESTAMP_PARTITION_FIELD_NAME_CONFIG
59 | ));
60 | } else {
61 | return Optional.empty();
62 | }
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/PartitioningTypeValidator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.config;
25 |
26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.BIGQUERY_PARTITION_DECORATOR_CONFIG;
27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TABLE_CREATE_CONFIG;
28 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.TIME_PARTITIONING_TYPE_CONFIG;
29 |
30 | import com.google.cloud.bigquery.TimePartitioning;
31 | import java.util.Arrays;
32 | import java.util.Collection;
33 | import java.util.Collections;
34 | import java.util.Optional;
35 |
36 | public class PartitioningTypeValidator extends MultiPropertyValidator<BigQuerySinkConfig> {
37 | private static final Collection<String> DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList(
38 | BIGQUERY_PARTITION_DECORATOR_CONFIG, TABLE_CREATE_CONFIG
39 | ));
40 |
41 | public PartitioningTypeValidator() {
42 | super(TIME_PARTITIONING_TYPE_CONFIG);
43 | }
44 |
45 | @Override
46 | protected Collection<String> dependents() {
47 | return DEPENDENTS;
48 | }
49 |
50 | @Override
51 | protected Optional<String> doValidate(BigQuerySinkConfig config) {
52 | if (!config.getBoolean(BIGQUERY_PARTITION_DECORATOR_CONFIG) || !config.getBoolean(TABLE_CREATE_CONFIG)) {
53 | return Optional.empty();
54 | }
55 |
56 | Optional<TimePartitioning.Type> timePartitioningType = config.getTimePartitioningType();
57 |
58 | if (!Optional.of(TimePartitioning.Type.DAY).equals(timePartitioningType)) {
59 | return Optional.of(
60 | "Tables must be partitioned by DAY when using partition decorator syntax. "
61 | + "Either configure the connector with the DAY time partitioning type, "
62 | + "disable automatic table creation, or disable partition decorator syntax."
63 | );
64 | }
65 |
66 | return Optional.empty();
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/UpsertDeleteValidator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.config;
25 |
26 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.DELETE_ENABLED_CONFIG;
27 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.KAFKA_KEY_FIELD_NAME_CONFIG;
28 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.MERGE_INTERVAL_MS_CONFIG;
29 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.MERGE_RECORDS_THRESHOLD_CONFIG;
30 | import static com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig.UPSERT_ENABLED_CONFIG;
31 |
32 | import java.util.Arrays;
33 | import java.util.Collection;
34 | import java.util.Collections;
35 | import java.util.Optional;
36 | import org.slf4j.Logger;
37 | import org.slf4j.LoggerFactory;
38 |
39 | public abstract class UpsertDeleteValidator extends MultiPropertyValidator<BigQuerySinkConfig> {
40 | private static final Collection<String> DEPENDENTS = Collections.unmodifiableCollection(Arrays.asList(
41 | MERGE_INTERVAL_MS_CONFIG, MERGE_RECORDS_THRESHOLD_CONFIG, KAFKA_KEY_FIELD_NAME_CONFIG
42 | ));
43 | private static final Logger logger = LoggerFactory.getLogger(UpsertDeleteValidator.class);
44 |
45 | private UpsertDeleteValidator(String propertyName) {
46 | super(propertyName);
47 | }
48 |
49 | @Override
50 | protected Collection<String> dependents() {
51 | return DEPENDENTS;
52 | }
53 |
54 | @Override
55 | protected Optional<String> doValidate(BigQuerySinkConfig config) {
56 | if (!modeEnabled(config)) {
57 | return Optional.empty();
58 | }
59 |
60 | long mergeInterval = config.getLong(MERGE_INTERVAL_MS_CONFIG);
61 | long mergeRecordsThreshold = config.getLong(MERGE_RECORDS_THRESHOLD_CONFIG);
62 |
63 | if (mergeInterval == -1 && mergeRecordsThreshold == -1) {
64 | return Optional.of(String.format(
65 | "%s and %s cannot both be -1",
66 | MERGE_INTERVAL_MS_CONFIG,
67 | MERGE_RECORDS_THRESHOLD_CONFIG
68 | ));
69 | }
70 |
71 | if (mergeInterval != -1 && mergeInterval < 10_000L) {
72 | logger.warn(String.format(
73 | "%s should not be set to less than 10 seconds. A validation enforcing this may be introduced "
74 | + "in a future release.",
75 | MERGE_INTERVAL_MS_CONFIG
76 | ));
77 | }
78 |
79 | if (!config.getKafkaKeyFieldName().isPresent()) {
80 | return Optional.of(String.format(
81 | "%s must be specified when %s is set to true",
82 | KAFKA_KEY_FIELD_NAME_CONFIG,
83 | propertyName()
84 | ));
85 | }
86 |
87 | return Optional.empty();
88 | }
89 |
90 | /**
91 | * @param config the user-provided configuration
92 | * @return whether the write mode for the validator (i.e., either upsert or delete) is enabled
93 | */
94 | protected abstract boolean modeEnabled(BigQuerySinkConfig config);
95 |
96 | public static class UpsertValidator extends UpsertDeleteValidator {
97 | public UpsertValidator() {
98 | super(UPSERT_ENABLED_CONFIG);
99 | }
100 |
101 | @Override
102 | protected boolean modeEnabled(BigQuerySinkConfig config) {
103 | return config.getBoolean(UPSERT_ENABLED_CONFIG);
104 | }
105 | }
106 |
107 | public static class DeleteValidator extends UpsertDeleteValidator {
108 | public DeleteValidator() {
109 | super(DELETE_ENABLED_CONFIG);
110 | }
111 |
112 | @Override
113 | protected boolean modeEnabled(BigQuerySinkConfig config) {
114 | return config.getBoolean(DELETE_ENABLED_CONFIG);
115 | }
116 | }
117 | }
118 |
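
Configuration sketch (illustrative; the raw property keys are assumptions, and the authoritative names are the BigQuerySinkConfig constants imported above) showing values that would pass this validator:

import java.util.HashMap;
import java.util.Map;

class UpsertConfigSketch {
  static Map<String, String> validUpsertProps() {
    Map<String, String> props = new HashMap<>();
    props.put("upsertEnabled", "true");         // assumed key; see UPSERT_ENABLED_CONFIG
    props.put("kafkaKeyFieldName", "kafkaKey"); // required once upsert/delete is enabled
    props.put("mergeIntervalMs", "60000");      // interval and threshold may not both be -1
    props.put("mergeRecordsThreshold", "-1");
    return props;
  }
}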
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/RecordConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.convert;
25 |
26 | import com.wepay.kafka.connect.bigquery.api.KafkaSchemaRecordType;
27 | import org.apache.kafka.connect.sink.SinkRecord;
28 |
29 | /**
30 | * Interface for converting from a {@link SinkRecord} to some other kind of record.
31 | *
32 |  * @param <R> The type of record to convert to.
33 | */
34 | public interface RecordConverter<R> {
35 | /**
36 | * @param record The record to convert.
37 | * @param recordType The type of the record to convert, either value or key.
38 | * @return The converted record.
39 | */
40 | R convertRecord(SinkRecord record, KafkaSchemaRecordType recordType);
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/SchemaConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.convert;
25 |
26 | import org.apache.kafka.connect.data.Schema;
27 |
28 | /**
29 | * Interface for converting from a {@link Schema Kafka Connect Schema} to some other kind of schema.
30 | *
31 |  * @param <S> The kind of schema to convert to.
32 | */
33 | public interface SchemaConverter<S> {
34 | /**
35 | * @param schema The schema to convert.
36 | * @return The converted schema.
37 | */
38 | S convertSchema(Schema schema);
39 | }
40 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/convert/logicaltype/LogicalConverterRegistry.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.convert.logicaltype;
25 |
26 | import java.util.Map;
27 | import java.util.concurrent.ConcurrentHashMap;
28 |
29 | /**
30 | * Registry for finding and accessing {@link LogicalTypeConverter}s.
31 | */
32 | public class LogicalConverterRegistry {
33 |
34 | private static Map<String, LogicalTypeConverter> converterMap = new ConcurrentHashMap<>();
35 |
36 | /**
37 | * Registers the logical type name. Will override existing value if any.
38 | *
39 | * @param logicalTypeName the logical type name to register.
40 | * @param converter the converter for the name. May not be {@code null}.
41 | */
42 | public static void register(String logicalTypeName, LogicalTypeConverter converter) {
43 | converterMap.put(logicalTypeName, converter);
44 | }
45 |
46 | /**
47 | * Registers the logical type name if it was not previously registered.
48 | *
49 | * @param logicalTypeName the logical type name to register.
50 | * @param converter the converter for the name. May not be {@code null}.
51 | */
52 | public static void registerIfAbsent(String logicalTypeName, LogicalTypeConverter converter) {
53 | converterMap.putIfAbsent(logicalTypeName, converter);
54 | }
55 |
56 | /**
57 | * Unregisters (removes) the logical type name if it was previously registered. After an {@code unregister} call
58 | * the result of {@link #isRegisteredLogicalType(String)} is guaranteed to be false.
59 | *
60 | * @param logicalTypeName the logical type name to unregister.
61 | */
62 | public static void unregister(String logicalTypeName) {
63 | if (logicalTypeName != null) {
64 | converterMap.remove(logicalTypeName);
65 | }
66 | }
67 |
68 | /**
69 | * Gets the converter registered with the logical type name.
70 | *
71 | * @param logicalTypeName the logical type name. May be {@code null}.
72 | * @return the LogicalTypeConverter or {@code null} if none is registered or {@code null} passed for {@code logicalTypeName}.
73 | */
74 | public static LogicalTypeConverter getConverter(String logicalTypeName) {
75 | return logicalTypeName == null ? null : converterMap.get(logicalTypeName);
76 | }
77 |
78 | /**
79 | * Determines if a converter is registered with the logical type name.
80 | *
81 | * @param typeName the logical type name.
82 |  * @return {@code true} if there is a converter registered, {@code false} otherwise.
83 | */
84 | public static boolean isRegisteredLogicalType(String typeName) {
85 | return typeName != null && converterMap.containsKey(typeName);
86 | }
87 | }
88 |
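
A lookup sketch (illustrative): converters key the registry by the Connect schema's logical-type name, e.g. org.apache.kafka.connect.data.Date for Kafka's Date logical type.

package com.wepay.kafka.connect.bigquery.convert.logicaltype;

import org.apache.kafka.connect.data.Schema;

class RegistryLookupSketch {
  // Returns the converter for the schema's logical-type name, or null if the
  // schema has no name or no converter is registered for it.
  static LogicalTypeConverter lookup(Schema schema) {
    return LogicalConverterRegistry.getConverter(schema.name());
  }
}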
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/BigQueryConnectException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.exception;
25 |
26 | import com.google.cloud.bigquery.BigQueryError;
27 | import java.util.List;
28 | import java.util.Map;
29 | import org.apache.kafka.connect.errors.ConnectException;
30 |
31 | /**
32 | * Class for exceptions that occur while interacting with BigQuery, such as login failures, schema
33 | * update failures, and table insertion failures.
34 | */
35 | public class BigQueryConnectException extends ConnectException {
36 | public BigQueryConnectException(String msg) {
37 | super(msg);
38 | }
39 |
40 | public BigQueryConnectException(String msg, Throwable thr) {
41 | super(msg, thr);
42 | }
43 |
44 | public BigQueryConnectException(Throwable thr) {
45 | super(thr);
46 | }
47 |
48 | public BigQueryConnectException(String tableInfo, Map<Long, List<BigQueryError>> errors) {
49 | super(formatInsertAllErrors(tableInfo, errors));
50 | }
51 |
52 | private static String formatInsertAllErrors(String tableInfo, Map<Long, List<BigQueryError>> errorsMap) {
53 | StringBuilder messageBuilder = new StringBuilder();
54 | messageBuilder.append(String.format("table: %s insertion failed for the following rows:", tableInfo));
55 | for (Map.Entry<Long, List<BigQueryError>> errorsEntry : errorsMap.entrySet()) {
56 | for (BigQueryError error : errorsEntry.getValue()) {
57 | messageBuilder.append(String.format(
58 | "%n\t[row index %d] (location %s, reason: %s): %s",
59 | errorsEntry.getKey(),
60 | error.getLocation(),
61 | error.getReason(),
62 | error.getMessage()
63 | ));
64 | }
65 | }
66 | return messageBuilder.toString();
67 | }
68 |
69 | @Override
70 | public String toString() {
71 | return getCause() != null
72 | ? super.toString() + "\nCaused by: " + getCause().getLocalizedMessage()
73 | : super.toString();
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/BigQueryStorageWriteApiConnectException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.exception;
25 |
26 |
27 | import com.google.cloud.bigquery.storage.v1.RowError;
28 | import java.util.List;
29 | import java.util.Map;
30 | import org.apache.kafka.connect.errors.ConnectException;
31 |
32 | /**
33 |  * Class for exceptions that occur while interacting with the BigQuery Storage Write API, such as login
34 |  * failures, schema update failures, and table insertion failures.
35 | */
36 | public class BigQueryStorageWriteApiConnectException extends ConnectException {
37 |
38 | public BigQueryStorageWriteApiConnectException(String message) {
39 | super(message);
40 | }
41 |
42 | public BigQueryStorageWriteApiConnectException(String message, Throwable error) {
43 | super(message, error);
44 | }
45 |
46 | public BigQueryStorageWriteApiConnectException(String tableName, List<RowError> errors) {
47 | super(formatRowErrors(tableName, errors));
48 | }
49 |
50 | public BigQueryStorageWriteApiConnectException(String tableName, Map<Integer, String> errors) {
51 | super(formatRowErrors(tableName, errors));
52 | }
53 |
54 | private static String formatRowErrors(String tableName, List<RowError> errors) {
55 | StringBuilder builder = new StringBuilder();
56 | builder.append(String.format("Insertion failed at table %s for following rows: ", tableName));
57 | for (RowError error : errors) {
58 | builder.append(String.format(
59 | "\n [row index %d] (Failure reason : %s) ",
60 | error.getIndex(),
61 | error.getMessage())
62 | );
63 | }
64 | return builder.toString();
65 | }
66 |
67 | private static String formatRowErrors(String tableName, Map<Integer, String> errors) {
68 | StringBuilder builder = new StringBuilder();
69 | builder.append(String.format("Insertion failed at table %s for following rows: ", tableName));
70 | for (Map.Entry<Integer, String> error : errors.entrySet()) {
71 | builder.append(String.format(
72 | "\n [row index %d] (Failure reason : %s) ",
73 | error.getKey(),
74 | error.getValue()
75 | ));
76 | }
77 | return builder.toString();
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/ConversionConnectException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.exception;
25 |
26 | import org.apache.kafka.connect.errors.ConnectException;
27 |
28 | /**
29 | * Class for exceptions that occur while converting between Kafka Connect and BigQuery schemas and
30 | * records.
31 | */
32 | public class ConversionConnectException extends ConnectException {
33 | public ConversionConnectException(String msg) {
34 | super(msg);
35 | }
36 |
37 | public ConversionConnectException(String msg, Throwable thr) {
38 | super(msg, thr);
39 | }
40 |
41 | public ConversionConnectException(Throwable thr) {
42 | super(thr);
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/ExpectedInterruptException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.exception;
25 |
26 | import org.apache.kafka.connect.errors.ConnectException;
27 |
28 | public class ExpectedInterruptException extends ConnectException {
29 |
30 | public ExpectedInterruptException(String message) {
31 | super(message);
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/exception/GcsConnectException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.exception;
25 |
26 | import org.apache.kafka.connect.errors.ConnectException;
27 |
28 | /**
29 | * Class for exceptions that occur while interacting with Google Cloud Storage, such as login
30 | * failures.
31 | */
32 | public class GcsConnectException extends ConnectException {
33 | public GcsConnectException(String msg) {
34 | super(msg);
35 | }
36 |
37 | public GcsConnectException(String msg, Throwable thr) {
38 | super(msg, thr);
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/retrieve/IdentitySchemaRetriever.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.retrieve;
25 |
26 | import com.wepay.kafka.connect.bigquery.api.SchemaRetriever;
27 | import java.util.Map;
28 | import org.apache.kafka.connect.data.Schema;
29 | import org.apache.kafka.connect.sink.SinkRecord;
30 |
31 | /**
32 | * Fetches the key Schema and value Schema from a Sink Record
33 | */
34 | public class IdentitySchemaRetriever implements SchemaRetriever {
35 |
36 | @Override
37 | public void configure(Map<String, String> properties) {
38 | }
39 |
40 | @Override
41 | public Schema retrieveKeySchema(SinkRecord record) {
42 | return record.keySchema();
43 | }
44 |
45 | @Override
46 | public Schema retrieveValueSchema(SinkRecord record) {
47 | return record.valueSchema();
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/FieldNameSanitizer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.utils;
25 |
26 | import java.util.HashMap;
27 | import java.util.Map;
28 |
29 | public class FieldNameSanitizer {
30 |
31 | // Replace all non-letter, non-digit characters with underscores. Prepend an underscore if the
32 | // name does not begin with a letter or an underscore.
33 | public static String sanitizeName(String name) {
34 | String sanitizedName = name.replaceAll("[^a-zA-Z0-9_]", "_");
35 | if (sanitizedName.matches("^[^a-zA-Z_].*")) {
36 | sanitizedName = "_" + sanitizedName;
37 | }
38 | return sanitizedName;
39 | }
40 |
41 |
42 | // BigQuery requires that a field name begin with a letter or underscore and contain only
43 | // letters, numbers, and underscores.
44 | // Note: "a.b" and "a/b" sanitize to the same value, which will cause a duplicate-key
45 | // exception.
46 | @SuppressWarnings("unchecked")
47 | public static Map<String, Object> replaceInvalidKeys(Map<String, Object> map) {
48 | Map<String, Object> result = new HashMap<>();
49 | map.forEach((key, value) -> {
50 | String sanitizedKey = sanitizeName(key);
51 | if (value instanceof Map) {
52 | result.put(sanitizedKey, replaceInvalidKeys((Map<String, Object>) value));
53 | } else {
54 | result.put(sanitizedKey, value);
55 | }
56 | });
57 | return result;
58 | }
59 | }
60 |
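
Usage sketch (illustrative, not part of the repository source) showing the sanitization rules above, including the documented duplicate-key collision:

import com.wepay.kafka.connect.bigquery.utils.FieldNameSanitizer;

class SanitizerDemo {
  public static void main(String[] args) {
    System.out.println(FieldNameSanitizer.sanitizeName("order-id"));  // order_id
    System.out.println(FieldNameSanitizer.sanitizeName("1st_field")); // _1st_field
    // "a.b" and "a/b" collapse to the same key, the duplicate-key case noted above:
    System.out.println(FieldNameSanitizer.sanitizeName("a.b"));       // a_b
    System.out.println(FieldNameSanitizer.sanitizeName("a/b"));       // a_b
  }
}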
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/GsonUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.utils;
25 |
26 | import com.google.gson.Gson;
27 | import com.google.gson.GsonBuilder;
28 | import com.google.gson.TypeAdapter;
29 | import com.google.gson.stream.JsonReader;
30 | import com.google.gson.stream.JsonToken;
31 | import com.google.gson.stream.JsonWriter;
32 | import java.io.IOException;
33 | import java.nio.ByteBuffer;
34 | import java.util.Base64;
35 |
36 | /**
37 | * Gson utilities for safe JSON handling on Java 9+.
38 | *
39 |  * <p>Exposes a preconfigured {@link Gson} instance that registers a hierarchy adapter for {@link ByteBuffer},
40 |  * ensuring serialization does not rely on illegal reflection into JDK internals (e.g.,
41 | * ByteBuffer#hb) which would otherwise throw {@code InaccessibleObjectException} on Java 9+.
42 | */
43 | public final class GsonUtils {
44 |
45 | /** A ready-to-use Gson that safely serializes ByteBuffer (as Base64 strings). */
46 | public static final Gson SAFE_GSON =
47 | new GsonBuilder()
48 | // Use hierarchy adapter so HeapByteBuffer/DirectByteBuffer subclasses are covered.
49 | .registerTypeHierarchyAdapter(ByteBuffer.class, new ByteBufferTypeAdapter())
50 | .create();
51 |
52 | private GsonUtils() {
53 | // no instances
54 | }
55 |
56 | /**
57 | * Serializes {@link ByteBuffer} values as Base64 strings and deserializes them back. Registered
58 | * as a hierarchy adapter so it handles all ByteBuffer subclasses.
59 | */
60 | static final class ByteBufferTypeAdapter extends TypeAdapter<ByteBuffer> {
61 |
62 | @Override
63 | public void write(JsonWriter out, ByteBuffer value) throws IOException {
64 | if (value == null) {
65 | out.nullValue();
66 | return;
67 | }
68 | // Duplicate to avoid mutating the original buffer's position/limit.
69 | ByteBuffer dup = value.duplicate();
70 | byte[] bytes = new byte[dup.remaining()];
71 | dup.get(bytes);
72 | out.value(Base64.getEncoder().encodeToString(bytes));
73 | }
74 |
75 | @Override
76 | public ByteBuffer read(JsonReader in) throws IOException {
77 | if (in.peek() == JsonToken.NULL) {
78 | in.nextNull();
79 | return null;
80 | }
81 | byte[] bytes = Base64.getDecoder().decode(in.nextString());
82 | return ByteBuffer.wrap(bytes);
83 | }
84 | }
85 | }
86 |
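
Round-trip sketch (illustrative, not part of the repository source): a ByteBuffer is serialized as a Base64 string and restored, and the source buffer's position is untouched because write() operates on a duplicate.

import com.google.gson.Gson;
import com.wepay.kafka.connect.bigquery.utils.GsonUtils;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

class GsonUtilsDemo {
  public static void main(String[] args) {
    Gson gson = GsonUtils.SAFE_GSON;
    ByteBuffer buf = ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8));
    String json = gson.toJson(buf); // "aGVsbG8=" (Base64, quoted)
    ByteBuffer back = gson.fromJson(json, ByteBuffer.class);
    System.out.println(json + " -> " + StandardCharsets.UTF_8.decode(back));
  }
}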
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/SleepUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.utils;
25 |
26 | import java.util.concurrent.ThreadLocalRandom;
27 |
28 | public final class SleepUtils {
29 |
30 | public static void waitRandomTime(Time time, long sleepMs, long jitterMs) throws InterruptedException {
31 | time.sleep(sleepMs + ThreadLocalRandom.current().nextLong(jitterMs));
32 | }
33 | }
34 |
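
Usage sketch (illustrative): a jittered backoff sleep. Note that jitterMs must be positive, since ThreadLocalRandom.nextLong(bound) rejects bound <= 0.

import com.wepay.kafka.connect.bigquery.utils.SleepUtils;
import com.wepay.kafka.connect.bigquery.utils.Time;

class BackoffSketch {
  public static void main(String[] args) throws InterruptedException {
    // Sleeps between 500 and 999 ms: the base plus a random jitter in [0, 500).
    SleepUtils.waitRandomTime(Time.SYSTEM, 500L, 500L);
  }
}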
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/TableNameUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.utils;
25 |
26 | import com.google.cloud.bigquery.TableId;
27 | import com.google.cloud.bigquery.storage.v1.TableName;
28 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig;
29 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig;
30 | import java.util.Map;
31 | import org.apache.kafka.connect.errors.ConnectException;
32 |
33 | public class TableNameUtils {
34 |
35 | public static String table(TableId table) {
36 | return String.format("table `%s`.`%s`", table.getDataset(), table.getTable());
37 | }
38 |
39 | public static TableName tableName(TableId id) {
40 | return TableName.of(id.getProject(), id.getDataset(), id.getTable());
41 | }
42 |
43 | public static String intTable(TableId table) {
44 | return "intermediate " + table(table);
45 | }
46 |
47 | public static String destTable(TableId table) {
48 | return "destination " + table(table);
49 | }
50 |
51 | public static TableId tableId(TableName name) {
52 | return TableId.of(name.getProject(), name.getDataset(), name.getTable());
53 | }
54 |
55 | public static PartitionedTableId partitionedTableId(TableName name) {
56 | return new PartitionedTableId.Builder(tableId(name)).build();
57 | }
58 |
59 | public static String[] getDataSetAndTableName(BigQuerySinkTaskConfig config, String topic) {
60 | String tableName;
61 | Map topic2TableMap = config.getTopic2TableMap().orElse(null);
62 | String dataset = config.getString(BigQuerySinkConfig.DEFAULT_DATASET_CONFIG);
63 |
64 | if (topic2TableMap != null) {
65 | tableName = topic2TableMap.getOrDefault(topic, topic);
66 | } else {
67 | String[] smtReplacement = topic.split(":");
68 |
69 | if (smtReplacement.length == 2) {
70 | dataset = smtReplacement[0];
71 | tableName = smtReplacement[1];
72 | } else if (smtReplacement.length == 1) {
73 | tableName = smtReplacement[0];
74 | } else {
75 | throw new ConnectException(String.format(
76 | "Incorrect regex replacement format in topic name '%s'. "
77 | + "SMT replacement should either produce the <dataset>:<tableName> format "
78 | + "or just the <tableName> format.",
79 | topic
80 | ));
81 | }
82 | if (config.getBoolean(BigQuerySinkConfig.SANITIZE_TOPICS_CONFIG)) {
83 | tableName = FieldNameSanitizer.sanitizeName(tableName);
84 | }
85 | }
86 |
87 | return new String[]{dataset, tableName};
88 | }
89 | }
90 |
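
Routing sketch (illustrative; the topic and table names are made up): with no topic2TableMap configured, an upstream SMT such as RegexRouter may rewrite the topic to the <dataset>:<tableName> form, which getDataSetAndTableName() splits back apart.

import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig;
import com.wepay.kafka.connect.bigquery.utils.TableNameUtils;

class TopicRoutingSketch {
  static void route(BigQuerySinkTaskConfig config) {
    String[] dt = TableNameUtils.getDataSetAndTableName(config, "my_dataset:logs_table");
    String dataset = dt[0]; // "my_dataset"
    String table = dt[1];   // "logs_table"
    System.out.println(dataset + "." + table);
  }
}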
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/utils/Time.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.utils;
25 |
26 | /**
27 | * Largely adapted from the
28 | * Kafka Time interface,
29 | * which is not public API and therefore cannot be relied upon as a dependency.
30 | */
31 | public interface Time {
32 |
33 | Time SYSTEM = new Time() {
34 | @Override
35 | public void sleep(long durationMs) throws InterruptedException {
36 | Thread.sleep(durationMs);
37 | }
38 |
39 | @Override
40 | public long milliseconds() {
41 | return System.currentTimeMillis();
42 | }
43 | };
44 |
45 | void sleep(long durationMs) throws InterruptedException;
46 |
47 | long milliseconds();
48 |
49 | }
50 |
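
A test-double sketch (MockTime is hypothetical, not part of the repository): implementing the interface with a virtual clock lets retry and backoff logic be unit-tested without real sleeps.

import com.wepay.kafka.connect.bigquery.utils.Time;

class MockTime implements Time {
  private long nowMs = 0L;

  @Override
  public void sleep(long durationMs) {
    nowMs += durationMs; // advance the virtual clock instead of blocking
  }

  @Override
  public long milliseconds() {
    return nowMs;
  }
}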
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/RecordBatches.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.write;
25 |
26 | import java.util.List;
27 |
28 | public class RecordBatches<E> {
29 |
30 | private final List<E> records;
31 |
32 | private int batchStart;
33 | private int batchSize;
34 |
35 | public RecordBatches(List<E> records) {
36 | this.records = records;
37 | this.batchStart = 0;
38 | this.batchSize = records.size();
39 | }
40 |
41 | public List<E> currentBatch() {
42 | int size = Math.min(records.size() - batchStart, batchSize);
43 | return records.subList(batchStart, batchStart + size);
44 | }
45 |
46 | public void advanceToNextBatch() {
47 | batchStart += batchSize;
48 | }
49 |
50 | public void reduceBatchSize() {
51 | if (batchSize <= 1) {
52 | throw new IllegalStateException("Cannot reduce batch size any further");
53 | }
54 | batchSize /= 2;
55 | }
56 |
57 | public boolean completed() {
58 | return batchStart >= records.size();
59 | }
60 |
61 | }
62 |
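
Driver sketch (illustrative; this loop is an assumed caller, not code from the repository): start with the whole list as one batch, halve the batch size on a "request too large"-style failure, and advance on success until every record is written.

import com.wepay.kafka.connect.bigquery.write.RecordBatches;
import java.util.List;
import java.util.function.Consumer;

class BatchingSketch {
  static <E> void writeAll(List<E> records, Consumer<List<E>> writer) {
    RecordBatches<E> batches = new RecordBatches<>(records);
    while (!batches.completed()) {
      try {
        writer.accept(batches.currentBatch());
        batches.advanceToNextBatch(); // success: move past the rows just written
      } catch (RuntimeException e) {
        batches.reduceBatchSize();    // halve and retry; throws once size reaches 1
      }
    }
  }
}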
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/CountDownRunnable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 Copyright 2022 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.write.batch;
25 |
26 | import java.util.concurrent.CountDownLatch;
27 | import org.apache.kafka.connect.errors.ConnectException;
28 |
29 | /**
30 | * A Runnable that counts down, and then waits for the countdown to be finished.
31 | */
32 | public class CountDownRunnable implements Runnable {
33 |
34 | private CountDownLatch countDownLatch;
35 |
36 | public CountDownRunnable(CountDownLatch countDownLatch) {
37 | this.countDownLatch = countDownLatch;
38 | }
39 |
40 | @Override
41 | public void run() {
42 | countDownLatch.countDown();
43 | try {
44 | /*
45 | * Hog this thread until ALL threads are finished counting down. This is needed so that
46 | * this thread doesn't start processing another countdown. If countdown tasks are holding onto
47 | * all the threads, then we know that nothing that went in before the countdown is still
48 | * processing.
49 | */
50 | countDownLatch.await();
51 | } catch (InterruptedException err) {
52 | throw new ConnectException("Thread interrupted while waiting for countdown.", err);
53 | }
54 | }
55 | }
56 |
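
Barrier sketch (illustrative; awaitCurrentTasks and threadCount are made up for this example): submitting one countdown task per worker thread parks every worker in await() until all have counted down, so no task submitted earlier can still be running when the method returns.

import com.wepay.kafka.connect.bigquery.write.batch.CountDownRunnable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;

class FlushBarrierSketch {
  static void awaitCurrentTasks(ExecutorService executor, int threadCount)
      throws InterruptedException {
    CountDownLatch latch = new CountDownLatch(threadCount);
    for (int i = 0; i < threadCount; i++) {
      executor.execute(new CountDownRunnable(latch));
    }
    latch.await(); // returns only after every worker thread reached the barrier
  }
}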
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/batch/TableWriterBuilder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2022, 2024 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.write.batch;
25 |
26 | import com.google.cloud.bigquery.TableId;
27 | import org.apache.kafka.connect.sink.SinkRecord;
28 |
29 | /**
30 | * Interface for building a {@link TableWriter} or {@link GcsBatchTableWriter}.
31 | */
32 | public interface TableWriterBuilder {
33 |
34 | /**
35 | * Add a record to the builder.
36 | *
37 | * @param sinkRecord the row to add.
38 | * @param table the table the row will be written to.
39 | */
40 | void addRow(SinkRecord sinkRecord, TableId table);
41 |
42 | /**
43 | * Create a {@link TableWriter} from this builder.
44 | *
45 | * @return a Runnable (typically a TableWriter) that writes all added rows when executed.
46 | */
47 | Runnable build();
48 | }
49 |
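50 | /*
51 |  * Illustrative usage sketch: rows are accumulated on the builder, and the resulting
52 |  * Runnable performs the actual write when executed, e.g. on a thread pool. The builder,
53 |  * records, tableId, and executor variables are assumed to be in scope.
54 |  *
55 |  *   for (SinkRecord record : records) {
56 |  *     builder.addRow(record, tableId);
57 |  *   }
58 |  *   executor.execute(builder.build());
59 |  */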
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/storage/ConvertedRecord.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2022, 2024 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.write.storage;
25 |
26 | import org.apache.kafka.connect.sink.SinkRecord;
27 | import org.json.JSONObject;
28 |
29 | public class ConvertedRecord {
30 |
31 | private final SinkRecord original;
32 | private final JSONObject converted;
33 |
34 | public ConvertedRecord(SinkRecord original, JSONObject converted) {
35 | this.original = original;
36 | this.converted = converted;
37 | }
38 |
39 | public SinkRecord original() {
40 | return original;
41 | }
42 |
43 | public JSONObject converted() {
44 | return converted;
45 | }
46 |
47 | }
48 |
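49 | /*
50 |  * Illustrative usage sketch: keeping the original SinkRecord alongside its JSON form
51 |  * means a failed write can still be reported against the source record. toJson is a
52 |  * hypothetical conversion helper.
53 |  *
54 |  *   JSONObject json = toJson(record);
55 |  *   ConvertedRecord converted = new ConvertedRecord(record, json);
56 |  *   // on write failure, an errant-record handler can receive converted.original()
57 |  */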
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/storage/JsonStreamWriterFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2022, 2024 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.write.storage;
25 |
26 | import com.google.cloud.bigquery.storage.v1.JsonStreamWriter;
27 | import com.google.protobuf.Descriptors;
28 | import java.io.IOException;
29 |
30 | /**
31 | * A functional interface for creating {@link JsonStreamWriter} instances.
32 | */
33 | @FunctionalInterface
34 | public interface JsonStreamWriterFactory {
35 | JsonStreamWriter create(String streamOrTableName) throws Descriptors.DescriptorValidationException,
36 | IOException, InterruptedException;
37 | }
38 |
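39 | /*
40 |  * Illustrative usage sketch: the factory is a natural fit for a lambda. The builder call
41 |  * assumes the standard com.google.cloud.bigquery.storage.v1 client API, with writeClient
42 |  * (an existing BigQueryWriteClient) and tableOrStreamName assumed to be in scope.
43 |  *
44 |  *   JsonStreamWriterFactory factory =
45 |  *       name -> JsonStreamWriter.newBuilder(name, writeClient).build();
46 |  *   JsonStreamWriter writer = factory.create(tableOrStreamName);
47 |  */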
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/storage/StorageApiBatchModeHandler.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2022, 2024 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.write.storage;
25 |
26 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig;
27 | import java.util.List;
28 | import java.util.Map;
29 | import org.apache.kafka.clients.consumer.OffsetAndMetadata;
30 | import org.apache.kafka.common.TopicPartition;
31 | import org.slf4j.Logger;
32 | import org.slf4j.LoggerFactory;
33 |
34 | /**
35 | * Handles all operations related to the Storage Write API in batch mode.
36 | */
37 | public class StorageApiBatchModeHandler {
38 |
39 | private static final Logger logger = LoggerFactory.getLogger(StorageApiBatchModeHandler.class);
40 | private final StorageWriteApiBatchApplicationStream streamApi;
41 |
42 | public StorageApiBatchModeHandler(StorageWriteApiBatchApplicationStream streamApi, BigQuerySinkTaskConfig config) {
43 | this.streamApi = streamApi;
44 | }
45 |
46 | /**
47 | * Used by the scheduler to commit all eligible streams and create new active
48 | * streams.
49 | */
50 | public void refreshStreams() {
51 | logger.trace("Storage Write API commit stream attempt by scheduler");
52 | streamApi.refreshStreams();
53 | }
54 |
55 | /**
56 | * Saves the offsets assigned to a particular stream on a table. This is required so that
57 | * offsets are committed sequentially even though writes may execute in parallel at different times.
58 | *
59 | * @param tableName Name of the table in project/dataset/tablename format
60 | * @param rows Records to be written to table {tableName}; also used to define the schema
61 | * if table creation is attempted
62 | * @return the name of the stream on which offsets are updated
63 | */
64 | public String updateOffsetsOnStream(
65 | String tableName,
66 | List<ConvertedRecord> rows) {
67 | logger.trace("Updating offsets on current stream of table {}", tableName);
68 | return this.streamApi.updateOffsetsOnStream(tableName, rows);
69 | }
70 |
71 | /**
72 | * Gets the offsets which have been committed to BigQuery tables.
73 | *
74 | * @return map of topic partitions to committed offset metadata
75 | */
76 | public Map<TopicPartition, OffsetAndMetadata> getCommitableOffsets() {
77 | logger.trace("Getting list of commitable offsets for batch mode");
78 | return this.streamApi.getCommitableOffsets();
79 | }
80 |
81 | }
82 |
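83 | /*
84 |  * Illustrative usage sketch: a sink task in batch mode records offsets as it hands rows to
85 |  * a stream, and later reports only offsets BigQuery has durably committed. The handler,
86 |  * tableName, and rows variables are assumed to be in scope.
87 |  *
88 |  *   String stream = handler.updateOffsetsOnStream(tableName, rows);
89 |  *   // ... rows are appended to the stream asynchronously ...
90 |  *   Map<TopicPartition, OffsetAndMetadata> safe = handler.getCommitableOffsets();
91 |  *   // returning `safe` from SinkTask.preCommit(...) commits only durable offsets to Kafka
92 |  */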
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/storage/StreamState.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2022, 2024 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.write.storage;
25 |
26 | /**
27 | * Lifecycle states of a Storage Write API application stream.
28 | */
29 | public enum StreamState {
30 | CREATED,
31 | APPEND,
32 | FINALISED,
33 | COMMITTED,
34 | INACTIVE
35 | }
36 |
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/write/storage/StreamWriter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2022, 2024 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery.write.storage;
25 |
26 | import com.google.api.core.ApiFuture;
27 | import com.google.cloud.bigquery.storage.v1.AppendRowsResponse;
28 | import com.google.protobuf.Descriptors;
29 | import java.io.IOException;
30 | import org.json.JSONArray;
31 |
32 | public interface StreamWriter {
33 |
34 | /**
35 | * Write the provided rows
36 | *
37 | * @param rows the rows to write; may not be null
38 | * @return the response from BigQuery for the write attempt
39 | */
40 | ApiFuture<AppendRowsResponse> appendRows(
41 | JSONArray rows
42 | ) throws Descriptors.DescriptorValidationException, IOException;
43 |
44 | /**
45 | * Invoked if the underlying stream appears to be closed. Implementing classes
46 | * should respond by re-initializing the underlying stream.
47 | */
48 | void refresh();
49 |
50 | /**
51 | * Invoked when all rows have either been written to BigQuery or intentionally
52 | * discarded (e.g., reported to an {@link com.wepay.kafka.connect.bigquery.ErrantRecordHandler}).
53 | */
54 | void onSuccess();
55 |
56 | String streamName();
57 |
58 | }
59 |
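60 | /*
61 |  * Illustrative usage sketch: a simplified single-retry policy (not the connector's full
62 |  * retry logic) that refreshes the writer once if an append fails because the underlying
63 |  * stream was closed.
64 |  *
65 |  *   try {
66 |  *     writer.appendRows(rows).get();
67 |  *   } catch (ExecutionException e) {
68 |  *     writer.refresh();              // re-initialize the underlying stream
69 |  *     writer.appendRows(rows).get(); // retry once
70 |  *   }
71 |  *   writer.onSuccess();
72 |  */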
--------------------------------------------------------------------------------
/kcbq-connector/src/main/java/io/aiven/kafka/utils/VersionInfo.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * Licensed to the Apache Software Foundation (ASF) under one *
4 | * or more contributor license agreements. See the NOTICE file *
5 | * distributed with this work for additional information *
6 | * regarding copyright ownership. The ASF licenses this file *
7 | * to you under the Apache License, Version 2.0 (the *
8 | * "License"); you may not use this file except in compliance *
9 | * with the License. You may obtain a copy of the License at *
10 | * *
11 | * http://www.apache.org/licenses/LICENSE-2.0 *
12 | * *
13 | * Unless required by applicable law or agreed to in writing, *
14 | * software distributed under the License is distributed on an *
15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
16 | * KIND, either express or implied. See the License for the *
17 | * specific language governing permissions and limitations *
18 | * under the License. *
19 | */
20 |
21 | package io.aiven.kafka.utils;
22 |
23 | /**
24 | * A formatter for Package information about a class.
25 | * This class was originally developed by the Apache RAT project.
26 | *
27 | * @see Package
28 | */
29 | public final class VersionInfo {
30 | /**
31 | * The package from which implementation and specification information is read.
32 | */
33 | private final Package pkg;
34 |
35 | private String orDefault(final String value, final String defaultValue) {
36 | return value == null ? defaultValue : value;
37 | }
38 |
39 | /**
40 | * Constructor that uses the VersionInfo package for information.
41 | */
42 | public VersionInfo() {
43 | this(VersionInfo.class);
44 | }
45 |
46 | /**
47 | * Constructor for a specific class.
48 | *
49 | * @param clazz the class to get the Package information from.
50 | */
51 | public VersionInfo(final Class<?> clazz) {
52 | pkg = clazz.getPackage();
53 | }
54 |
55 | /**
56 | * Default string representation of the implementation information from the package.
57 | *
58 | * @return The string representation.
59 | */
60 | @Override
61 | public String toString() {
62 | return String.format("%s %s (%s)", getTitle(), getVersion(), getVendor());
63 | }
64 |
65 | /**
66 | * Gets the implementation version of the package. Will return "VERSION-NUMBER" if
67 | * package information is not available.
68 | *
69 | * @return the implementation version.
70 | */
71 | public String getVersion() {
72 | return orDefault(pkg.getImplementationVersion(), "VERSION-NUMBER");
73 | }
74 |
75 | /**
76 | * Gets the implementation vendor of the package. Will return "VENDOR-NAME" if
77 | * package information is not available.
78 | *
79 | * @return the implementation vendor
80 | */
81 | public String getVendor() {
82 | return orDefault(pkg.getImplementationVendor(), "VENDOR-NAME");
83 | }
84 |
85 | /**
86 | * Gets the implementation title of the package. Will return "TITLE" if
87 | * package information is not available.
88 | *
89 | * @return the implementation title
90 | */
91 | public String getTitle() {
92 | return orDefault(pkg.getImplementationTitle(), "TITLE");
93 | }
94 |
95 | /**
96 | * Gets the specification version of the package. Will return "SPEC-VERSION" if
97 | * package information is not available.
98 | *
99 | * @return the specification version.
100 | */
101 | public String getSpecVersion() {
102 | return orDefault(pkg.getSpecificationVersion(), "SPEC-VERSION");
103 | }
104 |
105 | /**
106 | * Gets the specification vendor of the package. Will return "SPEC-VENDOR" if
107 | * package information is not available.
108 | *
109 | * @return the specification vendor
110 | */
111 | public String getSpecVendor() {
112 | return orDefault(pkg.getSpecificationVendor(), "SPEC-VENDOR");
113 | }
114 |
115 | /**
116 | * Gets the specification title of the package. Will return "SPEC-TITLE" if
117 | * package information is not available.
118 | *
119 | * @return the specification title
120 | */
121 | public String getSpecTitle() {
122 | return orDefault(pkg.getSpecificationTitle(), "SPEC-TITLE");
123 | }
124 |
125 | public static void main(String[] args) {
126 | VersionInfo versionInfo = new VersionInfo();
127 | System.out.println(versionInfo);
128 | System.out.format("Spec: %s %s %s%n", versionInfo.getSpecTitle(), versionInfo.getSpecVersion(), versionInfo.getSpecVendor());
129 | }
130 | }
--------------------------------------------------------------------------------
/kcbq-connector/src/main/resources/META-INF/services/org.apache.kafka.connect.sink.SinkConnector:
--------------------------------------------------------------------------------
1 | com.wepay.kafka.connect.bigquery.BigQuerySinkConnector
2 |
--------------------------------------------------------------------------------
/kcbq-connector/src/test/java/com/wepay/kafka/connect/bigquery/BigQuerySinkConnectorTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2022, 2024 Aiven Oy and
3 | * bigquery-connector-for-apache-kafka project contributors
4 | *
5 | * This software contains code derived from the Confluent BigQuery
6 | * Kafka Connector, Copyright Confluent, Inc, which in turn
7 | * contains code derived from the WePay BigQuery Kafka Connector,
8 | * Copyright WePay, Inc.
9 | *
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing,
17 | * software distributed under the License is distributed on an
18 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 | * KIND, either express or implied. See the License for the
20 | * specific language governing permissions and limitations
21 | * under the License.
22 | */
23 |
24 | package com.wepay.kafka.connect.bigquery;
25 |
26 | import static org.junit.jupiter.api.Assertions.assertEquals;
27 | import static org.junit.jupiter.api.Assertions.assertNotNull;
28 | import static org.junit.jupiter.api.Assertions.assertNotSame;
29 |
30 | import com.wepay.kafka.connect.bigquery.api.SchemaRetriever;
31 | import com.wepay.kafka.connect.bigquery.config.BigQuerySinkTaskConfig;
32 | import java.util.HashMap;
33 | import java.util.List;
34 | import java.util.Map;
35 | import org.apache.kafka.connect.data.Schema;
36 | import org.apache.kafka.connect.sink.SinkRecord;
37 | import org.junit.jupiter.api.BeforeAll;
38 | import org.junit.jupiter.api.Test;
39 |
40 | public class BigQuerySinkConnectorTest {
41 | private static SinkPropertiesFactory propertiesFactory;
42 |
43 | @BeforeAll
44 | public static void initializePropertiesFactory() {
45 | propertiesFactory = new SinkPropertiesFactory();
46 | }
47 |
48 | @Test
49 | public void testTaskClass() {
50 | assertEquals(BigQuerySinkTask.class, new BigQuerySinkConnector().taskClass());
51 | }
52 |
53 | @Test
54 | public void testTaskConfigs() {
55 | Map<String, String> properties = propertiesFactory.getProperties();
56 |
57 | BigQuerySinkConnector testConnector = new BigQuerySinkConnector();
58 |
59 | testConnector.start(properties);
60 |
61 | for (int i : new int[]{1, 2, 10, 100}) {
62 | Map<String, String> expectedProperties = new HashMap<>(properties);
63 | List