├── .asf.yaml ├── .editorconfig ├── .github └── workflows │ └── codeql-analysis.yml ├── .gitignore ├── .mvn └── wrapper │ └── maven-wrapper.properties ├── CHANGES.md ├── LICENSE ├── NOTICE ├── README.md ├── TROUBLESHOOT.md ├── cloudbuild ├── nightly │ ├── Dockerfile │ ├── cloudbuild.yaml │ ├── gcp-settings.xml │ ├── nightly.sh │ └── scripts │ │ ├── bounded_table_write.sh │ │ ├── create_dataproc_cluster.sh │ │ ├── python-scripts │ │ ├── assert_table_count.py │ │ ├── create_cluster.py │ │ ├── create_sink_table.py │ │ ├── create_unbounded_sink_table.py │ │ ├── delete_buckets_and_clusters.py │ │ ├── insert_dynamic_files.py │ │ ├── parse_logs.py │ │ ├── requirements.txt │ │ └── utils.py │ │ ├── table_write.sh │ │ └── unbounded_table_write.sh └── presubmit │ ├── Dockerfile │ ├── cloudbuild.yaml │ ├── gcp-settings.xml │ └── presubmit.sh ├── flink-1.17-connector-bigquery ├── flink-connector-bigquery-examples │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── com │ │ └── google │ │ └── cloud │ │ └── flink │ │ └── bigquery │ │ └── examples │ │ └── BigQueryExample.java ├── flink-connector-bigquery-integration-test │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── com │ │ └── google │ │ └── cloud │ │ └── flink │ │ └── bigquery │ │ └── integration │ │ └── BigQueryIntegrationTest.java ├── flink-connector-bigquery-table-api-examples │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── com │ │ └── google │ │ └── cloud │ │ └── flink │ │ └── bigquery │ │ └── examples │ │ └── BigQueryTableExample.java ├── flink-connector-bigquery │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── google │ │ │ │ └── cloud │ │ │ │ └── flink │ │ │ │ └── bigquery │ │ │ │ ├── sink │ │ │ │ ├── BigQueryBaseSink.java │ │ │ │ ├── BigQueryDefaultSink.java │ │ │ │ ├── BigQueryExactlyOnceSink.java │ │ │ │ ├── BigQuerySink.java │ │ │ │ ├── BigQuerySinkConfig.java │ │ │ │ ├── TwoPhaseCommittingStatefulSink.java │ │ │ │ ├── client │ │ │ │ │ └── BigQueryClientWithErrorHandling.java │ │ │ │ ├── committer │ │ │ │ │ ├── BigQueryCommittable.java │ │ │ │ │ ├── BigQueryCommittableSerializer.java │ │ │ │ │ └── BigQueryCommitter.java │ │ │ │ ├── exceptions │ │ │ │ │ └── BigQuerySerializationException.java │ │ │ │ ├── serializer │ │ │ │ │ ├── AvroSchemaConvertor.java │ │ │ │ │ ├── AvroToProtoSerializer.java │ │ │ │ │ ├── BigQueryProtoSerializer.java │ │ │ │ │ ├── BigQuerySchemaProvider.java │ │ │ │ │ ├── BigQuerySchemaProviderImpl.java │ │ │ │ │ ├── BigQueryTableSchemaProvider.java │ │ │ │ │ └── RowDataToProtoSerializer.java │ │ │ │ ├── state │ │ │ │ │ └── BigQueryStreamState.java │ │ │ │ ├── throttle │ │ │ │ │ ├── BigQueryWriterThrottler.java │ │ │ │ │ └── Throttler.java │ │ │ │ └── writer │ │ │ │ │ ├── BaseWriter.java │ │ │ │ │ ├── BigQueryBufferedWriter.java │ │ │ │ │ ├── BigQueryDefaultWriter.java │ │ │ │ │ ├── BigQueryWriterState.java │ │ │ │ │ ├── BigQueryWriterStateSerializer.java │ │ │ │ │ └── CreateTableOptions.java │ │ │ │ ├── source │ │ │ │ ├── BigQuerySource.java │ │ │ │ ├── emitter │ │ │ │ │ └── BigQueryRecordEmitter.java │ │ │ │ ├── enumerator │ │ │ │ │ ├── BigQuerySourceEnumState.java │ │ │ │ │ ├── BigQuerySourceEnumStateSerializer.java │ │ │ │ │ └── BigQuerySourceEnumerator.java │ │ │ │ ├── reader │ │ │ │ │ ├── BigQuerySourceReader.java │ │ │ │ │ ├── BigQuerySourceReaderContext.java │ │ │ │ │ └── deserializer │ │ │ │ │ │ ├── AvroDeserializationSchema.java │ │ │ │ │ │ ├── AvroToRowDataConverters.java │ │ │ │ │ │ ├── AvroToRowDataDeserializationSchema.java │ │ │ │ │ │ └── 
BigQueryDeserializationSchema.java │ │ │ │ └── split │ │ │ │ │ ├── BigQuerySourceSplit.java │ │ │ │ │ ├── BigQuerySourceSplitSerializer.java │ │ │ │ │ ├── BigQuerySourceSplitState.java │ │ │ │ │ ├── assigner │ │ │ │ │ ├── BigQuerySourceSplitAssigner.java │ │ │ │ │ └── BoundedSplitAssigner.java │ │ │ │ │ └── reader │ │ │ │ │ └── BigQuerySourceSplitReader.java │ │ │ │ └── table │ │ │ │ ├── BigQueryDynamicTableFactory.java │ │ │ │ ├── BigQueryDynamicTableSink.java │ │ │ │ ├── BigQueryDynamicTableSource.java │ │ │ │ ├── config │ │ │ │ ├── BigQueryConnectorOptions.java │ │ │ │ ├── BigQueryReadTableConfig.java │ │ │ │ ├── BigQuerySinkTableConfig.java │ │ │ │ ├── BigQueryTableConfig.java │ │ │ │ └── BigQueryTableConfigurationProvider.java │ │ │ │ └── restrictions │ │ │ │ └── BigQueryRestriction.java │ │ └── resources │ │ │ ├── META-INF │ │ │ └── services │ │ │ │ └── org.apache.flink.table.factories.Factory │ │ │ └── connector.properties │ │ └── test │ │ ├── java │ │ └── com │ │ │ └── google │ │ │ └── cloud │ │ │ └── flink │ │ │ └── bigquery │ │ │ ├── fakes │ │ │ └── StorageClientFaker.java │ │ │ ├── sink │ │ │ ├── BigQueryDefaultSinkTest.java │ │ │ ├── BigQueryExactlyOnceSinkTest.java │ │ │ ├── BigQuerySinkConfigTest.java │ │ │ ├── BigQuerySinkTest.java │ │ │ ├── client │ │ │ │ └── BigQueryClientWithErrorHandlingTest.java │ │ │ ├── committer │ │ │ │ ├── BigQueryCommittableSerializerTest.java │ │ │ │ └── BigQueryCommitterTest.java │ │ │ ├── serializer │ │ │ │ ├── AvroSchemaConvertorTest.java │ │ │ │ ├── AvroToProtoSerializerTest.java │ │ │ │ ├── BigQuerySchemaProviderTest.java │ │ │ │ ├── FakeBigQuerySerializer.java │ │ │ │ ├── RowDataToProtoSerializerTest.java │ │ │ │ ├── TestBigQuerySchemas.java │ │ │ │ └── TestSchemaProvider.java │ │ │ ├── throttle │ │ │ │ └── BigQueryWriterThrottlerTest.java │ │ │ └── writer │ │ │ │ ├── BigQueryBufferedWriterTest.java │ │ │ │ ├── BigQueryDefaultWriterTest.java │ │ │ │ └── BigQueryWriterStateSerializerTest.java │ │ │ ├── source │ │ │ ├── BigQuerySourceIntegrationTestCase.java │ │ │ ├── BigQuerySourceTest.java │ │ │ ├── enumerator │ │ │ │ └── BigQuerySourceEnumStateSerializerTest.java │ │ │ └── split │ │ │ │ ├── BigQuerySourceSplitSerializerTest.java │ │ │ │ ├── BigQuerySourceSplitStateTest.java │ │ │ │ ├── assigner │ │ │ │ └── BigQuerySourceSplitAssignerTest.java │ │ │ │ └── reader │ │ │ │ ├── BigQuerySourceSplitReaderTest.java │ │ │ │ └── deserializer │ │ │ │ └── AvroToRowDataConvertersTest.java │ │ │ └── table │ │ │ ├── BigQueryDynamicTableFactoryTest.java │ │ │ ├── BigQueryDynamicTableSinkIntegrationTestCase.java │ │ │ ├── BigQueryDynamicTableSinkTest.java │ │ │ ├── BigQueryDynamicTableSourceIntegrationTestCase.java │ │ │ └── config │ │ │ └── BigQuerySinkTableConfigTest.java │ │ └── resources │ │ └── log4j2-test.properties └── pom.xml ├── flink-connector-bigquery-common ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── google │ │ └── cloud │ │ └── flink │ │ └── bigquery │ │ ├── common │ │ ├── config │ │ │ ├── BigQueryConnectOptions.java │ │ │ └── CredentialsOptions.java │ │ ├── exceptions │ │ │ └── BigQueryConnectorException.java │ │ └── utils │ │ │ ├── AvroToBigQuerySchemaTransform.java │ │ │ ├── BigQueryPartitionUtils.java │ │ │ ├── BigQueryStateSerde.java │ │ │ ├── BigQueryTableInfo.java │ │ │ ├── GoogleCredentialsSupplier.java │ │ │ └── SchemaTransform.java │ │ ├── services │ │ ├── BigQueryServices.java │ │ ├── BigQueryServicesFactory.java │ │ ├── BigQueryServicesImpl.java │ │ ├── BigQueryUtils.java │ │ ├── PartitionIdWithInfo.java │ │ └── 
TablePartitionInfo.java │ │ └── source │ │ ├── config │ │ └── BigQueryReadOptions.java │ │ └── split │ │ ├── SplitDiscoverer.java │ │ └── SplitDiscoveryScheduler.java │ └── test │ ├── java │ └── com │ │ └── google │ │ └── cloud │ │ └── flink │ │ └── bigquery │ │ ├── common │ │ └── utils │ │ │ ├── AvroToBigQuerySchemaTransformTest.java │ │ │ ├── BigQueryPartitionUtilsTest.java │ │ │ ├── BigQueryStateSerdeTest.java │ │ │ └── SchemaTransformTest.java │ │ ├── fakes │ │ └── StorageClientFaker.java │ │ └── services │ │ ├── BigQueryServicesTest.java │ │ ├── BigQueryUtilsTest.java │ │ └── TablePartitionInfoTest.java │ └── resources │ └── log4j2-test.properties ├── mvnw ├── mvnw.cmd ├── pom.xml └── tools ├── ci └── log4j.properties └── maven ├── checkstyle.xml ├── clover.xml └── suppressions.xml /.asf.yaml: -------------------------------------------------------------------------------- 1 | github: 2 | enabled_merge_buttons: 3 | squash: true 4 | merge: false 5 | rebase: true 6 | labels: 7 | - flink 8 | - bigquery 9 | - connector 10 | autolink_jira: FLINK 11 | collaborators: 12 | - prodriguezdefino 13 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ main ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ main ] 20 | schedule: 21 | - cron: '19 18 * * 5' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'java' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v2 42 | 43 | # Initializes the CodeQL tools for scanning. 44 | - name: Initialize CodeQL 45 | uses: github/codeql-action/init@v2 46 | with: 47 | languages: ${{ matrix.language }} 48 | # If you wish to specify custom queries, you can do so here or in a config file. 49 | # By default, queries listed here will override any specified in a config file. 50 | # Prefix the list here with "+" to use these queries and those in the config file. 51 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 52 | 53 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 54 | # If this step fails, then you should remove it and run the build manually (see below) 55 | - name: Autobuild 56 | uses: github/codeql-action/autobuild@v2 57 | 58 | # ℹ️ Command-line programs to run using the OS shell. 
59 | # 📚 https://git.io/JvXDl 60 | 61 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 62 | # and modify them (or add more) to build your code if your project 63 | # uses a compiled language 64 | 65 | #- run: | 66 | # make bootstrap 67 | # make release 68 | 69 | - name: Perform CodeQL Analysis 70 | uses: github/codeql-action/analyze@v2 71 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | .flattened-pom.xml 3 | pom.xml.tag 4 | pom.xml.releaseBackup 5 | pom.xml.versionsBackup 6 | pom.xml.next 7 | release.properties 8 | dependency-reduced-pom.xml 9 | buildNumber.properties 10 | .mvn/timing.properties 11 | # https://github.com/takari/maven-wrapper#usage-without-binary-jar 12 | .mvn/wrapper/maven-wrapper.jar 13 | .idea/ 14 | .DS_Store 15 | tools/flink 16 | tools/flink-* 17 | tools/releasing/release 18 | tools/japicmp-output 19 | java.header 20 | .java-version 21 | nb-* 22 | **nbproject 23 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.4/apache-maven-3.8.4-bin.zip 18 | wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.1.0/maven-wrapper-3.1.0.jar 19 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # Release Notes 2 | 3 | ## Next 4 | 5 | ## 1.0.0 - 2025-02-25 6 | 7 | * Propagate generics to BigQuerySink and BigQuerySinkConfig. Users of DataStream API 8 | will need to strongly type the sink's input in BigQuerySinkConfig. SQL/Table API users 9 | will not be affected. 10 | * Increase maximum allowed sink parallelism to 512 for BigQuery's multi-regions (US and EU). 11 | * Remove unbounded source and bounded query source. 12 | * Create a shaded jar for the connector library. 13 | * Allow sink to throw a fatal error if record cannot be serialized to BigQuery's input format in sink. 14 | * Fix integer and float handling in sink by upcasting to long and double respectively. 15 | Check [issue 219](https://github.com/GoogleCloudDataproc/flink-bigquery-connector/issues/219) for details. 16 | 17 | ## 0.5.0 - 2025-01-15 18 | 19 | * Support creation of new table in BigQuery sink. This is integrated with Datastream and Table/SQL API. 
20 | * Remove need for BigQuerySchemaProvider in BigQuery sink configs. 21 | * Deprecate unbounded source. To be completely removed in next release. 22 | 23 | ## 0.4.0 - 2024-11-04 24 | 25 | * Support exactly-once consistency in BigQuery sink. This is integrated with Datastream and Table/SQL API. 26 | * Add Flink metrics for monitoring BigQuery sink. 27 | * Package unshaded guava dependency for enforcing the correct version used by BigQuery client. 28 | 29 | ## 0.3.0 - 2024-08-07 30 | 31 | * Support BigQuery sink in Flink's Table API. 32 | * BigQuery sink's maximum parallelism is increased from 100 to 128, beyond which the application will fail. 33 | * Modifies the following config keys for connector source in Table API: 34 | 35 | | Before | After | 36 | |---------------------------|----------------------------| 37 | | `read.discoveryinterval` | `read.discovery-interval` | 38 | | `credentials.accesstoken` | `credentials.access-token` | 39 | | `read.streams.maxcount` | `read.streams.max-count` | 40 | 41 | ## 0.2.0 - 2024-05-13 42 | 43 | * Release BigQuery sink with at-least-once support. 44 | * Avro's GenericRecord to BigQuery proto is the only out-of-the-box serializer offered for now. 45 | * BigQuery sink's maximum parallelism is capped at 100, beyond which the application with fail. 46 | 47 | ## 0.1.0-preview - 2023-12-14 48 | 49 | * Initial release with BQ source support 50 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Apache Flink BigQuery Connector 2 | Copyright 2023 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 6 | 7 | Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby 8 | granted, provided that this permission notice appear in all copies. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING 11 | ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, 12 | DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 13 | WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE 14 | USE OR PERFORMANCE OF THIS SOFTWARE. 15 | 16 | 17 | -------------------------------------------------------------------------------- /cloudbuild/nightly/Dockerfile: -------------------------------------------------------------------------------- 1 | # This Dockerfile creates an image for running presubmit tests. 
2 | FROM openjdk:8 3 | 4 | RUN \ 5 | echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | \ 6 | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \ 7 | curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | \ 8 | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \ 9 | apt-get update -y && \ 10 | apt-get install google-cloud-cli -y \ 11 | && apt clean 12 | # Install Python and Basic Python Tools 13 | RUN apt-get -y install python3 && apt clean 14 | # Get pip to download and install requirements 15 | RUN apt-get -y install python3-pip && apt clean 16 | # For md5sum 17 | RUN apt-get -y install coreutils && apt clean 18 | COPY ./cloudbuild/nightly/scripts/python-scripts/requirements.txt /workspace/ 19 | RUN pip3 install -r /workspace/requirements.txt 20 | -------------------------------------------------------------------------------- /cloudbuild/nightly/gcp-settings.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | google-maven-central 8 | GCS Maven Central mirror 9 | https://maven-central.storage-download.googleapis.com/maven2/ 10 | central 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /cloudbuild/nightly/scripts/bounded_table_write.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2022 Google Inc. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the 'License'); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an 'AS IS' BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | PROPERTIES=$1 17 | BOUNDED_JOB_SINK_PARALLELISM=$2 18 | IS_SQL=$3 19 | IS_EXACTLY_ONCE_ENABLED=$4 20 | ENABLE_TABLE_CREATION=$5 21 | 22 | # We won't run this async as we can wait for a bounded job to succeed or fail. 23 | if [ "$ENABLE_TABLE_CREATION" == False ] 24 | then 25 | echo "Creating destination table before test" 26 | # Create the destination table from the source table schema. 27 | python3 cloudbuild/nightly/scripts/python-scripts/create_sink_table.py -- --project_name "$PROJECT_NAME" --dataset_name "$DATASET_NAME" --source_table_name "$SOURCE" --destination_table_name "$DESTINATION_TABLE_NAME" 28 | # Set the expiration time to 1 hour. 
29 | bq update --expiration 3600 "$DATASET_NAME"."$DESTINATION_TABLE_NAME" 30 | fi 31 | # Run the sink JAR JOB 32 | gcloud dataproc jobs submit flink --id "$JOB_ID" --jar="$GCS_JAR_LOCATION" --cluster="$CLUSTER_NAME" --region="$REGION" --properties="$PROPERTIES" -- --gcp-source-project "$PROJECT_NAME" --bq-source-dataset "$DATASET_NAME" --bq-source-table "$SOURCE" --gcp-dest-project "$PROJECT_NAME" --bq-dest-dataset "$DATASET_NAME" --bq-dest-table "$DESTINATION_TABLE_NAME" --sink-parallelism "$BOUNDED_JOB_SINK_PARALLELISM" --is-sql "$IS_SQL" --exactly-once "$IS_EXACTLY_ONCE_ENABLED" --enable-table-creation "$ENABLE_TABLE_CREATION" -------------------------------------------------------------------------------- /cloudbuild/nightly/scripts/create_dataproc_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2022 Google Inc. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the 'License'); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an 'AS IS' BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | CLUSTER_NAME=$1 18 | REGION_ARRAY_STRING=$2 19 | NUM_WORKERS=$3 20 | REGION_SAVING_FILE=$4 21 | WORKER_MACHINE_TYPE=$5 22 | 23 | 24 | # Set the project, location and zone for the cluster creation. 25 | gcloud config set project "$PROJECT_ID" 26 | # Create the cluster 27 | # The script retries to create from the list of regions provided. 28 | python3 cloudbuild/nightly/scripts/python-scripts/create_cluster.py -- --region_array_string "$REGION_ARRAY_STRING" --project_id \ 29 | "$PROJECT_ID" --cluster_name "$CLUSTER_NAME" --dataproc_image_version "$DATAPROC_IMAGE_VERSION" --num_workers "$NUM_WORKERS" \ 30 | --initialisation_action_script_uri "$INITIALISATION_ACTION_SCRIPT_URI" --region_saving_file "$REGION_SAVING_FILE" --worker_machine_type "$WORKER_MACHINE_TYPE" 31 | -------------------------------------------------------------------------------- /cloudbuild/nightly/scripts/python-scripts/create_sink_table.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections.abc import Sequence 3 | 4 | from absl import app 5 | from google.cloud import bigquery 6 | from absl import logging 7 | from google.cloud.bigquery import DatasetReference 8 | 9 | 10 | def extract_source_table_schema(client, dataset_ref, source_table_name): 11 | table_ref = dataset_ref.table(source_table_name) 12 | table = client.get_table(table_ref) # API Request 13 | return table.schema 14 | 15 | 16 | def create_destination_table(project_name, dataset_name, source_table_name, destination_table_name): 17 | # Construct a BigQuery client object. 18 | client = bigquery.Client(project=project_name) 19 | # Obtain the dataset reference for the dataset 20 | dataset_ref = DatasetReference(project_name, dataset_name) 21 | # Obtain the Source Table schema. 
22 | source_table_schema = extract_source_table_schema(client, dataset_ref, source_table_name) 23 | logging.info( 24 | "Obtained Schema for the table {}.{}.{}".format(project_name, dataset_name, 25 | destination_table_name)) 26 | table_id = f"{project_name}.{dataset_name}.{destination_table_name}" 27 | table = bigquery.Table(table_id, schema=source_table_schema) 28 | try: 29 | table = client.create_table(table) # Make an API request. 30 | logging.info( 31 | "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id) 32 | ) 33 | except Exception as _: 34 | raise RuntimeError("Table could not be created!") 35 | 36 | 37 | def main(argv: Sequence[str]) -> None: 38 | parser = argparse.ArgumentParser() 39 | parser.add_argument( 40 | '--project_name', 41 | dest='project_name', 42 | help='Project Id which contains the table to be read.', 43 | type=str, 44 | default='', 45 | required=True, 46 | ) 47 | parser.add_argument( 48 | '--dataset_name', 49 | dest='dataset_name', 50 | help='Dataset Name which contains the table to be read.', 51 | type=str, 52 | default='', 53 | required=True, 54 | ) 55 | parser.add_argument( 56 | '--source_table_name', 57 | dest='source_table_name', 58 | help='Table Name of the table which is source for write test.', 59 | type=str, 60 | default='', 61 | required=True, 62 | ) 63 | 64 | parser.add_argument( 65 | '--destination_table_name', 66 | dest='destination_table_name', 67 | help='Table Name of the table which is destination for write test.', 68 | type=str, 69 | default='', 70 | required=True, 71 | ) 72 | 73 | args = parser.parse_args(argv[1:]) 74 | 75 | # Providing the values. 76 | project_name = args.project_name 77 | dataset_name = args.dataset_name 78 | source_table_name = args.source_table_name 79 | destination_table_name = args.destination_table_name 80 | 81 | # Create the destination table from the source table schema. 82 | create_destination_table(project_name, dataset_name, source_table_name, destination_table_name) 83 | 84 | 85 | if __name__ == '__main__': 86 | app.run(main) 87 | -------------------------------------------------------------------------------- /cloudbuild/nightly/scripts/python-scripts/create_unbounded_sink_table.py: -------------------------------------------------------------------------------- 1 | """The following operations are performed for internal, unbounded read-write tests: 2 | 1. Copying source files to a temporary GCS directory which acts as a new source. 3 | 2. Creating a destination table with a hardcoded schema. 4 | 3. Running the Flink job in unbounded mode while dynamically adding new files to the source. 5 | """ 6 | 7 | import argparse 8 | from collections.abc import Sequence 9 | 10 | from absl import app 11 | from google.cloud import bigquery 12 | from absl import logging 13 | from google.cloud.bigquery import DatasetReference 14 | 15 | def create_destination_table(project_name, dataset_name, destination_table_name): 16 | """Function to create a BigQuery destination table with a hardcoded table schema. 17 | Args: 18 | project_name: The project ID where the table will be created. 19 | dataset_name: The dataset name where the table will be created. 20 | destination_table_name: The name of the destination table to be created. 21 | """ 22 | # Construct a BigQuery client object. 23 | client = bigquery.Client(project=project_name) 24 | 25 | table_id = f"{project_name}.{dataset_name}.{destination_table_name}" 26 | # This is a hardcoded schema specifically for internal, unbounded read-write tests only. 
27 | # It defines the schema of the BigQuery table used in the test, 28 | # with fields for a unique key, name, number, and timestamp. 29 | table_schema = [ 30 | bigquery.SchemaField("unique_key", "STRING", mode="REQUIRED"), 31 | bigquery.SchemaField("name", "STRING", mode="REQUIRED"), 32 | bigquery.SchemaField("number", "INTEGER", mode="REQUIRED"), 33 | bigquery.SchemaField("ts", "TIMESTAMP", mode="REQUIRED"), 34 | ] 35 | table = bigquery.Table(table_id, table_schema) 36 | try: 37 | table = client.create_table(table) # Make an API request. 38 | logging.info( 39 | "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id) 40 | ) 41 | except Exception as _: 42 | raise RuntimeError("Table could not be created!") 43 | 44 | 45 | def main(argv: Sequence[str]) -> None: 46 | parser = argparse.ArgumentParser() 47 | parser.add_argument( 48 | '--project_name', 49 | dest='project_name', 50 | help='Project Id for creation of the destination table.', 51 | type=str, 52 | default='', 53 | required=True, 54 | ) 55 | parser.add_argument( 56 | '--dataset_name', 57 | dest='dataset_name', 58 | help='Dataset Name for creation of the destination table.', 59 | type=str, 60 | default='', 61 | required=True, 62 | ) 63 | 64 | parser.add_argument( 65 | '--destination_table_name', 66 | dest='destination_table_name', 67 | help='Table Name of the table which is destination for write test.', 68 | type=str, 69 | default='', 70 | required=True, 71 | ) 72 | 73 | args = parser.parse_args(argv[1:]) 74 | 75 | # Providing the values. 76 | project_name = args.project_name 77 | dataset_name = args.dataset_name 78 | destination_table_name = args.destination_table_name 79 | 80 | # Create the destination table from the hardcoded schema. 81 | create_destination_table(project_name, dataset_name, destination_table_name) 82 | 83 | 84 | if __name__ == '__main__': 85 | app.run(main) 86 | -------------------------------------------------------------------------------- /cloudbuild/nightly/scripts/python-scripts/delete_buckets_and_clusters.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an 'AS IS' BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License 14 | 15 | import argparse 16 | from absl import logging 17 | from absl import app 18 | from collections.abc import Sequence 19 | import os 20 | from google.cloud import dataproc_v1 21 | 22 | 23 | def clear_all_dataproc_dependencies(project_id, cluster_name, region): 24 | delete_cluster_buckets(project_id, cluster_name, region) 25 | delete_cluster(project_id, cluster_name, region) 26 | 27 | 28 | def delete_cluster_buckets(project_id, cluster_name, region): 29 | """Function to clean up cluster specific directories from its temporary and staging GCS 30 | buckets. 31 | Args: project_id: the project id to which the cluster and job belongs. region: Region 32 | to which the cluster belongs. cluster_name: Name of the cluster on which the jobs need to be 33 | deleted. 
34 | """ 35 | logging.info(f'Attempting to delete cluster {cluster_name} buckets') 36 | cluster_client = dataproc_v1.ClusterControllerClient( 37 | client_options={'api_endpoint': f'{region}-dataproc.googleapis.com:443'} 38 | ) 39 | get_cluster_request = dataproc_v1.GetClusterRequest( 40 | project_id=project_id, region=region, cluster_name=cluster_name 41 | ) 42 | cluster_resource = cluster_client.get_cluster(get_cluster_request) 43 | 44 | cluster_id = cluster_resource.cluster_uuid 45 | temp_bucket_location = cluster_resource.config.temp_bucket+'/'+cluster_id 46 | staging_bucket_location = cluster_resource.config.config_bucket+('/google-cloud-dataproc' 47 | '-metainfo/')+cluster_id 48 | delete_bucket(temp_bucket_location) 49 | delete_bucket(staging_bucket_location) 50 | logging.info(f'Delete all the buckets connected to cluster {cluster_name}.') 51 | 52 | 53 | def delete_bucket(bucket_name): 54 | logging.info(f'Attempting to delete the bucket {bucket_name}') 55 | os.system(f'gcloud storage rm --recursive gs://{bucket_name}') 56 | logging.info(f'Bucket {bucket_name} deleted') 57 | 58 | 59 | def delete_cluster(project_id, cluster_name, region): 60 | """Function to delete a dataproc cluster. 61 | Args: 62 | project_id: the project id to which the cluster and job belongs. 63 | region: Region to which the cluster belongs. 64 | cluster_name: Name of the cluster on which the jobs need to be deleted. 65 | """ 66 | logging.info(f'Attempting to delete the cluster {cluster_name}') 67 | # Create the cluster client. 68 | cluster_client = dataproc_v1.ClusterControllerClient( 69 | client_options={'api_endpoint': f'{region}-dataproc.googleapis.com:443'} 70 | ) 71 | # Delete the cluster once the job has terminated. 72 | operation = cluster_client.delete_cluster( 73 | request={ 74 | 'project_id': project_id, 75 | 'region': region, 76 | 'cluster_name': cluster_name, 77 | } 78 | ) 79 | operation.result() 80 | logging.info(f'Cluster {cluster_name} successfully deleted.') 81 | 82 | 83 | def main(argv: Sequence[str]) -> None: 84 | parser = argparse.ArgumentParser() 85 | parser.add_argument( 86 | '--cluster_name', 87 | dest='cluster_name', 88 | help='Name of the cluster to be created.', 89 | type=str, 90 | required=True, 91 | ) 92 | parser.add_argument( 93 | '--project_id', 94 | dest='project_id', 95 | help='Project Id in which the cluster is created.', 96 | type=str, 97 | required=True, 98 | ) 99 | parser.add_argument( 100 | '--region', 101 | dest='region', 102 | help='Region in which the cluster is present.', 103 | type=str, 104 | required=True, 105 | ) 106 | 107 | args = parser.parse_args(argv[1:]) 108 | # Providing the values. 
109 | cluster_name = args.cluster_name 110 | region = args.region 111 | project_id = args.project_id 112 | 113 | clear_all_dataproc_dependencies(project_id, cluster_name, region) 114 | 115 | 116 | if __name__ == '__main__': 117 | app.run(main) 118 | -------------------------------------------------------------------------------- /cloudbuild/nightly/scripts/python-scripts/insert_dynamic_files.py: -------------------------------------------------------------------------------- 1 | """Python script to dynamically insert files to a GCS Bucket.""" 2 | 3 | import argparse 4 | from collections.abc import Sequence 5 | import datetime 6 | import logging 7 | import csv 8 | import time 9 | from datetime import datetime, timedelta 10 | from google.cloud import storage 11 | from absl import app 12 | 13 | 14 | def sleep_for_seconds(duration): 15 | logging.info( 16 | 'Going to sleep, waiting for connector to read existing, Time: %s', 17 | datetime.now() 18 | ) 19 | # Buffer time to ensure that new partitions are created 20 | # after previous read session and before next split discovery. 21 | time.sleep(duration) 22 | 23 | 24 | def main(argv: Sequence[str]) -> None: 25 | """Main function to parse arguments and run the file insertion process. 26 | This function simulates an unbounded data stream by periodically copying and 27 | modifying a source CSV file in a GCS bucket. It introduces new unique keys 28 | and uploads the modified copies with incremental names, triggering new 29 | file discovery for flink source. 30 | Args: 31 | argv: A sequence of command-line arguments. 32 | """ 33 | parser = argparse.ArgumentParser() 34 | parser.add_argument( 35 | '--refresh_interval', 36 | dest='refresh_interval', 37 | help='Minutes between checking new data', 38 | type=int, 39 | required=True, 40 | ) 41 | parser.add_argument( 42 | '--project_name', 43 | dest='project_name', 44 | help='Project Id which contains the GCS bucket files to be read.', 45 | type=str, 46 | required=True, 47 | ) 48 | 49 | parser.add_argument( 50 | '--gcs_source_uri', 51 | dest='gcs_source_uri', 52 | help='GCS Bucket which has the source csv file', 53 | type=str, 54 | required=True, 55 | ) 56 | 57 | 58 | args = parser.parse_args(argv[1:]) 59 | sleep_for_seconds(2.5*60) 60 | 61 | # Providing the values. 
62 | project_name = args.project_name 63 | gcs_source_uri = args.gcs_source_uri 64 | refresh_interval = int(args.refresh_interval) 65 | bucket_name = gcs_source_uri.split("/")[2] 66 | 67 | # Split the URI into parts 68 | parts = gcs_source_uri.split("/") 69 | 70 | # Create the Storage Client 71 | source_blob_path = "/".join(parts[3:]) 72 | destination_folder = "/".join(parts[3:-1]) + "/" 73 | 74 | storage_client = storage.Client(project=project_name) 75 | bucket = storage_client.bucket(bucket_name) 76 | source_blob = bucket.blob(source_blob_path + "source.csv") 77 | 78 | # Download the CSV file to memory 79 | data = source_blob.download_as_string().decode('utf-8') 80 | reader = csv.reader(data.splitlines()) 81 | 82 | # Prepare data for the two copies 83 | copies = [] 84 | counter = 60001 # Initialize the counter to add unique values 85 | for i in range(3): 86 | current_time = datetime.utcnow() 87 | new_rows = [] 88 | for row in reader: 89 | unique_key = f"{counter}-{row[1]}" # Combine counter and name 90 | new_row = [unique_key] + row[1:] # Create the modified row 91 | new_rows.append(new_row) 92 | counter += 1 # Increment the counter for each record 93 | 94 | copies.append((current_time, new_rows)) 95 | reader = csv.reader(data.splitlines()) # Reset the reader for the next copy 96 | 97 | # Upload the modified copies 98 | copy_count = 1 99 | for current_time, rows in copies: 100 | destination_blob_name = f"{destination_folder}source_{copy_count}.csv" 101 | blob = bucket.blob(destination_blob_name) 102 | 103 | # Write the modified data to a string buffer 104 | output = '\n'.join([','.join(row) for row in rows]) 105 | 106 | blob.upload_from_string(output, content_type='text/csv') 107 | 108 | logging.info(f"Copied and modified file uploaded to gs://{bucket_name}/{destination_blob_name}") 109 | copy_count += 1 110 | 111 | sleep_for_seconds(refresh_interval*60) 112 | 113 | 114 | if __name__ == '__main__': 115 | app.run(main) 116 | -------------------------------------------------------------------------------- /cloudbuild/nightly/scripts/python-scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==2.0.0 2 | avro==1.11.3 3 | cachetools==5.3.2 4 | certifi==2024.7.4 5 | charset-normalizer==3.3.2 6 | google-api-core==2.14.0 7 | google-api-python-client==2.109.0 8 | google-auth==2.24.0 9 | google-auth-httplib2==0.1.1 10 | google-cloud==0.34.0 11 | google-cloud-bigquery==3.13.0 12 | google-cloud-core==2.3.3 13 | google-cloud-dataproc==5.7.0 14 | google-cloud-storage==2.13.0 15 | google-crc32c==1.5.0 16 | google-resumable-media==2.6.0 17 | googleapis-common-protos==1.61.0 18 | grpc-google-iam-v1==0.12.7 19 | grpcio==1.59.3 20 | grpcio-status==1.59.3 21 | httplib2==0.22.0 22 | idna==3.7 23 | packaging==23.2 24 | proto-plus==1.22.3 25 | protobuf==4.25.1 26 | pyasn1==0.5.1 27 | pyasn1-modules==0.3.0 28 | pyparsing==3.1.1 29 | python-dateutil==2.8.2 30 | requests==2.32.0 31 | rsa==4.9 32 | six==1.16.0 33 | uritemplate==4.1.1 34 | urllib3==2.2.2 35 | -------------------------------------------------------------------------------- /cloudbuild/nightly/scripts/table_write.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2022 Google Inc. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the 'License'); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an 'AS IS' BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Collect the arguments. 18 | PROJECT_ID=$1 19 | CLUSTER_NAME=$2 20 | REGION=$3 21 | PROJECT_NAME=$4 22 | DATASET_NAME=$5 23 | SOURCE=$6 24 | DESTINATION_TABLE_NAME=$7 25 | IS_EXACTLY_ONCE_ENABLED=$8 26 | MODE=$9 27 | PROPERTIES=${10} 28 | SINK_PARALLELISM=${11} 29 | IS_SQL=${12} 30 | ENABLE_TABLE_CREATION=${13} 31 | set -euxo pipefail 32 | gcloud config set project "$PROJECT_ID" 33 | 34 | # Obtain Timestamp 35 | timestamp=$(date +"%Y%m%d%H%M%S") 36 | # Create a random JOB_ID 37 | JOB_ID=$(echo "$RANDOM" | md5sum | cut -c 1-30) 38 | # Adds timestamp to job_id to prevent repetition. 39 | JOB_ID="$JOB_ID"_"$timestamp" 40 | 41 | if [ "$MODE" == "bounded" ] 42 | then 43 | echo [LOGS: "$PROJECT_NAME"."$DATASET_NAME"."$SOURCE" Write Test in Bounded mode] Created JOB ID: "$JOB_ID" 44 | # Modify the destination table name. 45 | DESTINATION_TABLE_NAME="$SOURCE"-"$timestamp" 46 | if [ "$IS_SQL" == True ] 47 | then 48 | echo "SQL Mode is Enabled!" 49 | DESTINATION_TABLE_NAME="$DESTINATION_TABLE_NAME"-SQL 50 | fi 51 | if [ "$IS_EXACTLY_ONCE_ENABLED" == True ] 52 | then 53 | echo "Exactly once is Enabled!" 54 | DESTINATION_TABLE_NAME="$DESTINATION_TABLE_NAME"-EXO 55 | else 56 | echo "At least once is Enabled!" 57 | DESTINATION_TABLE_NAME="$DESTINATION_TABLE_NAME"-ALO 58 | fi 59 | source cloudbuild/nightly/scripts/bounded_table_write.sh "$PROPERTIES" "$SINK_PARALLELISM" "$IS_SQL" "$IS_EXACTLY_ONCE_ENABLED" "$ENABLE_TABLE_CREATION" 60 | elif [ "$MODE" == "unbounded" ] 61 | then 62 | echo [LOGS: "$PROJECT_NAME" "$SOURCE" Write Test in Unbounded Mode] Created JOB ID: "$JOB_ID" 63 | source cloudbuild/nightly/scripts/unbounded_table_write.sh "$PROPERTIES" "$timestamp" "$SINK_PARALLELISM" "$IS_SQL" "$IS_EXACTLY_ONCE_ENABLED" 64 | else 65 | echo "Invalid 'MODE' provided. Please provide 'bounded' or 'unbounded'!" 66 | exit 1 67 | fi 68 | 69 | # Now check the success of the job 70 | # Mode helps in checking for unbounded job separately. 71 | if [ "$IS_EXACTLY_ONCE_ENABLED" == True ] 72 | then 73 | echo "Asserting Exactly Once Result" 74 | python3 cloudbuild/nightly/scripts/python-scripts/assert_table_count.py -- --project_name "$PROJECT_NAME" --dataset_name "$DATASET_NAME" --source "$SOURCE" --destination_table_name "$DESTINATION_TABLE_NAME" --mode "$MODE" --is_exactly_once 75 | else 76 | echo "Asserting At-least Once Result" 77 | python3 cloudbuild/nightly/scripts/python-scripts/assert_table_count.py -- --project_name "$PROJECT_NAME" --dataset_name "$DATASET_NAME" --source "$SOURCE" --destination_table_name "$DESTINATION_TABLE_NAME" --mode "$MODE" 78 | fi 79 | ret=$? 80 | if [ $ret -ne 0 ] 81 | then 82 | echo Run Failed 83 | exit 1 84 | else 85 | echo Run Succeeded! 86 | fi 87 | 88 | -------------------------------------------------------------------------------- /cloudbuild/nightly/scripts/unbounded_table_write.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2022 Google Inc. All Rights Reserved. 
4 | # 5 | # Licensed under the Apache License, Version 2.0 (the 'License'); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an 'AS IS' BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | PROPERTIES=$1 17 | timestamp=$2 18 | UNBOUNDED_JOB_SINK_PARALLELISM=$3 19 | IS_SQL=$4 20 | IS_EXACTLY_ONCE_ENABLED=$5 21 | 22 | NEW_GCS_SOURCE_URI="$GCS_SOURCE_URI"temp/"$timestamp"/ 23 | if [ "$IS_SQL" == True ] 24 | then 25 | NEW_GCS_SOURCE_URI="$NEW_GCS_SOURCE_URI"SQL/ 26 | fi 27 | if [ "$IS_EXACTLY_ONCE_ENABLED" == True ] 28 | then 29 | NEW_GCS_SOURCE_URI="$NEW_GCS_SOURCE_URI"EXO/ 30 | else 31 | NEW_GCS_SOURCE_URI="$NEW_GCS_SOURCE_URI"ALO/ 32 | fi 33 | # Copy Source File to a temp directory 34 | gcloud storage cp "$GCS_SOURCE_URI"source.csv "$NEW_GCS_SOURCE_URI"source.csv 35 | # Lifecycle policy of deletion in 1 day already set, no need to add expiration to this directory 36 | 37 | # Modify the destination table name for all tests. 38 | DESTINATION_TABLE_NAME="$DESTINATION_TABLE_NAME"_"$timestamp" 39 | 40 | if [ "$IS_SQL" == True ] 41 | then 42 | echo "SQL Mode is Enabled!" 43 | DESTINATION_TABLE_NAME="$DESTINATION_TABLE_NAME"_SQL 44 | fi 45 | if [ "$IS_EXACTLY_ONCE_ENABLED" == True ] 46 | then 47 | DESTINATION_TABLE_NAME="$DESTINATION_TABLE_NAME"_EXO 48 | else 49 | DESTINATION_TABLE_NAME="$DESTINATION_TABLE_NAME"_ALO 50 | fi 51 | # Create the destination table from hardcoded table schema. 52 | python3 cloudbuild/nightly/scripts/python-scripts/create_unbounded_sink_table.py -- --project_name "$PROJECT_NAME" --dataset_name "$DATASET_NAME" --destination_table_name "$DESTINATION_TABLE_NAME" 53 | # Set the expiration time to 1 hour. 54 | bq update --expiration 3600 "$DATASET_NAME"."$DESTINATION_TABLE_NAME" 55 | 56 | # Running this job async to make sure it exits so that dynamic data can be added 57 | gcloud dataproc jobs submit flink --id "$JOB_ID" --jar="$GCS_JAR_LOCATION" --cluster="$CLUSTER_NAME" --region="$REGION" --properties="$PROPERTIES" --async -- --gcp-source-project "$PROJECT_NAME" --gcs-source-uri "$NEW_GCS_SOURCE_URI" --mode unbounded --file-discovery-interval "$FILE_DISCOVERY_INTERVAL" --gcp-dest-project "$PROJECT_NAME" --bq-dest-dataset "$DATASET_NAME" --bq-dest-table "$DESTINATION_TABLE_NAME" --sink-parallelism "$UNBOUNDED_JOB_SINK_PARALLELISM" --is-sql "$IS_SQL" --exactly-once "$IS_EXACTLY_ONCE_ENABLED" 58 | 59 | # Dynamically adding new files. This is timed 2.5 min wait for read and 5 min refresh time. 60 | python3 cloudbuild/nightly/scripts/python-scripts/insert_dynamic_files.py -- --project_name "$PROJECT_NAME" --gcs_source_uri "$NEW_GCS_SOURCE_URI" --refresh_interval "$FILE_DISCOVERY_INTERVAL" 61 | 62 | # Now the Dataproc job will automatically succeed after stipulated time (18 minutes hardcoded). 63 | # we wait for it to succeed or finish. 
64 | gcloud dataproc jobs wait "$JOB_ID" --region "$REGION" --project "$PROJECT_NAME" 65 | 66 | # Explicitly deleting the newly created files in the GCS bucket 67 | gcloud storage rm --recursive "$NEW_GCS_SOURCE_URI" 68 | -------------------------------------------------------------------------------- /cloudbuild/presubmit/Dockerfile: -------------------------------------------------------------------------------- 1 | # This Dockerfile creates an image for running presubmit tests. 2 | FROM openjdk:8 3 | -------------------------------------------------------------------------------- /cloudbuild/presubmit/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | # 1. Create a Docker image containing flink-bigquery-connector repo 3 | - name: 'gcr.io/cloud-builders/docker' 4 | id: 'docker-build' 5 | args: ['build', '--tag=gcr.io/$PROJECT_ID/dataproc-flink-bigquery-connector-presubmit', '-f', 'cloudbuild/presubmit/Dockerfile', '.'] 6 | 7 | # 2. Fetch maven and dependencies 8 | - name: 'gcr.io/$PROJECT_ID/dataproc-flink-bigquery-connector-presubmit' 9 | id: 'init' 10 | waitFor: ['docker-build'] 11 | entrypoint: 'bash' 12 | args: ['/workspace/cloudbuild/presubmit/presubmit.sh', 'init'] 13 | env: 14 | - 'CODECOV_TOKEN=${_CODECOV_TOKEN}' 15 | 16 | # 3. Run unit & integration tests 17 | - name: 'gcr.io/$PROJECT_ID/dataproc-flink-bigquery-connector-presubmit' 18 | id: 'unit-tests' 19 | waitFor: ['init'] 20 | entrypoint: 'bash' 21 | args: ['/workspace/cloudbuild/presubmit/presubmit.sh', 'tests'] 22 | env: 23 | - 'CODECOV_TOKEN=${_CODECOV_TOKEN}' 24 | 25 | # Tests take around 20 mins in general. 26 | timeout: 1800s 27 | 28 | options: 29 | machineType: 'N1_HIGHCPU_32' 30 | -------------------------------------------------------------------------------- /cloudbuild/presubmit/gcp-settings.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | google-maven-central 8 | GCS Maven Central mirror 9 | https://maven-central.storage-download.googleapis.com/maven2/ 10 | central 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /cloudbuild/presubmit/presubmit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2022 Google Inc. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the 'License'); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an 'AS IS' BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | set -euxo pipefail 18 | 19 | if [ -z "${CODECOV_TOKEN}" ]; then 20 | echo "missing environment variable CODECOV_TOKEN" 21 | exit 1 22 | fi 23 | 24 | readonly MVN="./mvnw -B -e -s /workspace/cloudbuild/presubmit/gcp-settings.xml -Dmaven.repo.local=/workspace/.repository" 25 | readonly STEP=$1 26 | 27 | cd /workspace 28 | 29 | case $STEP in 30 | # Download maven and all the dependencies 31 | init) 32 | $MVN clean install -DskipTests -Pflink_1.17 33 | exit 34 | ;; 35 | 36 | # Run unit & integration tests 37 | tests) 38 | $MVN clean clover:setup verify clover:aggregate clover:check clover:clover -Pflink_1.17,clover 39 | ;; 40 | 41 | *) 42 | echo "Unknown step $STEP" 43 | exit 1 44 | ;; 45 | esac 46 | 47 | # Upload test coverage report to Codecov 48 | bash <(curl -s https://codecov.io/bash) -K -F "${STEP}" 49 | 50 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/BigQueryDefaultSink.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink; 18 | 19 | import org.apache.flink.api.connector.sink2.SinkWriter; 20 | 21 | import com.google.cloud.flink.bigquery.services.BigQueryServicesImpl; 22 | import com.google.cloud.flink.bigquery.sink.writer.BigQueryDefaultWriter; 23 | 24 | /** 25 | * Sink to write data into a BigQuery table using {@link BigQueryDefaultWriter}. 26 | * 27 | *

Depending on the checkpointing mode, this sink offers the following consistency guarantees: 28 | *

  • {@link CheckpointingMode#EXACTLY_ONCE}: at-least-once write consistency. 29 | *
  • {@link CheckpointingMode#AT_LEAST_ONCE}: at-least-once write consistency. 30 | *
  • Checkpointing disabled (NOT RECOMMENDED!): no consistency guarantee. 31 | * 32 | * @param Type of input to sink. 33 | */ 34 | class BigQueryDefaultSink extends BigQueryBaseSink { 35 | 36 | BigQueryDefaultSink(BigQuerySinkConfig sinkConfig) { 37 | super(sinkConfig); 38 | traceId = BigQueryServicesImpl.generateTraceId("default"); 39 | } 40 | 41 | @Override 42 | public SinkWriter createWriter(InitContext context) { 43 | checkParallelism(context.getNumberOfParallelSubtasks()); 44 | return new BigQueryDefaultWriter<>( 45 | tablePath, 46 | connectOptions, 47 | schemaProvider, 48 | serializer, 49 | createTableOptions(), 50 | fatalizeSerializer, 51 | maxParallelism, 52 | traceId, 53 | context); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/BigQueryExactlyOnceSink.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink; 18 | 19 | import org.apache.flink.api.connector.sink2.Committer; 20 | import org.apache.flink.core.io.SimpleVersionedSerializer; 21 | 22 | import com.google.cloud.flink.bigquery.services.BigQueryServicesImpl; 23 | import com.google.cloud.flink.bigquery.sink.committer.BigQueryCommittable; 24 | import com.google.cloud.flink.bigquery.sink.committer.BigQueryCommittableSerializer; 25 | import com.google.cloud.flink.bigquery.sink.committer.BigQueryCommitter; 26 | import com.google.cloud.flink.bigquery.sink.writer.BigQueryBufferedWriter; 27 | import com.google.cloud.flink.bigquery.sink.writer.BigQueryWriterState; 28 | import com.google.cloud.flink.bigquery.sink.writer.BigQueryWriterStateSerializer; 29 | 30 | import java.util.Collection; 31 | import java.util.Comparator; 32 | import java.util.UUID; 33 | 34 | /** 35 | * Sink to write data into a BigQuery table using {@link BigQueryBufferedWriter}. 36 | * 37 | *

    Depending on the checkpointing mode, this writer offers the following consistency guarantees: 38 | *

  • {@link CheckpointingMode#EXACTLY_ONCE}: exactly-once write consistency. 39 | *
  • {@link CheckpointingMode#AT_LEAST_ONCE}: at-least-once write consistency. 40 | *
  • Checkpointing disabled (NOT RECOMMENDED!): no consistency guarantee. 41 | * 42 | * @param Type of input to sink. 43 | */ 44 | public class BigQueryExactlyOnceSink extends BigQueryBaseSink 45 | implements TwoPhaseCommittingStatefulSink { 46 | 47 | BigQueryExactlyOnceSink(BigQuerySinkConfig sinkConfig) { 48 | super(sinkConfig); 49 | traceId = BigQueryServicesImpl.generateTraceId(UUID.randomUUID().toString()); 50 | } 51 | 52 | @Override 53 | public PrecommittingStatefulSinkWriter 54 | createWriter(InitContext context) { 55 | checkParallelism(context.getNumberOfParallelSubtasks()); 56 | return new BigQueryBufferedWriter<>( 57 | tablePath, 58 | connectOptions, 59 | schemaProvider, 60 | serializer, 61 | createTableOptions(), 62 | fatalizeSerializer, 63 | maxParallelism, 64 | traceId, 65 | context); 66 | } 67 | 68 | @Override 69 | public PrecommittingStatefulSinkWriter 70 | restoreWriter(InitContext context, Collection recoveredState) { 71 | if (recoveredState == null || recoveredState.isEmpty()) { 72 | return createWriter(context); 73 | } 74 | // If multiple states are found, restore one with the latest checkpoint. 75 | BigQueryWriterState stateToRestore = 76 | recoveredState.stream() 77 | .max(Comparator.comparingLong(state -> state.getCheckpointId())) 78 | .get(); 79 | return new BigQueryBufferedWriter<>( 80 | stateToRestore.getStreamName(), 81 | stateToRestore.getStreamOffset(), 82 | tablePath, 83 | stateToRestore.getTotalRecordsSeen(), 84 | stateToRestore.getTotalRecordsWritten(), 85 | stateToRestore.getTotalRecordsCommitted(), 86 | connectOptions, 87 | schemaProvider, 88 | serializer, 89 | createTableOptions(), 90 | fatalizeSerializer, 91 | maxParallelism, 92 | traceId, 93 | context); 94 | } 95 | 96 | @Override 97 | public Committer createCommitter() { 98 | return new BigQueryCommitter(connectOptions); 99 | } 100 | 101 | @Override 102 | public SimpleVersionedSerializer getCommittableSerializer() { 103 | return new BigQueryCommittableSerializer(); 104 | } 105 | 106 | @Override 107 | public SimpleVersionedSerializer getWriterStateSerializer() { 108 | return new BigQueryWriterStateSerializer(); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/BigQuerySink.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink; 18 | 19 | import org.apache.flink.api.connector.sink2.Sink; 20 | import org.apache.flink.connector.base.DeliveryGuarantee; 21 | 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | 25 | /** 26 | * Class wrapping BigQuery sinks with appropriate configurations. 27 | * 28 | *

    With {@link DeliveryGuarantee#AT_LEAST_ONCE}, the Sink added to the Flink job will be {@link 29 | * BigQueryDefaultSink}. 30 | * 31 | *

    With {@link DeliveryGuarantee#EXACTLY_ONCE}, the Sink added to the Flink job will be {@link 32 | * BigQueryExactlyOnceSink}. 33 | * 34 | *
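 * <p>A minimal usage sketch for illustration, assuming a {@code DataStream<GenericRecord>}
 * named {@code stream}, a prepared {@code BigQueryConnectOptions} value, and builder-style
 * methods on {@link BigQuerySinkConfig} whose exact names are assumptions here rather than
 * the verified API:
 *
 * <pre>{@code
 * StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
 * // Checkpointing is recommended; exactly-once delivery depends on it.
 * env.enableCheckpointing(60_000L);
 *
 * // Hypothetical builder calls; connectOptions is a placeholder for a configured instance.
 * BigQuerySinkConfig<GenericRecord> sinkConfig =
 *         BigQuerySinkConfig.<GenericRecord>newBuilder()
 *                 .connectOptions(connectOptions)
 *                 .serializer(new AvroToProtoSerializer())
 *                 .deliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
 *                 .build();
 *
 * // BigQuerySink.get returns the sink implementation matching the configured delivery guarantee.
 * stream.sinkTo(BigQuerySink.get(sinkConfig));
 * }</pre>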

    Eventual data consistency at destination is also dependent on checkpointing mode. Look at 35 | * {@link BigQueryDefaultSink} and {@link BigQueryExactlyOnceSink} for write consistencies offered 36 | * across combinations of {@link CheckpointingMode} and sink's {@link DeliveryGuarantee}. It is 37 | * recommended that checkpointing is enabled to avoid unexpected behavior. 38 | */ 39 | public class BigQuerySink { 40 | 41 | private static final Logger LOG = LoggerFactory.getLogger(BigQuerySink.class); 42 | 43 | public static Sink get(BigQuerySinkConfig sinkConfig) { 44 | if (sinkConfig.getDeliveryGuarantee() == DeliveryGuarantee.AT_LEAST_ONCE) { 45 | return new BigQueryDefaultSink<>(sinkConfig); 46 | } 47 | if (sinkConfig.getDeliveryGuarantee() == DeliveryGuarantee.EXACTLY_ONCE) { 48 | return new BigQueryExactlyOnceSink<>(sinkConfig); 49 | } 50 | LOG.error( 51 | "BigQuery sink does not support {} delivery guarantee. Use AT_LEAST_ONCE or EXACTLY_ONCE.", 52 | sinkConfig.getDeliveryGuarantee()); 53 | throw new UnsupportedOperationException( 54 | String.format("%s is not supported", sinkConfig.getDeliveryGuarantee())); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/TwoPhaseCommittingStatefulSink.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | import org.apache.flink.api.connector.sink2.Sink; 21 | import org.apache.flink.api.connector.sink2.StatefulSink; 22 | import org.apache.flink.api.connector.sink2.TwoPhaseCommittingSink; 23 | 24 | import java.io.IOException; 25 | import java.util.Collection; 26 | 27 | /** 28 | * A combination of {@link TwoPhaseCommittingSink} and {@link StatefulSink}. 29 | * 30 | *
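A rough usage sketch for the BigQuerySink factory above (not taken from the repository sources; imports are omitted and the names below are assumptions): the job enables checkpointing, as recommended, and attaches the returned sink to an existing DataStream.

    // Hedged sketch; records and sinkConfig are assumed to be built elsewhere.
    static void attachBigQuerySink(
            StreamExecutionEnvironment env,
            DataStream<GenericRecord> records,
            BigQuerySinkConfig sinkConfig) {
        // Enable checkpointing so the sink can provide the consistency guarantees described above.
        env.enableCheckpointing(60_000L);
        // An AT_LEAST_ONCE config yields BigQueryDefaultSink, EXACTLY_ONCE yields BigQueryExactlyOnceSink.
        records.sinkTo(BigQuerySink.get(sinkConfig));
    }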

    Interface for a sink that supports TPC protocol and statefulness. 31 | * 32 | * @param Type of the sink's input. 33 | * @param Type of the sink writer's state. 34 | * @param Type of the committables. 35 | */ 36 | @Internal 37 | public interface TwoPhaseCommittingStatefulSink 38 | extends TwoPhaseCommittingSink, StatefulSink { 39 | 40 | @Override 41 | PrecommittingStatefulSinkWriter createWriter( 42 | Sink.InitContext context) throws IOException; 43 | 44 | @Override 45 | PrecommittingStatefulSinkWriter restoreWriter( 46 | Sink.InitContext context, Collection recoveredState) throws IOException; 47 | 48 | /** 49 | * A combination of {@link PrecommittingSinkWriter} and {@link StatefulSinkWriter}. 50 | * 51 | *

    Interface for a writer that supports TPC protocol and statefulness. 52 | * 53 | * @param Type of the sink's input. 54 | * @param Type of the sink writer's state. 55 | * @param Type of the committables. 56 | */ 57 | interface PrecommittingStatefulSinkWriter 58 | extends TwoPhaseCommittingSink.PrecommittingSinkWriter, 59 | StatefulSink.StatefulSinkWriter {} 60 | } 61 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/committer/BigQueryCommittable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.committer; 18 | 19 | import com.google.cloud.flink.bigquery.sink.state.BigQueryStreamState; 20 | 21 | /** 22 | * Information required for a commit operation, passed from {@link BigQueryBufferedWriter} to {@link 23 | * BigQueryCommitter}. 24 | */ 25 | public class BigQueryCommittable extends BigQueryStreamState { 26 | 27 | private final long producerId; 28 | 29 | public BigQueryCommittable(long producerId, String streamName, long streamOffset) { 30 | super(streamName, streamOffset); 31 | this.producerId = producerId; 32 | } 33 | 34 | public long getProducerId() { 35 | return producerId; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/committer/BigQueryCommittableSerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.committer; 18 | 19 | import org.apache.flink.core.io.SimpleVersionedSerializer; 20 | 21 | import java.io.ByteArrayInputStream; 22 | import java.io.ByteArrayOutputStream; 23 | import java.io.DataInputStream; 24 | import java.io.DataOutputStream; 25 | import java.io.IOException; 26 | 27 | /** Serializer and deserializer for {@link BigQueryCommittable}. 
*/ 28 | public class BigQueryCommittableSerializer 29 | implements SimpleVersionedSerializer { 30 | 31 | @Override 32 | public int getVersion() { 33 | return 1; 34 | } 35 | 36 | @Override 37 | public byte[] serialize(BigQueryCommittable committable) throws IOException { 38 | try (final ByteArrayOutputStream baos = new ByteArrayOutputStream(); 39 | final DataOutputStream out = new DataOutputStream(baos)) { 40 | out.writeLong(committable.getProducerId()); 41 | out.writeUTF(committable.getStreamName()); 42 | out.writeLong(committable.getStreamOffset()); 43 | out.flush(); 44 | return baos.toByteArray(); 45 | } 46 | } 47 | 48 | @Override 49 | public BigQueryCommittable deserialize(int version, byte[] serialized) throws IOException { 50 | try (final ByteArrayInputStream bais = new ByteArrayInputStream(serialized); 51 | final DataInputStream in = new DataInputStream(bais)) { 52 | final Long producerId = in.readLong(); 53 | final String streamName = in.readUTF(); 54 | final long streamOffset = in.readLong(); 55 | BigQueryCommittable committable = 56 | new BigQueryCommittable(producerId, streamName, streamOffset); 57 | return committable; 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/committer/BigQueryCommitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.committer; 18 | 19 | import org.apache.flink.api.connector.sink2.Committer; 20 | 21 | import com.google.api.gax.rpc.ApiException; 22 | import com.google.cloud.bigquery.storage.v1.FlushRowsResponse; 23 | import com.google.cloud.flink.bigquery.common.config.BigQueryConnectOptions; 24 | import com.google.cloud.flink.bigquery.common.exceptions.BigQueryConnectorException; 25 | import com.google.cloud.flink.bigquery.services.BigQueryServices; 26 | import com.google.cloud.flink.bigquery.services.BigQueryServicesFactory; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | import java.io.Closeable; 31 | import java.io.IOException; 32 | import java.util.Collection; 33 | 34 | /** 35 | * Committer implementation for {@link BigQueryExactlyOnceSink}. 36 | * 37 | *
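For illustration only, the committable and its versioned serializer above can be exercised as a simple round trip; the producer id, stream name, and offset below are invented, and IOException handling is omitted.

    BigQueryCommittableSerializer committableSerializer = new BigQueryCommittableSerializer();
    BigQueryCommittable original =
            new BigQueryCommittable(7L, "projects/p/datasets/d/tables/t/streams/s1", 42L);
    byte[] serializedBytes = committableSerializer.serialize(original);
    BigQueryCommittable restored =
            committableSerializer.deserialize(committableSerializer.getVersion(), serializedBytes);
    // restored carries the same producer id (7), stream name, and stream offset (42) as original.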

    The committer is responsible for committing records buffered in BigQuery write stream to 38 | * BigQuery table. 39 | */ 40 | public class BigQueryCommitter implements Committer, Closeable { 41 | 42 | private static final Logger LOG = LoggerFactory.getLogger(BigQueryCommitter.class); 43 | 44 | private final BigQueryConnectOptions connectOptions; 45 | 46 | public BigQueryCommitter(BigQueryConnectOptions connectOptions) { 47 | this.connectOptions = connectOptions; 48 | } 49 | 50 | @Override 51 | public void commit(Collection> commitRequests) { 52 | if (commitRequests.isEmpty()) { 53 | LOG.info("No committable found. Nothing to commit!"); 54 | return; 55 | } 56 | try (BigQueryServices.StorageWriteClient writeClient = 57 | BigQueryServicesFactory.instance(connectOptions).storageWrite()) { 58 | for (CommitRequest commitRequest : commitRequests) { 59 | BigQueryCommittable committable = commitRequest.getCommittable(); 60 | long producerId = committable.getProducerId(); 61 | String streamName = committable.getStreamName(); 62 | long streamOffset = committable.getStreamOffset(); 63 | LOG.info("Committing records appended by producer {}", producerId); 64 | LOG.debug( 65 | "Invoking flushRows API on stream {} till offset {}", 66 | streamName, 67 | streamOffset); 68 | FlushRowsResponse response = writeClient.flushRows(streamName, streamOffset); 69 | if (response.getOffset() != streamOffset) { 70 | LOG.error( 71 | "BigQuery FlushRows API failed. Returned offset {}, expected {}", 72 | response.getOffset(), 73 | streamOffset); 74 | throw new BigQueryConnectorException( 75 | String.format("Commit operation failed for producer %d", producerId)); 76 | } 77 | } 78 | } catch (IOException | ApiException e) { 79 | throw new BigQueryConnectorException("Commit operation failed", e); 80 | } 81 | } 82 | 83 | @Override 84 | public void close() { 85 | // No op. 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/exceptions/BigQuerySerializationException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.exceptions; 18 | 19 | /** This class wraps errors found during serialization of Flink records to BigQuery protos. 
*/ 20 | public class BigQuerySerializationException extends Exception { 21 | 22 | public BigQuerySerializationException(String message) { 23 | super(message); 24 | } 25 | 26 | public BigQuerySerializationException(String message, Throwable error) { 27 | super(message, error); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/serializer/BigQueryProtoSerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.serializer; 18 | 19 | import com.google.cloud.flink.bigquery.sink.exceptions.BigQuerySerializationException; 20 | import com.google.protobuf.ByteString; 21 | import org.apache.avro.Schema; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * Base class for defining a Flink record to BigQuery proto serializer. 27 | * 28 | *

    One BigQueryProtoSerializer should correspond to a single BigQuery table. 29 | * 30 | * @param Type of records to be written to BigQuery. 31 | */ 32 | public abstract class BigQueryProtoSerializer implements Serializable { 33 | 34 | /** 35 | * Convert Flink record to proto ByteString compatible with BigQuery table. 36 | * 37 | * @param record Record to serialize. 38 | * @return ByteString. 39 | * @throws BigQuerySerializationException If serialization failed. 40 | */ 41 | public abstract ByteString serialize(IN record) throws BigQuerySerializationException; 42 | 43 | /** 44 | * Initializes the serializer with a BigQuery table schema. This will be called once for every 45 | * serializer instance before its first serialize call. 46 | * 47 | * @param schemaProvider BigQuery table's schema information. 48 | */ 49 | public void init(BigQuerySchemaProvider schemaProvider) {} 50 | 51 | /** 52 | * Derives Avro {@link Schema} describing the data record. This is primarily used by the sink to 53 | * infer schema for creating new destination BigQuery table if one doesn't already exist. 54 | * 55 | * @param record Record to check for schema 56 | * @return Schema. 57 | */ 58 | public abstract Schema getAvroSchema(IN record); 59 | } 60 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/serializer/BigQuerySchemaProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.serializer; 18 | 19 | import com.google.protobuf.DescriptorProtos.DescriptorProto; 20 | import com.google.protobuf.Descriptors.Descriptor; 21 | import org.apache.avro.Schema; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * Interface to derive {@link Descriptor} for Generic Record serialization, along with access to 27 | * Avro {@link Schema} and {@link DescriptorProto}. 28 | */ 29 | public interface BigQuerySchemaProvider extends Serializable { 30 | 31 | /** 32 | * Returns a {@link DescriptorProto} object essential for obtaining Proto Rows Builder and 33 | * Descriptor instances. 34 | * 35 | * @return DescriptorProto 36 | */ 37 | DescriptorProto getDescriptorProto(); 38 | 39 | /** 40 | * Returns a {@link Descriptor} object essential for obtaining Dynamic Message instances. 41 | * 42 | * @return Descriptor 43 | */ 44 | Descriptor getDescriptor(); 45 | 46 | /** 47 | * Returns a {@link Schema} object required for obtaining Descriptor and DescriptorProto 48 | * instances. 49 | * 50 | * @return AvroSchema 51 | */ 52 | Schema getAvroSchema(); 53 | 54 | /** 55 | * Returns true if BigQuery table's schema is unknown, else false. Schema can be unknown due to 56 | * reasons like table does not exist before Flink job. 
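As a hedged sketch of the serializer contract above (not part of the connector sources): a toy serializer for plain String records could look like the following; a production implementation must emit proto bytes matching the destination table's Descriptor, which this simplified version does not attempt.

    // Hypothetical example; the Avro schema and the byte payload are simplifications.
    public class StringProtoSerializer extends BigQueryProtoSerializer<String> {

        @Override
        public ByteString serialize(String record) throws BigQuerySerializationException {
            if (record == null) {
                throw new BigQuerySerializationException("Cannot serialize a null record");
            }
            // Placeholder payload; real serializers build rows against the table's proto Descriptor.
            return ByteString.copyFromUtf8(record);
        }

        @Override
        public Schema getAvroSchema(String record) {
            // Single string column, used only if the sink needs to create the destination table.
            return new Schema.Parser()
                    .parse("{\"type\":\"record\",\"name\":\"Row\",\"fields\":"
                            + "[{\"name\":\"value\",\"type\":\"string\"}]}");
        }
    }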
57 | * 58 | * @return boolean 59 | */ 60 | boolean schemaUnknown(); 61 | } 62 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/state/BigQueryStreamState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.state; 18 | 19 | /** State representation of a BigQuery write stream. */ 20 | public abstract class BigQueryStreamState { 21 | 22 | protected final String streamName; 23 | protected final long streamOffset; 24 | 25 | public BigQueryStreamState(String streamName, long streamOffset) { 26 | this.streamName = streamName; 27 | this.streamOffset = streamOffset; 28 | } 29 | 30 | public String getStreamName() { 31 | return streamName; 32 | } 33 | 34 | public long getStreamOffset() { 35 | return streamOffset; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/throttle/BigQueryWriterThrottler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2025 The Apache Software Foundation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.throttle; 18 | 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | 22 | import java.util.concurrent.TimeUnit; 23 | 24 | /** 25 | * Throttler implementation for BigQuery sink writers. 26 | * 27 | *

    BigQuery APIs used by this sink's writers are subject to BigQuery-imposed quotas and limits. 28 | * 29 | *

    The createTable API is used if the destination BigQuery table does not already exist, and this API 30 | * allows 10 QPS. 31 | * 32 | *

    The {@link BigQueryBufferedWriter} invokes BigQuery's CreateWriteStream API before its initial 33 | * write to a BigQuery table. This API expects a low QPS (~3) for best performance in steady state, 34 | * since write stream creation is an expensive operation. 35 | * 36 | *

    This throttler allocates writers into buckets which correspond to a specific "wait" duration 37 | * before invoking above BigQuery APIs. Given the distributed nature of Flink deployments, we aim to 38 | * achieve 3 QPS on a best effort basis. 39 | */ 40 | public class BigQueryWriterThrottler implements Throttler { 41 | 42 | private static final Logger LOG = LoggerFactory.getLogger(BigQueryWriterThrottler.class); 43 | private final int writerId; 44 | private final int maxBuckets; 45 | 46 | public BigQueryWriterThrottler(int writerId, int maxParallelism) { 47 | this.writerId = writerId; 48 | this.maxBuckets = maxParallelism / 3; 49 | } 50 | 51 | @Override 52 | public void throttle() { 53 | int waitSeconds = writerId % maxBuckets; 54 | LOG.debug("Throttling writer {} for {} second", writerId, waitSeconds); 55 | try { 56 | // Sleep does nothing if input is 0 or less. 57 | TimeUnit.SECONDS.sleep(waitSeconds); 58 | } catch (InterruptedException e) { 59 | LOG.warn("Throttle attempt interrupted in subtask {}", writerId); 60 | Thread.currentThread().interrupt(); 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/throttle/Throttler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 The Apache Software Foundation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.throttle; 18 | 19 | /** Limits the rate at which an operation can be performed. */ 20 | public interface Throttler { 21 | 22 | /** Limits the rate by waiting if necessary. */ 23 | void throttle(); 24 | } 25 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/writer/BigQueryWriterState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.writer; 18 | 19 | import com.google.cloud.flink.bigquery.sink.state.BigQueryStreamState; 20 | 21 | /** State representation of a {@link BigQueryBufferedWriter}. 
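A small sketch of how the throttler above is meant to be used; the writer id and parallelism are invented, and the call site stands in for any quota-bound BigQuery API.

    // With maxParallelism 128 the throttler forms 128 / 3 = 42 buckets, so this writer
    // waits writerId % 42 = 5 seconds before issuing the expensive call.
    int writerId = 5; // typically the subtask's index
    Throttler throttler = new BigQueryWriterThrottler(writerId, 128);
    throttler.throttle();
    // ... invoke createTable or CreateWriteStream here ...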
*/ 22 | public class BigQueryWriterState extends BigQueryStreamState { 23 | 24 | // Used for Flink metrics. 25 | private final long totalRecordsSeen; 26 | private final long totalRecordsWritten; 27 | private final long totalRecordsCommitted; 28 | private final long checkpointId; 29 | 30 | public BigQueryWriterState( 31 | String streamName, 32 | long streamOffset, 33 | long totalRecordsSeen, 34 | long totalRecordsWritten, 35 | long totalRecordsCommitted, 36 | long checkpointId) { 37 | super(streamName, streamOffset); 38 | this.totalRecordsSeen = totalRecordsSeen; 39 | this.totalRecordsWritten = totalRecordsWritten; 40 | this.totalRecordsCommitted = totalRecordsCommitted; 41 | this.checkpointId = checkpointId; 42 | } 43 | 44 | public long getTotalRecordsSeen() { 45 | return totalRecordsSeen; 46 | } 47 | 48 | public long getTotalRecordsWritten() { 49 | return totalRecordsWritten; 50 | } 51 | 52 | public long getTotalRecordsCommitted() { 53 | return totalRecordsCommitted; 54 | } 55 | 56 | public long getCheckpointId() { 57 | return checkpointId; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/writer/BigQueryWriterStateSerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.writer; 18 | 19 | import org.apache.flink.core.io.SimpleVersionedSerializer; 20 | 21 | import java.io.ByteArrayInputStream; 22 | import java.io.ByteArrayOutputStream; 23 | import java.io.DataInputStream; 24 | import java.io.DataOutputStream; 25 | import java.io.IOException; 26 | 27 | /** Serializer and deserializer for {@link BigQueryWriterState}. 
*/ 28 | public class BigQueryWriterStateSerializer 29 | implements SimpleVersionedSerializer { 30 | 31 | @Override 32 | public int getVersion() { 33 | return 1; 34 | } 35 | 36 | @Override 37 | public byte[] serialize(BigQueryWriterState state) throws IOException { 38 | try (final ByteArrayOutputStream baos = new ByteArrayOutputStream(); 39 | final DataOutputStream out = new DataOutputStream(baos)) { 40 | out.writeUTF(state.getStreamName()); 41 | out.writeLong(state.getStreamOffset()); 42 | out.writeLong(state.getTotalRecordsSeen()); 43 | out.writeLong(state.getTotalRecordsWritten()); 44 | out.writeLong(state.getTotalRecordsCommitted()); 45 | out.writeLong(state.getCheckpointId()); 46 | out.flush(); 47 | return baos.toByteArray(); 48 | } 49 | } 50 | 51 | @Override 52 | public BigQueryWriterState deserialize(int version, byte[] serialized) throws IOException { 53 | try (final ByteArrayInputStream bais = new ByteArrayInputStream(serialized); 54 | final DataInputStream in = new DataInputStream(bais)) { 55 | final String streamName = in.readUTF(); 56 | final long streamOffset = in.readLong(); 57 | final long totalRecordsSeen = in.readLong(); 58 | final long totalRecordsWritten = in.readLong(); 59 | final long totalRecordsCommitted = in.readLong(); 60 | final long checkpointId = in.readLong(); 61 | return new BigQueryWriterState( 62 | streamName, 63 | streamOffset, 64 | totalRecordsSeen, 65 | totalRecordsWritten, 66 | totalRecordsCommitted, 67 | checkpointId); 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/sink/writer/CreateTableOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 The Apache Software Foundation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.writer; 18 | 19 | import com.google.cloud.bigquery.TimePartitioning; 20 | 21 | import java.util.List; 22 | 23 | /** Options for creating new BigQuery table. 
*/ 24 | public class CreateTableOptions { 25 | 26 | private final boolean enableTableCreation; 27 | private final String partitionField; 28 | private final TimePartitioning.Type partitionType; 29 | private final long partitionExpirationMillis; 30 | private final List clusteredFields; 31 | private final String region; 32 | 33 | public CreateTableOptions( 34 | boolean enableTableCreation, 35 | String partitionField, 36 | TimePartitioning.Type partitionType, 37 | Long partitionExpirationMillis, 38 | List clusteredFields, 39 | String region) { 40 | this.enableTableCreation = enableTableCreation; 41 | this.partitionField = partitionField; 42 | this.partitionType = partitionType; 43 | if (partitionExpirationMillis == null) { 44 | this.partitionExpirationMillis = 0; 45 | } else { 46 | this.partitionExpirationMillis = partitionExpirationMillis; 47 | } 48 | this.clusteredFields = clusteredFields; 49 | this.region = region; 50 | } 51 | 52 | public boolean enableTableCreation() { 53 | return enableTableCreation; 54 | } 55 | 56 | public String getPartitionField() { 57 | return partitionField; 58 | } 59 | 60 | public TimePartitioning.Type getPartitionType() { 61 | return partitionType; 62 | } 63 | 64 | public long getPartitionExpirationMillis() { 65 | return partitionExpirationMillis; 66 | } 67 | 68 | public List getClusteredFields() { 69 | return clusteredFields; 70 | } 71 | 72 | public String getRegion() { 73 | return region; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/source/emitter/BigQueryRecordEmitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.emitter; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | import org.apache.flink.api.connector.source.SourceOutput; 21 | import org.apache.flink.connector.base.source.reader.RecordEmitter; 22 | import org.apache.flink.util.Collector; 23 | 24 | import com.google.cloud.flink.bigquery.source.reader.BigQuerySourceReader; 25 | import com.google.cloud.flink.bigquery.source.reader.deserializer.BigQueryDeserializationSchema; 26 | import com.google.cloud.flink.bigquery.source.split.BigQuerySourceSplitState; 27 | import org.apache.avro.generic.GenericRecord; 28 | 29 | /** 30 | * The {@link RecordEmitter} implementation for {@link BigQuerySourceReader} .We would always update 31 | * the last consumed message id in this emitter. 32 | * 33 | * @param the emitted type. 
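For concreteness, a hedged example of the table-creation options above; the column names, expiry, and region are invented and only illustrate the constructor's parameter order, with the clustered-fields list assumed to hold column names.

    // Hypothetical: daily partitioning on "event_ts", 90-day partition expiry,
    // clustering on "customer_id", table created (if missing) in a us-central1 dataset.
    CreateTableOptions createTableOptions =
            new CreateTableOptions(
                    true,                                      // enableTableCreation
                    "event_ts",                                // partitionField
                    TimePartitioning.Type.DAY,                 // partitionType
                    90L * 24 * 60 * 60 * 1000,                 // partitionExpirationMillis
                    Collections.singletonList("customer_id"),  // clusteredFields
                    "us-central1");                            // region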
34 | */ 35 | @Internal 36 | public class BigQueryRecordEmitter 37 | implements RecordEmitter { 38 | 39 | private final BigQueryDeserializationSchema deserializationSchema; 40 | private final SourceOutputWrapper sourceOutputWrapper; 41 | 42 | public BigQueryRecordEmitter( 43 | BigQueryDeserializationSchema deserializationSchema) { 44 | this.deserializationSchema = deserializationSchema; 45 | this.sourceOutputWrapper = new SourceOutputWrapper<>(); 46 | } 47 | 48 | @Override 49 | public void emitRecord( 50 | GenericRecord record, SourceOutput output, BigQuerySourceSplitState splitState) 51 | throws Exception { 52 | // Update current offset. 53 | splitState.updateOffset(); 54 | // Sink the record to source output. 55 | sourceOutputWrapper.setSourceOutput(output); 56 | deserializationSchema.deserialize(record, sourceOutputWrapper); 57 | } 58 | 59 | private static class SourceOutputWrapper implements Collector { 60 | private SourceOutput sourceOutput; 61 | 62 | @Override 63 | public void collect(T record) { 64 | sourceOutput.collect(record); 65 | } 66 | 67 | @Override 68 | public void close() {} 69 | 70 | private void setSourceOutput(SourceOutput sourceOutput) { 71 | this.sourceOutput = sourceOutput; 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/source/enumerator/BigQuerySourceEnumState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.enumerator; 18 | 19 | import org.apache.flink.annotation.PublicEvolving; 20 | 21 | import com.google.cloud.flink.bigquery.source.split.BigQuerySourceSplit; 22 | 23 | import java.util.ArrayList; 24 | import java.util.HashMap; 25 | import java.util.List; 26 | import java.util.Map; 27 | import java.util.Objects; 28 | 29 | /** The state representation for the BigQuery source enumerator. 
*/ 30 | @PublicEvolving 31 | public class BigQuerySourceEnumState { 32 | 33 | private final List lastSeenPartitions; 34 | private final List remaniningTableStreams; 35 | private final List completedTableStreams; 36 | private final List remainingSourceSplits; 37 | private final Map assignedSourceSplits; 38 | private final Boolean initialized; 39 | 40 | public BigQuerySourceEnumState( 41 | List lastSeenPartitions, 42 | List remaniningTableStreams, 43 | List completedTableStreams, 44 | List remainingSourceSplits, 45 | Map assignedSourceSplits, 46 | Boolean initialized) { 47 | this.lastSeenPartitions = new ArrayList<>(lastSeenPartitions); 48 | this.remaniningTableStreams = new ArrayList<>(remaniningTableStreams); 49 | this.completedTableStreams = new ArrayList<>(completedTableStreams); 50 | this.remainingSourceSplits = new ArrayList<>(remainingSourceSplits); 51 | this.assignedSourceSplits = new HashMap<>(assignedSourceSplits); 52 | this.initialized = initialized; 53 | } 54 | 55 | public List getLastSeenPartitions() { 56 | return this.lastSeenPartitions; 57 | } 58 | 59 | public List getRemaniningTableStreams() { 60 | return remaniningTableStreams; 61 | } 62 | 63 | public List getCompletedTableStreams() { 64 | return completedTableStreams; 65 | } 66 | 67 | public List getRemainingSourceSplits() { 68 | return remainingSourceSplits; 69 | } 70 | 71 | public Map getAssignedSourceSplits() { 72 | return assignedSourceSplits; 73 | } 74 | 75 | public Boolean isInitialized() { 76 | return initialized; 77 | } 78 | 79 | public static BigQuerySourceEnumState initialState() { 80 | return new BigQuerySourceEnumState( 81 | new ArrayList<>(), 82 | new ArrayList<>(), 83 | new ArrayList<>(), 84 | new ArrayList<>(), 85 | new HashMap<>(), 86 | false); 87 | } 88 | 89 | @Override 90 | public int hashCode() { 91 | return Objects.hash( 92 | this.lastSeenPartitions, 93 | this.remaniningTableStreams, 94 | this.completedTableStreams, 95 | this.remainingSourceSplits, 96 | this.assignedSourceSplits, 97 | this.initialized); 98 | } 99 | 100 | @Override 101 | public boolean equals(Object obj) { 102 | if (this == obj) { 103 | return true; 104 | } 105 | if (obj == null) { 106 | return false; 107 | } 108 | if (getClass() != obj.getClass()) { 109 | return false; 110 | } 111 | final BigQuerySourceEnumState other = (BigQuerySourceEnumState) obj; 112 | return Objects.equals(this.lastSeenPartitions, other.lastSeenPartitions) 113 | && Objects.equals(this.remaniningTableStreams, other.remaniningTableStreams) 114 | && Objects.equals(this.completedTableStreams, other.completedTableStreams) 115 | && Objects.equals(this.remainingSourceSplits, other.remainingSourceSplits) 116 | && Objects.equals(this.assignedSourceSplits, other.assignedSourceSplits) 117 | && Objects.equals(this.initialized, other.initialized); 118 | } 119 | 120 | @Override 121 | public String toString() { 122 | return String.format( 123 | "BigQuerySourceEnumState{" 124 | + "lastSeenPartitions=%s" 125 | + ", remaniningTableStreams=%s" 126 | + ", completedTableStreams=%s" 127 | + ", remainingSourceSplits=%s" 128 | + ", assignedSourceSplits=%s" 129 | + ", initialized=%s}", 130 | lastSeenPartitions, 131 | remaniningTableStreams, 132 | completedTableStreams, 133 | remainingSourceSplits, 134 | assignedSourceSplits, 135 | initialized); 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/source/reader/BigQuerySourceReader.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.reader; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | import org.apache.flink.api.connector.source.SourceReaderContext; 21 | import org.apache.flink.configuration.Configuration; 22 | import org.apache.flink.connector.base.source.reader.RecordEmitter; 23 | import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; 24 | import org.apache.flink.connector.base.source.reader.SingleThreadMultiplexSourceReaderBase; 25 | import org.apache.flink.connector.base.source.reader.splitreader.SplitReader; 26 | import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue; 27 | 28 | import com.google.cloud.flink.bigquery.source.split.BigQuerySourceSplit; 29 | import com.google.cloud.flink.bigquery.source.split.BigQuerySourceSplitState; 30 | import org.apache.avro.generic.GenericRecord; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | import java.util.Map; 35 | import java.util.function.Supplier; 36 | 37 | /** 38 | * The common BigQuery source reader for both ordered & unordered message consuming. 39 | * 40 | * @param The output message type for Flink. 
41 | */ 42 | @Internal 43 | public class BigQuerySourceReader 44 | extends SingleThreadMultiplexSourceReaderBase< 45 | GenericRecord, OUT, BigQuerySourceSplit, BigQuerySourceSplitState> { 46 | private static final Logger LOG = LoggerFactory.getLogger(BigQuerySourceReader.class); 47 | 48 | public BigQuerySourceReader( 49 | FutureCompletingBlockingQueue> elementsQueue, 50 | Supplier> splitReaderSupplier, 51 | RecordEmitter recordEmitter, 52 | Configuration config, 53 | SourceReaderContext context) { 54 | super(elementsQueue, splitReaderSupplier, recordEmitter, config, context); 55 | } 56 | 57 | public BigQuerySourceReader( 58 | FutureCompletingBlockingQueue> elementsQueue, 59 | Supplier> splitReaderSupplier, 60 | RecordEmitter recordEmitter, 61 | SourceReaderContext context) { 62 | super(elementsQueue, splitReaderSupplier, recordEmitter, new Configuration(), context); 63 | } 64 | 65 | @Override 66 | public void start() { 67 | if (getNumberOfCurrentlyAssignedSplits() == 0) { 68 | context.sendSplitRequest(); 69 | } 70 | } 71 | 72 | @Override 73 | protected void onSplitFinished(Map finishedSplitIds) { 74 | for (BigQuerySourceSplitState splitState : finishedSplitIds.values()) { 75 | BigQuerySourceSplit sourceSplit = splitState.toBigQuerySourceSplit(); 76 | LOG.info("Read for split {} is completed.", sourceSplit.splitId()); 77 | } 78 | context.sendSplitRequest(); 79 | } 80 | 81 | @Override 82 | protected BigQuerySourceSplitState initializedState(BigQuerySourceSplit split) { 83 | return new BigQuerySourceSplitState(split); 84 | } 85 | 86 | @Override 87 | protected BigQuerySourceSplit toSplitType(String string, BigQuerySourceSplitState sst) { 88 | return sst.toBigQuerySourceSplit(); 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/source/reader/BigQuerySourceReaderContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.reader; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | import org.apache.flink.api.connector.source.SourceEvent; 21 | import org.apache.flink.api.connector.source.SourceReaderContext; 22 | import org.apache.flink.configuration.Configuration; 23 | import org.apache.flink.metrics.groups.SourceReaderMetricGroup; 24 | import org.apache.flink.util.UserCodeClassLoader; 25 | 26 | import java.util.concurrent.atomic.AtomicLong; 27 | 28 | /** A {@link SourceReaderContext} proxy that adds limit and counts for state management. 
*/ 29 | @Internal 30 | public class BigQuerySourceReaderContext implements SourceReaderContext { 31 | 32 | private final SourceReaderContext readerContext; 33 | private final AtomicLong readCount = new AtomicLong(0); 34 | private final int limit; 35 | 36 | public BigQuerySourceReaderContext(SourceReaderContext readerContext, int limit) { 37 | this.readerContext = readerContext; 38 | this.limit = limit; 39 | } 40 | 41 | @Override 42 | public SourceReaderMetricGroup metricGroup() { 43 | return readerContext.metricGroup(); 44 | } 45 | 46 | @Override 47 | public Configuration getConfiguration() { 48 | return readerContext.getConfiguration(); 49 | } 50 | 51 | @Override 52 | public String getLocalHostName() { 53 | return readerContext.getLocalHostName(); 54 | } 55 | 56 | @Override 57 | public int getIndexOfSubtask() { 58 | return readerContext.getIndexOfSubtask(); 59 | } 60 | 61 | @Override 62 | public void sendSplitRequest() { 63 | readerContext.sendSplitRequest(); 64 | } 65 | 66 | @Override 67 | public void sendSourceEventToCoordinator(SourceEvent sourceEvent) { 68 | readerContext.sendSourceEventToCoordinator(sourceEvent); 69 | } 70 | 71 | @Override 72 | public UserCodeClassLoader getUserCodeClassLoader() { 73 | return readerContext.getUserCodeClassLoader(); 74 | } 75 | 76 | public Long updateReadCount(Long newReads) { 77 | return readCount.addAndGet(newReads); 78 | } 79 | 80 | public Long currentReadCount() { 81 | return readCount.get(); 82 | } 83 | 84 | public boolean isLimitPushedDown() { 85 | return limit > 0; 86 | } 87 | 88 | public boolean willExceedLimit(int newReads) { 89 | return limit > 0 && (readCount.get() + newReads) >= limit; 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/source/reader/deserializer/AvroDeserializationSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.reader.deserializer; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | import org.apache.flink.api.common.typeinfo.TypeInformation; 21 | import org.apache.flink.formats.avro.typeutils.GenericRecordAvroTypeInfo; 22 | 23 | import org.apache.avro.Schema; 24 | import org.apache.avro.generic.GenericRecord; 25 | 26 | /** 27 | * A simple Identity de-serialization for pipelines that just want {@link GenericRecord} as response 28 | * from BigQuery. 
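A short sketch of the limit accounting above; parentReaderContext is assumed to be the context handed over by the Flink runtime, and the numbers are arbitrary.

    // Wrap the framework-provided context with a pushed-down limit of 1000 rows.
    BigQuerySourceReaderContext limitedContext =
            new BigQuerySourceReaderContext(parentReaderContext, 1000);
    int batchSize = 200;
    if (limitedContext.isLimitPushedDown() && !limitedContext.willExceedLimit(batchSize)) {
        // Safe to account for this batch without crossing the limit.
        limitedContext.updateReadCount((long) batchSize);
    }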
29 | */ 30 | @Internal 31 | public class AvroDeserializationSchema 32 | implements BigQueryDeserializationSchema { 33 | 34 | private final String avroSchemaString; 35 | 36 | public AvroDeserializationSchema(String avroSchemaString) { 37 | this.avroSchemaString = avroSchemaString; 38 | } 39 | 40 | @Override 41 | public GenericRecord deserialize(GenericRecord record) { 42 | return record; 43 | } 44 | 45 | @Override 46 | public TypeInformation getProducedType() { 47 | return new GenericRecordAvroTypeInfo(new Schema.Parser().parse(avroSchemaString)); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/source/reader/deserializer/AvroToRowDataDeserializationSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.reader.deserializer; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | import org.apache.flink.api.common.typeinfo.TypeInformation; 21 | import org.apache.flink.table.data.GenericRowData; 22 | import org.apache.flink.table.data.RowData; 23 | import org.apache.flink.table.types.logical.RowType; 24 | 25 | import com.google.cloud.flink.bigquery.common.exceptions.BigQueryConnectorException; 26 | import org.apache.avro.generic.GenericRecord; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | /** Simple implementation for the Deserialization schema (from Avro GenericRecord to RowData). 
*/ 31 | @Internal 32 | public class AvroToRowDataDeserializationSchema 33 | implements BigQueryDeserializationSchema { 34 | private final AvroToRowDataConverters.AvroToRowDataConverter converter; 35 | private final TypeInformation typeInfo; 36 | private static final Logger LOG = 37 | LoggerFactory.getLogger(AvroToRowDataDeserializationSchema.class); 38 | 39 | public AvroToRowDataDeserializationSchema(RowType rowType, TypeInformation typeInfo) { 40 | this.converter = AvroToRowDataConverters.createRowConverter(rowType); 41 | this.typeInfo = typeInfo; 42 | } 43 | 44 | @Override 45 | public RowData deserialize(GenericRecord record) throws BigQueryConnectorException { 46 | try { 47 | return (GenericRowData) converter.convert(record); 48 | } catch (Exception e) { 49 | LOG.error( 50 | String.format( 51 | "Error in converting Avro Generic Record %s to Row Data.%nError: %s.%nCause:%s ", 52 | record.toString(), e.getMessage(), e.getCause())); 53 | throw new BigQueryConnectorException("Error in converting to Row Data", e); 54 | } 55 | } 56 | 57 | @Override 58 | public TypeInformation getProducedType() { 59 | return typeInfo; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/source/reader/deserializer/BigQueryDeserializationSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.reader.deserializer; 18 | 19 | import org.apache.flink.annotation.PublicEvolving; 20 | import org.apache.flink.api.java.typeutils.ResultTypeQueryable; 21 | import org.apache.flink.util.Collector; 22 | 23 | import com.google.cloud.flink.bigquery.common.exceptions.BigQueryConnectorException; 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | import java.io.Serializable; 28 | 29 | /** 30 | * A schema bridge for de-serializing the BigQuery's return types ({@code GenericRecord} or {@link 31 | * ArrowRecord}) into a flink managed instance. 32 | * 33 | * @param The input type to de-serialize. 34 | * @param The output record type for to sink for downstream processing. 35 | */ 36 | @PublicEvolving 37 | public interface BigQueryDeserializationSchema 38 | extends Serializable, ResultTypeQueryable { 39 | 40 | Logger LOG = LoggerFactory.getLogger(BigQueryDeserializationSchema.class); 41 | 42 | /** 43 | * De-serializes the IN type record. 44 | * 45 | * @param record The BSON document to de-serialize. 46 | * @return The de-serialized message as an object (null if the message cannot be de-serialized). 47 | * @throws BigQueryConnectorException In case of problems while de-serializing. 48 | */ 49 | OUT deserialize(IN record) throws BigQueryConnectorException; 50 | 51 | /** 52 | * De-serializes the IN type record. 53 | * 54 | *

    Can output multiple records through the {@link Collector}. Note that number and size of 55 | * the produced records should be relatively small. Depending on the source implementation 56 | * records can be buffered in memory or collecting records might delay emitting checkpoint 57 | * barrier. 58 | * 59 | * @param record The IN document to de-serialize. 60 | * @param out The collector to put the resulting messages. 61 | */ 62 | default void deserialize(IN record, Collector out) throws BigQueryConnectorException { 63 | OUT deserialize = deserialize(record); 64 | if (deserialize == null) { 65 | return; 66 | } 67 | try { 68 | out.collect(deserialize); 69 | } catch (Exception e) { 70 | LOG.error( 71 | String.format( 72 | "Failed to forward the deserialized record %s to the next operator.%nError %s%nCause %s", 73 | deserialize, e.getMessage(), e.getCause())); 74 | throw new BigQueryConnectorException( 75 | "Failed to forward the deserialized record to the next operator.", e); 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/source/split/BigQuerySourceSplit.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.split; 18 | 19 | import org.apache.flink.annotation.PublicEvolving; 20 | import org.apache.flink.api.connector.source.SourceSplit; 21 | 22 | import java.io.Serializable; 23 | import java.util.Objects; 24 | 25 | /** A {@link SourceSplit} implementation for a BigQuery Read API stream. 
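To make the deserialization interface above concrete, a hypothetical implementation that extracts a single assumed column ("name") from each Avro record and emits it as a String; imports are omitted.

    // Illustrative only; the "name" field and the String output type are assumptions.
    public class NameExtractingDeserializationSchema
            implements BigQueryDeserializationSchema<GenericRecord, String> {

        @Override
        public String deserialize(GenericRecord record) throws BigQueryConnectorException {
            Object name = record.get("name");
            // Returning null lets the default collector-based deserialize skip the record.
            return name == null ? null : name.toString();
        }

        @Override
        public TypeInformation<String> getProducedType() {
            return Types.STRING;
        }
    }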
*/ 26 | @PublicEvolving 27 | public class BigQuerySourceSplit implements SourceSplit, Serializable { 28 | 29 | private final String streamName; 30 | private final Long offset; 31 | 32 | public BigQuerySourceSplit(String streamName) { 33 | this.streamName = streamName; 34 | this.offset = 0L; 35 | } 36 | 37 | public BigQuerySourceSplit(String streamName, Long offset) { 38 | this.streamName = streamName; 39 | this.offset = offset; 40 | } 41 | 42 | @Override 43 | public String splitId() { 44 | return streamName; 45 | } 46 | 47 | public String getStreamName() { 48 | return streamName; 49 | } 50 | 51 | public Long getOffset() { 52 | return offset; 53 | } 54 | 55 | @Override 56 | public int hashCode() { 57 | return Objects.hash(this.streamName, this.offset); 58 | } 59 | 60 | @Override 61 | public boolean equals(Object obj) { 62 | if (this == obj) { 63 | return true; 64 | } 65 | if (obj == null) { 66 | return false; 67 | } 68 | if (getClass() != obj.getClass()) { 69 | return false; 70 | } 71 | final BigQuerySourceSplit other = (BigQuerySourceSplit) obj; 72 | if (!Objects.equals(this.streamName, other.streamName)) { 73 | return false; 74 | } 75 | return Objects.equals(this.offset, other.offset); 76 | } 77 | 78 | @Override 79 | public String toString() { 80 | return "BigQuerySourceSplit{" + "streamName=" + streamName + ", offset=" + offset + '}'; 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/source/split/BigQuerySourceSplitSerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.split; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | import org.apache.flink.core.io.SimpleVersionedSerializer; 21 | 22 | import java.io.ByteArrayInputStream; 23 | import java.io.ByteArrayOutputStream; 24 | import java.io.DataInputStream; 25 | import java.io.DataOutputStream; 26 | import java.io.IOException; 27 | 28 | /** The {@link SimpleVersionedSerializer serializer} for {@link BigQuerySourceSplit}. */ 29 | @Internal 30 | public class BigQuerySourceSplitSerializer 31 | implements SimpleVersionedSerializer { 32 | 33 | public static final BigQuerySourceSplitSerializer INSTANCE = 34 | new BigQuerySourceSplitSerializer(); 35 | // This version should be bumped after modifying the source split or the enum states. 
36 | public static final int VERSION = 0; 37 | 38 | private BigQuerySourceSplitSerializer() { 39 | // singleton instance 40 | } 41 | 42 | @Override 43 | public int getVersion() { 44 | return VERSION; 45 | } 46 | 47 | @Override 48 | public byte[] serialize(BigQuerySourceSplit obj) throws IOException { 49 | // VERSION 0 serialization 50 | try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); 51 | DataOutputStream out = new DataOutputStream(baos)) { 52 | serializeBigQuerySourceSplit(out, obj); 53 | out.flush(); 54 | return baos.toByteArray(); 55 | } 56 | } 57 | 58 | @Override 59 | public BigQuerySourceSplit deserialize(int version, byte[] serialized) throws IOException { 60 | if (getVersion() != version) { 61 | throw new IllegalArgumentException( 62 | String.format( 63 | "The provided serializer version (%d) is not expected (expected : %s).", 64 | version, VERSION)); 65 | } 66 | // VERSION 0 deserialization 67 | try (ByteArrayInputStream bais = new ByteArrayInputStream(serialized); 68 | DataInputStream in = new DataInputStream(bais)) { 69 | return deserializeBigQuerySourceSplit(version, in); 70 | } 71 | } 72 | 73 | public void serializeBigQuerySourceSplit(DataOutputStream out, BigQuerySourceSplit split) 74 | throws IOException { 75 | out.writeUTF(split.getStreamName()); 76 | out.writeLong(split.getOffset()); 77 | } 78 | 79 | public BigQuerySourceSplit deserializeBigQuerySourceSplit(int version, DataInputStream in) 80 | throws IOException { 81 | switch (version) { 82 | case VERSION: 83 | String streamName = in.readUTF(); 84 | long offset = in.readLong(); 85 | return new BigQuerySourceSplit(streamName, offset); 86 | default: 87 | throw new IOException("Unknown version: " + version); 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/source/split/BigQuerySourceSplitState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.split; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | 21 | import java.util.Objects; 22 | 23 | /** BigQuery source split state for {@link BigQuerySourceSplit}. 
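 *
 * <p>A short sketch of the intended lifecycle, using only the methods defined below:
 *
 * <pre>{@code
 * BigQuerySourceSplitState state = new BigQuerySourceSplitState(split);
 * state.updateOffset();                                          // advance as each record is emitted
 * BigQuerySourceSplit snapshot = state.toBigQuerySourceSplit();  // split carrying the updated offset
 * }</pre>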
*/ 24 | @Internal 25 | public class BigQuerySourceSplitState { 26 | private final BigQuerySourceSplit split; 27 | private Long offset; 28 | 29 | public BigQuerySourceSplitState(BigQuerySourceSplit split) { 30 | this.split = split; 31 | offset = split.getOffset(); 32 | } 33 | 34 | public BigQuerySourceSplit toBigQuerySourceSplit() { 35 | return new BigQuerySourceSplit(split.getStreamName(), offset); 36 | } 37 | 38 | public void updateOffset() { 39 | offset++; 40 | } 41 | 42 | @Override 43 | public String toString() { 44 | return "BigQuerySourceSplitState{" + "split=" + split + ", offset=" + offset + '}'; 45 | } 46 | 47 | @Override 48 | public int hashCode() { 49 | return Objects.hash(this.split, this.offset); 50 | } 51 | 52 | @Override 53 | public boolean equals(Object obj) { 54 | if (this == obj) { 55 | return true; 56 | } 57 | if (obj == null) { 58 | return false; 59 | } 60 | if (getClass() != obj.getClass()) { 61 | return false; 62 | } 63 | final BigQuerySourceSplitState other = (BigQuerySourceSplitState) obj; 64 | if (!Objects.equals(this.split, other.split)) { 65 | return false; 66 | } 67 | return Objects.equals(this.offset, other.offset); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/java/com/google/cloud/flink/bigquery/source/split/assigner/BoundedSplitAssigner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.split.assigner; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | import org.apache.flink.util.Preconditions; 21 | 22 | import com.google.cloud.bigquery.storage.v1.DataFormat; 23 | import com.google.cloud.bigquery.storage.v1.ReadSession; 24 | import com.google.cloud.flink.bigquery.source.config.BigQueryReadOptions; 25 | import com.google.cloud.flink.bigquery.source.enumerator.BigQuerySourceEnumState; 26 | import com.google.cloud.flink.bigquery.source.split.SplitDiscoverer; 27 | 28 | /** 29 | * A bounded implementation for a split assigner based on the BigQuery {@link ReadSession} streams. 
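 *
 * <p>Typical interaction, mirroring the assigner test later in this module (sketch; the read
 * options and enumerator state come from the caller):
 *
 * <pre>{@code
 * BigQuerySourceSplitAssigner assigner =
 *         BigQuerySourceSplitAssigner.createBounded(readOptions, BigQuerySourceEnumState.initialState());
 * assigner.openAndDiscoverSplits();
 * Optional<BigQuerySourceSplit> next = assigner.getNext(); // empty once all streams are handed out
 * }</pre>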
30 | */ 31 | @Internal 32 | public class BoundedSplitAssigner extends BigQuerySourceSplitAssigner { 33 | 34 | BoundedSplitAssigner(BigQueryReadOptions readOptions, BigQuerySourceEnumState sourceEnumState) { 35 | super(readOptions, sourceEnumState); 36 | } 37 | 38 | @Override 39 | public void discoverSplits() { 40 | 41 | this.remainingTableStreams.addAll( 42 | SplitDiscoverer.discoverSplits( 43 | this.readOptions.getBigQueryConnectOptions(), 44 | DataFormat.AVRO, 45 | this.readOptions.getColumnNames(), 46 | this.readOptions.getRowRestriction(), 47 | this.readOptions.getSnapshotTimestampInMillis(), 48 | this.readOptions.getMaxStreamCount())); 49 | } 50 | 51 | @Override 52 | public boolean noMoreSplits() { 53 | Preconditions.checkState( 54 | initialized, "The noMoreSplits method was called but not initialized."); 55 | return remainingTableStreams.isEmpty() && remainingSourceSplits.isEmpty(); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | com.google.cloud.flink.bigquery.table.BigQueryDynamicTableFactory 17 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/main/resources/connector.properties: -------------------------------------------------------------------------------- 1 | connector=BigQuery 2 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/sink/BigQuerySinkTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink; 18 | 19 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 20 | import org.apache.flink.api.common.restartstrategy.RestartStrategies.RestartStrategyConfiguration; 21 | import org.apache.flink.api.common.time.Time; 22 | import org.apache.flink.connector.base.DeliveryGuarantee; 23 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 24 | 25 | import com.google.cloud.flink.bigquery.fakes.StorageClientFaker; 26 | import com.google.cloud.flink.bigquery.sink.serializer.FakeBigQuerySerializer; 27 | import com.google.cloud.flink.bigquery.sink.serializer.TestBigQuerySchemas; 28 | import com.google.protobuf.ByteString; 29 | import org.junit.After; 30 | import org.junit.Before; 31 | import org.junit.Test; 32 | 33 | import static org.junit.Assert.assertTrue; 34 | 35 | /** Tests for {@link BigQuerySink}. */ 36 | public class BigQuerySinkTest { 37 | 38 | private StreamExecutionEnvironment env; 39 | 40 | private static final RestartStrategyConfiguration NO_RESTART_STRATEGY = 41 | RestartStrategies.noRestart(); 42 | private static final RestartStrategyConfiguration INVALID_FIXED_DELAY_RESTART_STRATEGY = 43 | RestartStrategies.fixedDelayRestart(20, Time.seconds(5)); 44 | 45 | @Before 46 | public void setUp() { 47 | env = new StreamExecutionEnvironment(); 48 | } 49 | 50 | @After 51 | public void tearDown() throws Exception { 52 | env.close(); 53 | } 54 | 55 | @Test 56 | public void testGet_withAtLeastOnce() { 57 | env.setRestartStrategy(NO_RESTART_STRATEGY); 58 | BigQuerySinkConfig sinkConfig = 59 | BigQuerySinkConfig.newBuilder() 60 | .connectOptions(StorageClientFaker.createConnectOptionsForWrite(null)) 61 | .schemaProvider(TestBigQuerySchemas.getSimpleRecordSchema()) 62 | .serializer(new FakeBigQuerySerializer(ByteString.copyFromUtf8("foo"))) 63 | .deliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE) 64 | .streamExecutionEnvironment(env) 65 | .build(); 66 | assertTrue(BigQuerySink.get(sinkConfig) instanceof BigQueryDefaultSink); 67 | } 68 | 69 | @Test 70 | public void testGet_withExactlyOnce() { 71 | env.setRestartStrategy(NO_RESTART_STRATEGY); 72 | BigQuerySinkConfig sinkConfig = 73 | BigQuerySinkConfig.newBuilder() 74 | .connectOptions(StorageClientFaker.createConnectOptionsForWrite(null)) 75 | .schemaProvider(TestBigQuerySchemas.getSimpleRecordSchema()) 76 | .serializer(new FakeBigQuerySerializer(ByteString.copyFromUtf8("foo"))) 77 | .deliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE) 78 | .streamExecutionEnvironment(env) 79 | .build(); 80 | assertTrue(BigQuerySink.get(sinkConfig) instanceof BigQueryExactlyOnceSink); 81 | } 82 | 83 | @Test(expected = UnsupportedOperationException.class) 84 | public void testGet_withNoneDeliveryGuarantee() { 85 | env.setRestartStrategy(NO_RESTART_STRATEGY); 86 | BigQuerySinkConfig sinkConfig = 87 | BigQuerySinkConfig.newBuilder() 88 | .connectOptions(StorageClientFaker.createConnectOptionsForWrite(null)) 89 | .schemaProvider(TestBigQuerySchemas.getSimpleRecordSchema()) 90 | .serializer(new FakeBigQuerySerializer(ByteString.copyFromUtf8("foo"))) 91 | .deliveryGuarantee(DeliveryGuarantee.NONE) 92 | .streamExecutionEnvironment(env) 93 | .build(); 94 | BigQuerySink.get(sinkConfig); 95 | } 96 | 97 | @Test(expected = IllegalArgumentException.class) 98 | public void testGet_withInvalidRestartStrategy() { 99 | env.setRestartStrategy(INVALID_FIXED_DELAY_RESTART_STRATEGY); 100 | BigQuerySinkConfig sinkConfig = 101 | BigQuerySinkConfig.newBuilder() 102 | 
.connectOptions(StorageClientFaker.createConnectOptionsForWrite(null)) 103 | .schemaProvider(TestBigQuerySchemas.getSimpleRecordSchema()) 104 | .serializer(new FakeBigQuerySerializer(ByteString.copyFromUtf8("foo"))) 105 | .deliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE) 106 | .streamExecutionEnvironment(env) 107 | .build(); 108 | assertTrue(BigQuerySink.get(sinkConfig) instanceof BigQueryExactlyOnceSink); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/sink/client/BigQueryClientWithErrorHandlingTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2025 The Apache Software Foundation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.client; 18 | 19 | import com.google.cloud.bigquery.BigQueryException; 20 | import com.google.cloud.flink.bigquery.common.config.BigQueryConnectOptions; 21 | import com.google.cloud.flink.bigquery.common.exceptions.BigQueryConnectorException; 22 | import com.google.cloud.flink.bigquery.fakes.StorageClientFaker; 23 | import org.junit.After; 24 | import org.junit.Before; 25 | import org.junit.Test; 26 | import org.mockito.Mockito; 27 | 28 | import static com.google.common.truth.Truth.assertThat; 29 | import static org.junit.Assert.assertThrows; 30 | import static org.mockito.Mockito.when; 31 | 32 | /** Tests for {@link BigQueryClientWithErrorHandling}. 
*/ 33 | public class BigQueryClientWithErrorHandlingTest { 34 | 35 | BigQueryException mockedException; 36 | 37 | @Before 38 | public void setUp() { 39 | mockedException = Mockito.mock(BigQueryException.class); 40 | } 41 | 42 | @After 43 | public void tearDown() { 44 | mockedException = null; 45 | } 46 | 47 | @Test 48 | public void testTableExistsError() { 49 | BigQueryConnectOptions options = 50 | StorageClientFaker.createConnectOptionsForQuery( 51 | false, Mockito.mock(BigQueryException.class), null, null); 52 | BigQueryConnectorException exception = 53 | assertThrows( 54 | BigQueryConnectorException.class, 55 | () -> BigQueryClientWithErrorHandling.tableExists(options)); 56 | assertThat(exception) 57 | .hasMessageThat() 58 | .contains("Unable to check existence of BigQuery table"); 59 | } 60 | 61 | @Test 62 | public void testCreateDataset_withBigQueryException() { 63 | when(mockedException.getCode()).thenReturn(400); 64 | BigQueryConnectOptions options = 65 | StorageClientFaker.createConnectOptionsForQuery(false, null, mockedException, null); 66 | BigQueryConnectorException exception = 67 | assertThrows( 68 | BigQueryConnectorException.class, 69 | () -> BigQueryClientWithErrorHandling.createDataset(options, "foo")); 70 | assertThat(exception).hasMessageThat().contains("Unable to create BigQuery dataset"); 71 | } 72 | 73 | @Test 74 | public void testCreateDataset_ignoreAlreadyExistsError() { 75 | when(mockedException.getCode()).thenReturn(409); 76 | BigQueryConnectOptions options = 77 | StorageClientFaker.createConnectOptionsForQuery(false, null, mockedException, null); 78 | BigQueryClientWithErrorHandling.createDataset(options, "foo"); 79 | } 80 | 81 | @Test 82 | public void testCreateTable_withBigQueryException() { 83 | when(mockedException.getCode()).thenReturn(400); 84 | BigQueryConnectOptions options = 85 | StorageClientFaker.createConnectOptionsForQuery(false, null, null, mockedException); 86 | BigQueryConnectorException exception = 87 | assertThrows( 88 | BigQueryConnectorException.class, 89 | () -> BigQueryClientWithErrorHandling.createTable(options, null)); 90 | assertThat(exception).hasMessageThat().contains("Unable to create BigQuery table"); 91 | } 92 | 93 | @Test 94 | public void testCreateTable_ignoreAlreadyExistsError() { 95 | when(mockedException.getCode()).thenReturn(409); 96 | BigQueryConnectOptions options = 97 | StorageClientFaker.createConnectOptionsForQuery(false, null, null, mockedException); 98 | BigQueryClientWithErrorHandling.createTable(options, null); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/sink/committer/BigQueryCommittableSerializerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 The Apache Software Foundation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.committer; 18 | 19 | import org.junit.Test; 20 | 21 | import java.io.IOException; 22 | 23 | import static org.junit.Assert.assertEquals; 24 | 25 | /** Tests for {@link BigQueryCommittableSerializer}. */ 26 | public class BigQueryCommittableSerializerTest { 27 | 28 | private static final BigQueryCommittableSerializer INSTANCE = 29 | new BigQueryCommittableSerializer(); 30 | private static final BigQueryCommittable COMMITTABLE = new BigQueryCommittable(12, "foo", 1996); 31 | 32 | @Test 33 | public void testSerde() throws IOException { 34 | byte[] ser = INSTANCE.serialize(COMMITTABLE); 35 | BigQueryCommittable de = INSTANCE.deserialize(INSTANCE.getVersion(), ser); 36 | assertEquals(12, de.getProducerId()); 37 | assertEquals("foo", de.getStreamName()); 38 | assertEquals(1996, de.getStreamOffset()); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/sink/committer/BigQueryCommitterTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 The Apache Software Foundation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.committer; 18 | 19 | import org.apache.flink.api.connector.sink2.Committer.CommitRequest; 20 | 21 | import com.google.api.core.ApiFuture; 22 | import com.google.cloud.bigquery.storage.v1.FlushRowsResponse; 23 | import com.google.cloud.flink.bigquery.common.exceptions.BigQueryConnectorException; 24 | import com.google.cloud.flink.bigquery.fakes.StorageClientFaker; 25 | import org.junit.Test; 26 | 27 | import java.util.Collections; 28 | 29 | /** Tests for {@link BigQueryCommitter}. */ 30 | public class BigQueryCommitterTest { 31 | 32 | @Test 33 | public void testCommit_withEmptyCommitRequest() { 34 | BigQueryCommitter committer = createCommitter(null); 35 | // BQ write client used in this test throws a RuntimeException if flushRows is invoked with 36 | // flushRowsResponse set as null. Since flush should not be called, this commit should not 37 | // throw any exception. 
38 | committer.commit(Collections.EMPTY_LIST); 39 | } 40 | 41 | @Test 42 | public void testCommit() { 43 | BigQueryCommitter committer = 44 | createCommitter(FlushRowsResponse.newBuilder().setOffset(10L).build()); 45 | committer.commit( 46 | Collections.singletonList( 47 | new TestCommitRequest(new BigQueryCommittable(1L, "foo", 10L)))); 48 | } 49 | 50 | @Test(expected = BigQueryConnectorException.class) 51 | public void testCommit_withOffsetMismatch() { 52 | BigQueryCommitter committer = 53 | createCommitter(FlushRowsResponse.newBuilder().setOffset(5L).build()); 54 | committer.commit( 55 | Collections.singletonList( 56 | new TestCommitRequest(new BigQueryCommittable(1L, "foo", 10L)))); 57 | } 58 | 59 | @Test(expected = BigQueryConnectorException.class) 60 | public void testCommit_withFlushRowsApiFailure() { 61 | // BQ write client used in this test throws a RuntimeException if flushRows is invoked with 62 | // flushRowsResponse set as null. The committer should wrap client errors in a 63 | // BigQueryConnectorException. 64 | BigQueryCommitter committer = createCommitter(null); 65 | committer.commit( 66 | Collections.singletonList( 67 | new TestCommitRequest(new BigQueryCommittable(1L, "foo", 10L)))); 68 | } 69 | 70 | private BigQueryCommitter createCommitter(FlushRowsResponse flushRowsResponse) { 71 | return new BigQueryCommitter( 72 | StorageClientFaker.createConnectOptionsForWrite( 73 | new ApiFuture[] {null}, null, flushRowsResponse, null)); 74 | } 75 | 76 | static class TestCommitRequest implements CommitRequest { 77 | 78 | private final BigQueryCommittable committable; 79 | 80 | TestCommitRequest(BigQueryCommittable committable) { 81 | this.committable = committable; 82 | } 83 | 84 | @Override 85 | public BigQueryCommittable getCommittable() { 86 | return committable; 87 | } 88 | 89 | @Override 90 | public int getNumberOfRetries() { 91 | return 0; 92 | } 93 | 94 | @Override 95 | public void signalFailedWithKnownReason(Throwable t) { 96 | // Do nothing. 97 | } 98 | 99 | @Override 100 | public void signalFailedWithUnknownReason(Throwable t) { 101 | // Do nothing. 102 | } 103 | 104 | @Override 105 | public void retryLater() { 106 | // Do nothing. 107 | } 108 | 109 | @Override 110 | public void updateAndRetryLater(BigQueryCommittable committable) { 111 | // Do nothing. 112 | } 113 | 114 | @Override 115 | public void signalAlreadyCommitted() { 116 | // Do nothing. 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/sink/serializer/FakeBigQuerySerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.serializer; 18 | 19 | import com.google.cloud.flink.bigquery.sink.exceptions.BigQuerySerializationException; 20 | import com.google.protobuf.ByteString; 21 | import org.apache.avro.Schema; 22 | 23 | /** Mock serializer for Sink unit tests. */ 24 | public class FakeBigQuerySerializer extends BigQueryProtoSerializer { 25 | 26 | private static final FakeBigQuerySerializer EMPTY_SERIALIZER = 27 | new FakeBigQuerySerializer(null, null, false); 28 | private static final FakeBigQuerySerializer ERRING_SERIALIZER = 29 | new FakeBigQuerySerializer(null, null, true); 30 | 31 | private final ByteString serializeResult; 32 | private final boolean throwException; 33 | private final Schema avroSchema; 34 | 35 | public static FakeBigQuerySerializer getEmptySerializer() { 36 | return EMPTY_SERIALIZER; 37 | } 38 | 39 | public static FakeBigQuerySerializer getErringSerializer() { 40 | return ERRING_SERIALIZER; 41 | } 42 | 43 | public FakeBigQuerySerializer(ByteString serializeResponse) { 44 | this(serializeResponse, null, false); 45 | } 46 | 47 | public FakeBigQuerySerializer(ByteString serializeResponse, Schema avroSchema) { 48 | this(serializeResponse, avroSchema, false); 49 | } 50 | 51 | public FakeBigQuerySerializer( 52 | ByteString serializeResponse, Schema avroSchema, boolean throwException) { 53 | this.serializeResult = serializeResponse; 54 | this.avroSchema = avroSchema; 55 | this.throwException = throwException; 56 | } 57 | 58 | @Override 59 | public ByteString serialize(Object record) throws BigQuerySerializationException { 60 | if (throwException) { 61 | throw new BigQuerySerializationException("Fake error for testing"); 62 | } 63 | return serializeResult; 64 | } 65 | 66 | @Override 67 | public void init(BigQuerySchemaProvider schemaProvider) {} 68 | 69 | @Override 70 | public Schema getAvroSchema(Object record) { 71 | return avroSchema; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/sink/serializer/TestSchemaProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.serializer; 18 | 19 | import com.google.protobuf.DescriptorProtos.DescriptorProto; 20 | import com.google.protobuf.Descriptors.Descriptor; 21 | import org.apache.avro.Schema; 22 | 23 | /** 24 | * Class inheriting {@link BigQuerySchemaProvider} for {@link AvroToProtoSerializerTest} and {@link 25 | * BigQuerySchemaProviderTest}. 
26 | */ 27 | public class TestSchemaProvider implements BigQuerySchemaProvider { 28 | private final Schema schema; 29 | private final Descriptor descriptor; 30 | 31 | public TestSchemaProvider(Schema schema, Descriptor descriptor) { 32 | this.schema = schema; 33 | this.descriptor = descriptor; 34 | } 35 | 36 | @Override 37 | public DescriptorProto getDescriptorProto() { 38 | return getDescriptor().toProto(); 39 | } 40 | 41 | @Override 42 | public Descriptor getDescriptor() { 43 | return descriptor; 44 | } 45 | 46 | @Override 47 | public Schema getAvroSchema() { 48 | return schema; 49 | } 50 | 51 | @Override 52 | public boolean schemaUnknown() { 53 | return schema == null; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/sink/throttle/BigQueryWriterThrottlerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 The Apache Software Foundation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.throttle; 18 | 19 | import org.junit.Test; 20 | 21 | import java.time.Duration; 22 | import java.time.Instant; 23 | 24 | import static org.junit.Assert.assertTrue; 25 | 26 | /** Tests for {@link WriteStreamCreationThrottler}. 
*/ 27 | public class BigQueryWriterThrottlerTest { 28 | 29 | @Test 30 | public void testThrottle_defaultMaxParallelism() { 31 | Duration duration = invokeThrottle(3, 128); 32 | assertTrue(duration.toMillis() >= 3000L); 33 | assertTrue(duration.toMillis() < 4000L); 34 | } 35 | 36 | @Test 37 | public void testThrottle_multiRegionMaxParallelism() { 38 | Duration duration = invokeThrottle(171, 512); 39 | assertTrue(duration.toMillis() >= 1000L); 40 | assertTrue(duration.toMillis() < 2000L); 41 | } 42 | 43 | @Test 44 | public void testThrottle_withInterruptedException() { 45 | // Force interruption 46 | Thread.currentThread().interrupt(); 47 | Duration duration = invokeThrottle(3, 128); 48 | assertTrue(duration.toMillis() < 3000L); 49 | } 50 | 51 | @Test 52 | public void testThrottle_withInvalidWriterId_expectNoThrottling() { 53 | Duration duration = invokeThrottle(-1, 128); 54 | long waitSeconds = duration.toMillis() / 1000; 55 | assertTrue(waitSeconds == 0); 56 | } 57 | 58 | private Duration invokeThrottle(int writerId, int maxParallelism) { 59 | BigQueryWriterThrottler throttler = new BigQueryWriterThrottler(writerId, maxParallelism); 60 | Instant start = Instant.now(); 61 | throttler.throttle(); 62 | Instant end = Instant.now(); 63 | return Duration.between(start, end); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/sink/writer/BigQueryWriterStateSerializerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 The Apache Software Foundation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.sink.writer; 18 | 19 | import org.junit.Test; 20 | 21 | import java.io.IOException; 22 | 23 | import static org.junit.Assert.assertEquals; 24 | 25 | /** Tests for {@link BigQueryWriterStateSerializer}. 
*/ 26 | public class BigQueryWriterStateSerializerTest { 27 | 28 | private static final BigQueryWriterStateSerializer INSTANCE = 29 | new BigQueryWriterStateSerializer(); 30 | private static final BigQueryWriterState STATE = 31 | new BigQueryWriterState("foo", 1996, 24000, 23000, 1996, 4); 32 | 33 | @Test 34 | public void testSerde() throws IOException { 35 | byte[] ser = INSTANCE.serialize(STATE); 36 | BigQueryWriterState de = INSTANCE.deserialize(INSTANCE.getVersion(), ser); 37 | assertEquals("foo", de.getStreamName()); 38 | assertEquals(1996, de.getStreamOffset()); 39 | assertEquals(24000, de.getTotalRecordsSeen()); 40 | assertEquals(23000, de.getTotalRecordsWritten()); 41 | assertEquals(1996, de.getTotalRecordsCommitted()); 42 | assertEquals(4, de.getCheckpointId()); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/source/BigQuerySourceTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source; 18 | 19 | import org.apache.flink.api.common.typeinfo.TypeInformation; 20 | import org.apache.flink.formats.avro.typeutils.GenericRecordAvroTypeInfo; 21 | 22 | import com.google.cloud.flink.bigquery.fakes.StorageClientFaker; 23 | import com.google.cloud.flink.bigquery.source.config.BigQueryReadOptions; 24 | import org.apache.avro.generic.GenericRecord; 25 | import org.junit.Test; 26 | 27 | import java.io.IOException; 28 | 29 | import static com.google.common.truth.Truth.assertThat; 30 | 31 | /** Tests for {@link BigQuerySource}. */ 32 | public class BigQuerySourceTest { 33 | 34 | @Test 35 | public void testReadAvros() throws IOException { 36 | BigQueryReadOptions readOptions = 37 | StorageClientFaker.createReadOptions( 38 | 10, 2, StorageClientFaker.SIMPLE_AVRO_SCHEMA_STRING); 39 | BigQuerySource source = BigQuerySource.readAvros(readOptions); 40 | TypeInformation expected = 41 | new GenericRecordAvroTypeInfo(StorageClientFaker.SIMPLE_AVRO_SCHEMA); 42 | assertThat(source.getDeserializationSchema().getProducedType()).isEqualTo(expected); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/source/enumerator/BigQuerySourceEnumStateSerializerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. 
You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.enumerator; 18 | 19 | import com.google.cloud.flink.bigquery.source.split.BigQuerySourceSplit; 20 | import com.google.cloud.flink.bigquery.source.split.BigQuerySourceSplitSerializer; 21 | import org.junit.Test; 22 | 23 | import java.io.IOException; 24 | import java.util.ArrayList; 25 | import java.util.List; 26 | import java.util.Map; 27 | import java.util.TreeMap; 28 | 29 | import static com.google.common.truth.Truth.assertThat; 30 | 31 | /** */ 32 | public class BigQuerySourceEnumStateSerializerTest { 33 | 34 | private BigQuerySourceEnumState create() { 35 | 36 | List partitions = new ArrayList<>(); 37 | 38 | partitions.add("20230801"); 39 | 40 | List remainingTableStreams = new ArrayList<>(); 41 | 42 | remainingTableStreams.add("third stream"); 43 | remainingTableStreams.add("fourth stream"); 44 | remainingTableStreams.add("fifth stream"); 45 | 46 | List completedTableStreams = new ArrayList<>(); 47 | completedTableStreams.add("first stream"); 48 | 49 | List remainingSourceSplits = new ArrayList<>(); 50 | remainingSourceSplits.add(new BigQuerySourceSplit("second stream", 0L)); 51 | 52 | Map assignedSourceSplits = new TreeMap<>(); 53 | assignedSourceSplits.put("key1", remainingSourceSplits.get(0)); 54 | 55 | return new BigQuerySourceEnumState( 56 | partitions, 57 | remainingTableStreams, 58 | completedTableStreams, 59 | remainingSourceSplits, 60 | assignedSourceSplits, 61 | true); 62 | } 63 | 64 | @Test 65 | public void testEnumStateSerializerInitialState() throws IOException { 66 | BigQuerySourceEnumState initialState = BigQuerySourceEnumState.initialState(); 67 | 68 | byte[] serialized = BigQuerySourceEnumStateSerializer.INSTANCE.serialize(initialState); 69 | 70 | BigQuerySourceEnumState enumState1 = 71 | BigQuerySourceEnumStateSerializer.INSTANCE.deserialize( 72 | BigQuerySourceSplitSerializer.VERSION, serialized); 73 | 74 | assertThat(initialState).isEqualTo(enumState1); 75 | assertThat(initialState.hashCode()).isEqualTo(enumState1.hashCode()); 76 | } 77 | 78 | @Test 79 | public void testEnumStateSerializer() throws IOException { 80 | BigQuerySourceEnumState enumState = create(); 81 | 82 | byte[] serialized = BigQuerySourceEnumStateSerializer.INSTANCE.serialize(enumState); 83 | 84 | BigQuerySourceEnumState enumState1 = 85 | BigQuerySourceEnumStateSerializer.INSTANCE.deserialize( 86 | BigQuerySourceSplitSerializer.VERSION, serialized); 87 | 88 | assertThat(enumState).isEqualTo(enumState1); 89 | } 90 | 91 | @Test(expected = IllegalArgumentException.class) 92 | public void testWrongSerializerVersion() throws IOException { 93 | BigQuerySourceEnumState enumState = create(); 94 | 95 | byte[] serialized = BigQuerySourceEnumStateSerializer.INSTANCE.serialize(enumState); 96 | 97 | BigQuerySourceEnumStateSerializer.INSTANCE.deserialize(1000, serialized); 98 | 99 | // should never reach here 100 | assertThat(false).isTrue(); 101 | } 102 | } 103 | -------------------------------------------------------------------------------- 
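The two serializers exercised above are plain SimpleVersionedSerializer implementations, so they plug directly into Flink's Source API for checkpointing. A minimal sketch (simplified; not necessarily the connector's exact code) of how a source exposes them:

    @Override
    public SimpleVersionedSerializer<BigQuerySourceSplit> getSplitSerializer() {
        return BigQuerySourceSplitSerializer.INSTANCE;
    }

    @Override
    public SimpleVersionedSerializer<BigQuerySourceEnumState> getEnumeratorCheckpointSerializer() {
        return BigQuerySourceEnumStateSerializer.INSTANCE;
    }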
/flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/source/split/BigQuerySourceSplitSerializerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.split; 18 | 19 | import org.junit.Test; 20 | 21 | import java.io.IOException; 22 | 23 | import static com.google.common.truth.Truth.assertThat; 24 | 25 | /** */ 26 | public class BigQuerySourceSplitSerializerTest { 27 | 28 | @Test 29 | public void testSplitSerializer() throws IOException { 30 | BigQuerySourceSplit split = new BigQuerySourceSplit("some stream name", 10L); 31 | 32 | byte[] serialized = BigQuerySourceSplitSerializer.INSTANCE.serialize(split); 33 | 34 | BigQuerySourceSplit split1 = 35 | BigQuerySourceSplitSerializer.INSTANCE.deserialize( 36 | BigQuerySourceSplitSerializer.VERSION, serialized); 37 | 38 | assertThat(split).isEqualTo(split1); 39 | } 40 | 41 | @Test(expected = IllegalArgumentException.class) 42 | public void testWrongSerializerVersion() throws IOException { 43 | BigQuerySourceSplit split = new BigQuerySourceSplit("some stream name", 10L); 44 | 45 | byte[] serialized = BigQuerySourceSplitSerializer.INSTANCE.serialize(split); 46 | 47 | BigQuerySourceSplitSerializer.INSTANCE.deserialize(1000, serialized); 48 | 49 | // should never reach here 50 | assertThat(true).isFalse(); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/source/split/BigQuerySourceSplitStateTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.split; 18 | 19 | import org.junit.Test; 20 | 21 | import static com.google.common.truth.Truth.assertThat; 22 | 23 | /** */ 24 | public class BigQuerySourceSplitStateTest { 25 | 26 | @Test 27 | public void testSplitStateTransformation() { 28 | 29 | String streamName = "somestream"; 30 | BigQuerySourceSplit originalSplit = new BigQuerySourceSplit(streamName, 10L); 31 | assertThat(originalSplit.splitId()).isEqualTo(streamName); 32 | 33 | BigQuerySourceSplitState splitState = new BigQuerySourceSplitState(originalSplit); 34 | assertThat(splitState.toBigQuerySourceSplit()).isEqualTo(originalSplit); 35 | assertThat(splitState) 36 | .isEqualTo(new BigQuerySourceSplitState(splitState.toBigQuerySourceSplit())); 37 | } 38 | 39 | @Test 40 | public void testSplitsEquals() { 41 | 42 | String streamName1 = "somestream"; 43 | BigQuerySourceSplit split1 = new BigQuerySourceSplit(streamName1, 10L); 44 | String streamName2 = "somestream"; 45 | BigQuerySourceSplit split2 = new BigQuerySourceSplit(streamName2, 10L); 46 | assertThat(split1).isEqualTo(split2); 47 | 48 | BigQuerySourceSplitState splitState1 = new BigQuerySourceSplitState(split1); 49 | BigQuerySourceSplitState splitState2 = new BigQuerySourceSplitState(split2); 50 | assertThat(splitState1).isEqualTo(splitState2); 51 | 52 | BigQuerySourceSplit split3 = new BigQuerySourceSplit(streamName2, 11L); 53 | assertThat(split1).isNotEqualTo(split3); 54 | 55 | BigQuerySourceSplitState splitState3 = new BigQuerySourceSplitState(split3); 56 | assertThat(splitState1).isNotEqualTo(splitState3); 57 | } 58 | 59 | @Test 60 | public void testSplitStateMutation() { 61 | 62 | String streamName = "somestream"; 63 | BigQuerySourceSplit originalSplit = new BigQuerySourceSplit(streamName, 10L); 64 | BigQuerySourceSplitState splitState = new BigQuerySourceSplitState(originalSplit); 65 | 66 | splitState.updateOffset(); 67 | BigQuerySourceSplit otherSplit = new BigQuerySourceSplit(streamName, 11L); 68 | 69 | assertThat(splitState.toBigQuerySourceSplit()).isEqualTo(otherSplit); 70 | assertThat(splitState.toBigQuerySourceSplit().hashCode()).isEqualTo(otherSplit.hashCode()); 71 | // should be different since they started from different splits 72 | assertThat(splitState).isNotEqualTo(new BigQuerySourceSplitState(otherSplit)); 73 | assertThat(splitState.hashCode()) 74 | .isNotEqualTo(new BigQuerySourceSplitState(otherSplit).hashCode()); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/source/split/assigner/BigQuerySourceSplitAssignerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.split.assigner; 18 | 19 | import com.google.cloud.flink.bigquery.fakes.StorageClientFaker; 20 | import com.google.cloud.flink.bigquery.source.config.BigQueryReadOptions; 21 | import com.google.cloud.flink.bigquery.source.enumerator.BigQuerySourceEnumState; 22 | import com.google.cloud.flink.bigquery.source.split.BigQuerySourceSplit; 23 | import com.google.common.truth.Truth8; 24 | import org.junit.Before; 25 | import org.junit.Test; 26 | 27 | import java.io.IOException; 28 | import java.util.Arrays; 29 | import java.util.Optional; 30 | 31 | import static com.google.common.truth.Truth.assertThat; 32 | 33 | /** */ 34 | public class BigQuerySourceSplitAssignerTest { 35 | 36 | private BigQueryReadOptions readOptions; 37 | 38 | @Before 39 | public void beforeTest() throws IOException { 40 | this.readOptions = 41 | StorageClientFaker.createReadOptions( 42 | 0, 2, StorageClientFaker.SIMPLE_AVRO_SCHEMA_STRING); 43 | } 44 | 45 | @Test 46 | public void testBoundedAssignment() { 47 | // initialize the assigner with default options since we are faking the bigquery services 48 | BigQuerySourceSplitAssigner assigner = 49 | BigQuerySourceSplitAssigner.createBounded( 50 | this.readOptions, BigQuerySourceEnumState.initialState()); 51 | // request the retrieval of the bigquery table info 52 | assigner.openAndDiscoverSplits(); 53 | 54 | // should retrieve the first split representing the first stream 55 | Optional<BigQuerySourceSplit> maybeSplit = assigner.getNext(); 56 | Truth8.assertThat(maybeSplit).isPresent(); 57 | // should retrieve the second split representing the second stream 58 | maybeSplit = assigner.getNext(); 59 | Truth8.assertThat(maybeSplit).isPresent(); 60 | BigQuerySourceSplit split = maybeSplit.get(); 61 | // no more splits should be available 62 | maybeSplit = assigner.getNext(); 63 | Truth8.assertThat(maybeSplit).isEmpty(); 64 | assertThat(assigner.noMoreSplits()).isTrue(); 65 | // let's check on the enum state 66 | BigQuerySourceEnumState state = assigner.snapshotState(0); 67 | assertThat(state.getRemaniningTableStreams()).isEmpty(); 68 | assertThat(state.getRemainingSourceSplits()).isEmpty(); 69 | // add some splits back 70 | assigner.addSplitsBack(Arrays.asList(split)); 71 | // check again on the enum state 72 | state = assigner.snapshotState(0); 73 | assertThat(state.getRemaniningTableStreams()).isEmpty(); 74 | assertThat(state.getRemainingSourceSplits()).isNotEmpty(); 75 | // empty it again and check 76 | assigner.getNext(); 77 | maybeSplit = assigner.getNext(); 78 | Truth8.assertThat(maybeSplit).isEmpty(); 79 | assertThat(assigner.noMoreSplits()).isTrue(); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/java/com/google/cloud/flink/bigquery/table/config/BigQuerySinkTableConfigTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 The Apache Software Foundation. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.table.config; 18 | 19 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 20 | import org.apache.flink.connector.base.DeliveryGuarantee; 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | 23 | import org.junit.Test; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** Tests for {@link BigQuerySinkTableConfig}. */ 28 | public class BigQuerySinkTableConfigTest { 29 | 30 | @Test 31 | public void testConstructor_withAtLeastOnce() { 32 | BigQuerySinkTableConfig config = 33 | BigQuerySinkTableConfig.newBuilder() 34 | .project("foo") 35 | .dataset("bar") 36 | .table("qux") 37 | .build(); 38 | assertEquals("foo", config.getProject()); 39 | assertEquals("bar", config.getDataset()); 40 | assertEquals("qux", config.getTable()); 41 | } 42 | 43 | @Test 44 | public void testConstructor_withExactlyOnce() { 45 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 46 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, 300000L)); 47 | BigQuerySinkTableConfig config = 48 | BigQuerySinkTableConfig.newBuilder() 49 | .project("foo") 50 | .dataset("bar") 51 | .table("qux") 52 | .deliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE) 53 | .streamExecutionEnvironment(env) 54 | .build(); 55 | assertEquals("foo", config.getProject()); 56 | assertEquals("bar", config.getDataset()); 57 | assertEquals("qux", config.getTable()); 58 | } 59 | 60 | @Test(expected = IllegalArgumentException.class) 61 | public void testConstructor_withExactlyOnce_withoutStreamExecutionEnv() { 62 | BigQuerySinkTableConfig.newBuilder() 63 | .project("foo") 64 | .dataset("bar") 65 | .table("qux") 66 | .deliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE) 67 | .build(); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /flink-1.17-connector-bigquery/flink-connector-bigquery/src/test/resources/log4j2-test.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | # Set root logger level to OFF to not flood build logs 20 | # set manually to INFO for debugging purposes 21 | rootLogger.level = OFF 22 | rootLogger.appenderRef.test.ref = TestLogger 23 | 24 | appender.testlogger.name = TestLogger 25 | appender.testlogger.type = CONSOLE 26 | appender.testlogger.target = SYSTEM_ERR 27 | appender.testlogger.layout.type = PatternLayout 28 | appender.testlogger.layout.pattern = %-4r [%t] %-5p %c %x - %m%n 29 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 21 | 22 | 4.0.0 23 | 24 | 25 | com.google.cloud.flink 26 | flink-connector-bigquery-parent 27 | ${revision} 28 | 29 | 30 | flink-connector-bigquery-common 31 | Flink : Connectors : Google BigQuery Common 32 | ${revision} 33 | 34 | jar 35 | 36 | 37 | 38 | Apache License, Version 2.0 39 | http://www.apache.org/licenses/LICENSE-2.0.txt 40 | repo 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | com.google.cloud 49 | google-cloud-bigquerystorage 50 | 51 | 52 | 53 | 54 | com.google.cloud 55 | google-cloud-bigquery 56 | 57 | 58 | 59 | 60 | 61 | com.google.guava 62 | guava 63 | 64 | 65 | 66 | 67 | org.apache.avro 68 | avro 69 | 70 | 71 | 72 | 73 | org.assertj 74 | assertj-core 75 | 76 | 77 | 78 | org.apache.commons 79 | commons-lang3 80 | 81 | 82 | 83 | org.junit.vintage 84 | junit-vintage-engine 85 | ${junit5.version} 86 | 87 | 88 | 89 | org.apache.flink 90 | flink-annotations 91 | 92 | 95 | 96 | org.apache.flink 97 | flink-core 98 | 99 | 100 | 101 | 102 | 103 | 104 | org.apache.maven.plugins 105 | maven-jar-plugin 106 | 107 | 108 | org.apache.maven.plugins 109 | maven-surefire-plugin 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/main/java/com/google/cloud/flink/bigquery/common/config/CredentialsOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.common.config; 18 | 19 | import org.apache.flink.annotation.PublicEvolving; 20 | 21 | import com.google.auth.Credentials; 22 | import com.google.auto.value.AutoValue; 23 | import com.google.cloud.flink.bigquery.common.utils.GoogleCredentialsSupplier; 24 | 25 | import javax.annotation.Nullable; 26 | 27 | import java.io.Serializable; 28 | import java.util.Objects; 29 | import java.util.Optional; 30 | 31 | /** An options object that covers the possible {@link Credentials} configurations. 
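 *
 * <p>For example (illustrative path only), a service account file can be supplied through the
 * builder defined below:
 *
 * <pre>{@code
 * CredentialsOptions credentialsOptions =
 *         CredentialsOptions.builder()
 *                 .setCredentialsFile("/path/to/service-account.json")
 *                 .build();
 * Credentials credentials = credentialsOptions.getCredentials();
 * }</pre>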
*/ 32 | @AutoValue 33 | @PublicEvolving 34 | public abstract class CredentialsOptions implements Serializable { 35 | 36 | @Nullable 37 | public abstract String getCredentialsFile(); 38 | 39 | @Nullable 40 | public abstract String getCredentialsKey(); 41 | 42 | @Nullable 43 | public abstract String getAccessToken(); 44 | 45 | /** 46 | * Returns the Google Credentials created given the provided configuration. 47 | * 48 | * @return The Google Credentials instance. 49 | */ 50 | public Credentials getCredentials() { 51 | return GoogleCredentialsSupplier.supplyCredentialsFromSources( 52 | Optional.ofNullable(getAccessToken()), 53 | Optional.ofNullable(getCredentialsKey()), 54 | Optional.ofNullable(getCredentialsFile())); 55 | } 56 | 57 | @Override 58 | public final int hashCode() { 59 | int hash = 5; 60 | hash = 61 * hash + Objects.hashCode(getCredentialsFile()); 61 | hash = 61 * hash + Objects.hashCode(getCredentialsKey()); 62 | hash = 61 * hash + Objects.hashCode(getAccessToken()); 63 | return hash; 64 | } 65 | 66 | @Override 67 | public final boolean equals(Object obj) { 68 | if (this == obj) { 69 | return true; 70 | } 71 | if (obj == null) { 72 | return false; 73 | } 74 | if (getClass() != obj.getClass()) { 75 | return false; 76 | } 77 | final CredentialsOptions other = (CredentialsOptions) obj; 78 | return Objects.equals(this.getCredentialsFile(), other.getCredentialsFile()) 79 | && Objects.equals(this.getCredentialsKey(), other.getCredentialsKey()) 80 | && Objects.equals(this.getAccessToken(), other.getAccessToken()); 81 | } 82 | 83 | /** 84 | * Creates a builder class for the {@link CredentialsOptions} class. 85 | * 86 | * @return A builder class. 87 | */ 88 | public static CredentialsOptions.Builder builder() { 89 | return new AutoValue_CredentialsOptions.Builder(); 90 | } 91 | 92 | /** A builder class for the {@link CredentialsOptions} class. */ 93 | @AutoValue.Builder 94 | public abstract static class Builder { 95 | 96 | /** 97 | * Sets the credentials using a file system location. 98 | * 99 | * @param credentialsFile the path of the credentials file. 100 | * @return this builder's instance 101 | */ 102 | public abstract Builder setCredentialsFile(String credentialsFile); 103 | 104 | /** 105 | * Sets the credentials using a credentials key, encoded in Base64. 106 | * 107 | * @param credentialsKey The credentials key. 108 | * @return this builder's instance 109 | */ 110 | public abstract Builder setCredentialsKey(String credentialsKey); 111 | 112 | /** 113 | * Sets the credentials using a GCP access token. 114 | * 115 | * @param credentialsToken The GCP access token. 116 | * @return this builder's instance 117 | */ 118 | public abstract Builder setAccessToken(String credentialsToken); 119 | 120 | /** 121 | * Builds a fully initialized {@link CredentialsOptions} instance. 122 | * 123 | * @return The {@link CredentialsOptions} instance. 124 | */ 125 | public abstract CredentialsOptions build(); 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/main/java/com/google/cloud/flink/bigquery/common/exceptions/BigQueryConnectorException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. 
You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.common.exceptions; 18 | 19 | /** Represents a generic error during the execution of the connector's code. */ 20 | public class BigQueryConnectorException extends RuntimeException { 21 | 22 | public BigQueryConnectorException(String message) { 23 | super(message); 24 | } 25 | 26 | public BigQueryConnectorException(String message, Throwable cause) { 27 | super(message, cause); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/main/java/com/google/cloud/flink/bigquery/common/utils/BigQueryStateSerde.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.common.utils; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | import org.apache.flink.util.function.BiConsumerWithException; 21 | import org.apache.flink.util.function.FunctionWithException; 22 | 23 | import java.io.DataInputStream; 24 | import java.io.DataOutputStream; 25 | import java.io.IOException; 26 | import java.util.ArrayList; 27 | import java.util.HashMap; 28 | import java.util.List; 29 | import java.util.Map; 30 | 31 | /** A utility class with some helper method for serde in the BigQuery source and its state. */ 32 | @Internal 33 | public class BigQueryStateSerde { 34 | 35 | /** Private constructor for utility class. */ 36 | private BigQueryStateSerde() {} 37 | 38 | /** 39 | * Serializes a list of data and writes it into a data output stream. 40 | * 41 | * @param The type of the list's elements. 42 | * @param out The data output stream. 43 | * @param list The data to be serialized. 44 | * @param serializer The serialization function of the list's elements. 45 | * @throws IOException In case of serialization or stream write problems. 46 | */ 47 | public static void serializeList( 48 | DataOutputStream out, 49 | List list, 50 | BiConsumerWithException serializer) 51 | throws IOException { 52 | out.writeInt(list.size()); 53 | for (T t : list) { 54 | serializer.accept(out, t); 55 | } 56 | } 57 | 58 | /** 59 | * De-serializes a list from the data input stream. 60 | * 61 | * @param The type of the list's elements. 62 | * @param in the data input stream. 63 | * @param deserializer the de-serialization function for the list's elements. 64 | * @return A fully initialized list with elements de-serialized from the data input stream. 
65 | * @throws IOException In case of de-serialization or read problems. 66 | */ 67 | public static List deserializeList( 68 | DataInputStream in, FunctionWithException deserializer) 69 | throws IOException { 70 | int size = in.readInt(); 71 | List list = new ArrayList<>(size); 72 | for (int i = 0; i < size; i++) { 73 | T t = deserializer.apply(in); 74 | list.add(t); 75 | } 76 | 77 | return list; 78 | } 79 | 80 | /** 81 | * Serializes a map of data and writes it into a data output stream. 82 | * 83 | * @param The type of the map's keys. 84 | * @param The type of the map's values. 85 | * @param out The data output stream. 86 | * @param map The data output stream. 87 | * @param keySerializer Serialization function for the map's keys. 88 | * @param valueSerializer Serialization function for the map's values. 89 | * @throws IOException In case of serialization or stream write problems. 90 | */ 91 | public static void serializeMap( 92 | DataOutputStream out, 93 | Map map, 94 | BiConsumerWithException keySerializer, 95 | BiConsumerWithException valueSerializer) 96 | throws IOException { 97 | out.writeInt(map.size()); 98 | for (Map.Entry entry : map.entrySet()) { 99 | keySerializer.accept(out, entry.getKey()); 100 | valueSerializer.accept(out, entry.getValue()); 101 | } 102 | } 103 | 104 | /** 105 | * Serializes a list from the data input stream. 106 | * 107 | * @param The type of the map's keys. 108 | * @param The type of the map's values. 109 | * @param in the data input stream. 110 | * @param keyDeserializer De-serialization function for the map's keys. 111 | * @param valueDeserializer De-serialization function for the map's values. 112 | * @return A fully initialized map instance, with elements read from the data input stream. 113 | * @throws IOException In case of de-serialization or read problems. 114 | */ 115 | public static Map deserializeMap( 116 | DataInputStream in, 117 | FunctionWithException keyDeserializer, 118 | FunctionWithException valueDeserializer) 119 | throws IOException { 120 | int size = in.readInt(); 121 | Map result = new HashMap<>(size); 122 | for (int i = 0; i < size; i++) { 123 | K key = keyDeserializer.apply(in); 124 | V value = valueDeserializer.apply(in); 125 | result.put(key, value); 126 | } 127 | return result; 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/main/java/com/google/cloud/flink/bigquery/common/utils/BigQueryTableInfo.java: -------------------------------------------------------------------------------- 1 | package com.google.cloud.flink.bigquery.common.utils; 2 | 3 | import com.google.api.services.bigquery.model.TableSchema; 4 | import com.google.cloud.bigquery.BigQuery; 5 | import com.google.cloud.bigquery.TableId; 6 | 7 | import java.util.Optional; 8 | 9 | /** Class to obtain information about BigQuery Table. */ 10 | public class BigQueryTableInfo { 11 | 12 | // Make the constructor private so that it cannot be instantiated. 13 | private BigQueryTableInfo() {} 14 | 15 | /** 16 | * Function to obtain a BigQuery Table Schema. 17 | * 18 | * @param client {@link BigQuery} Object containing the BigQuery Client. 19 | * @param project Project ID containing the Table. 20 | * @param dataset Dataset ID containing the Table. 21 | * @param table Table Name. 22 | * @return {@link TableSchema} Object containing the Table Schema requested. 
23 | */ 24 | public static TableSchema getSchema( 25 | BigQuery client, String project, String dataset, String table) { 26 | return Optional.ofNullable(client.getTable(TableId.of(project, dataset, table))) 27 | .map(t -> t.getDefinition().getSchema()) 28 | .map(SchemaTransform::bigQuerySchemaToTableSchema) 29 | .orElseThrow( 30 | () -> 31 | new IllegalArgumentException( 32 | String.format( 33 | "The provided table %s.%s.%s does not exists.", 34 | project, dataset, table))); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/main/java/com/google/cloud/flink/bigquery/common/utils/GoogleCredentialsSupplier.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.common.utils; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | 21 | import com.google.auth.Credentials; 22 | import com.google.auth.oauth2.AccessToken; 23 | import com.google.auth.oauth2.GoogleCredentials; 24 | 25 | import java.io.ByteArrayInputStream; 26 | import java.io.FileInputStream; 27 | import java.io.IOException; 28 | import java.io.UncheckedIOException; 29 | import java.util.Base64; 30 | import java.util.Optional; 31 | 32 | /** A utility class to supply credentials given the multiple possible configuration sources. */ 33 | @Internal 34 | public class GoogleCredentialsSupplier { 35 | private GoogleCredentialsSupplier() {} 36 | 37 | /** 38 | * Supplies a Google {@link Credentials} object, given the possible configurations. 39 | * 40 | * @param accessToken The actual access token as a string. 41 | * @param credentialsKey The actual key encoded in a Base64 based string. 42 | * @param credentialsFile The location of the credentials file. 43 | * @return A fully initialized {@link Credentials} object. 
44 | */ 45 | public static Credentials supplyCredentialsFromSources( 46 | Optional accessToken, 47 | Optional credentialsKey, 48 | Optional credentialsFile) { 49 | Credentials credentials; 50 | if (accessToken.isPresent()) { 51 | credentials = createCredentialsFromAccessToken(accessToken.get()); 52 | } else if (credentialsKey.isPresent()) { 53 | credentials = createCredentialsFromKey(credentialsKey.get()); 54 | } else if (credentialsFile.isPresent()) { 55 | credentials = createCredentialsFromFile(credentialsFile.get()); 56 | } else { 57 | credentials = createDefaultCredentials(); 58 | } 59 | return credentials; 60 | } 61 | 62 | private static Credentials createCredentialsFromAccessToken(String accessToken) { 63 | return GoogleCredentials.create(new AccessToken(accessToken, null)); 64 | } 65 | 66 | private static Credentials createCredentialsFromKey(String key) { 67 | try { 68 | // Replaced BaseEncoding.base64() [of com.google.common.io.BaseEncoding] 69 | // with Base64.getDecoder() since flink does not allow common.io methods 70 | // to coexist and prefers usage of flink-shaded-guava methods instead. 71 | // But that would cause dependency on flink, so replaced with java.utils.Base64 72 | // Both support RFC-4648 (https://www.ietf.org/rfc/rfc4648.txt) 73 | // Links: 74 | // 1. BaseEncoding: 75 | // https://guava.dev/releases/17.0/api/docs/com/google/common/io/BaseEncoding.html#base64() 76 | // 2. Base64: 77 | // https://docs.oracle.com/javase/8/docs/api/java/util/Base64.html 78 | return GoogleCredentials.fromStream( 79 | new ByteArrayInputStream(Base64.getDecoder().decode(key))); 80 | } catch (IOException e) { 81 | throw new UncheckedIOException("Failed to create Credentials from key", e); 82 | } 83 | } 84 | 85 | private static Credentials createCredentialsFromFile(String file) { 86 | try { 87 | return GoogleCredentials.fromStream(new FileInputStream(file)); 88 | } catch (IOException e) { 89 | throw new UncheckedIOException("Failed to create Credentials from file", e); 90 | } 91 | } 92 | 93 | private static Credentials createDefaultCredentials() { 94 | try { 95 | return GoogleCredentials.getApplicationDefault(); 96 | } catch (IOException e) { 97 | throw new UncheckedIOException("Failed to create default Credentials", e); 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/main/java/com/google/cloud/flink/bigquery/services/BigQueryServicesFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.services; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | import org.apache.flink.annotation.VisibleForTesting; 21 | import org.apache.flink.util.Preconditions; 22 | 23 | import com.google.cloud.flink.bigquery.common.config.BigQueryConnectOptions; 24 | 25 | import java.io.IOException; 26 | 27 | /** 28 | * A factory class to dispatch the right implementation of the BigQuery services functionalities. 29 | * This class can be configured to use a mock implementation of the BigQuery services, simplifying 30 | * testing of the library. 31 | */ 32 | @Internal 33 | public class BigQueryServicesFactory { 34 | 35 | private static final BigQueryServicesFactory INSTANCE = new BigQueryServicesFactory(); 36 | private static final BigQueryServices SERVICES = new BigQueryServicesImpl(); 37 | 38 | private Boolean isTestingEnabled = false; 39 | private BigQueryServices testingServices; 40 | private BigQueryConnectOptions bqConnectOptions; 41 | 42 | private BigQueryServicesFactory() {} 43 | 44 | /** 45 | * Returns the factory instance, given the current factory's internal state. 46 | * 47 | * @param options The BigQuery connect options. 48 | * @return A factory instance. 49 | */ 50 | public static BigQueryServicesFactory instance(BigQueryConnectOptions options) { 51 | INSTANCE.bqConnectOptions = options; 52 | if (options.getTestingBigQueryServices() == null) { 53 | return INSTANCE.defaultImplementation(); 54 | } else { 55 | return INSTANCE.withTestingServices(options.getTestingBigQueryServices().get()); 56 | } 57 | } 58 | 59 | /** 60 | * Returns a BigQuery storage read client, given the factory's current internal state. 61 | * 62 | * @return A BigQuery storage read client. 63 | */ 64 | public BigQueryServices.StorageReadClient storageRead() throws IOException { 65 | if (isTestingEnabled) { 66 | return testingServices.createStorageReadClient( 67 | bqConnectOptions.getCredentialsOptions()); 68 | } 69 | return SERVICES.createStorageReadClient(bqConnectOptions.getCredentialsOptions()); 70 | } 71 | 72 | /** 73 | * Returns a BigQuery storage write client, given the factory's current internal state. 74 | * 75 | * @return A BigQuery storage write client. 76 | */ 77 | public BigQueryServices.StorageWriteClient storageWrite() throws IOException { 78 | if (isTestingEnabled) { 79 | return testingServices.createStorageWriteClient( 80 | bqConnectOptions.getCredentialsOptions()); 81 | } 82 | return SERVICES.createStorageWriteClient(bqConnectOptions.getCredentialsOptions()); 83 | } 84 | 85 | /** 86 | * Returns a BigQuery query data client, given the factory's current internal state. 87 | * 88 | * @return A BigQuery query data client. 89 | */ 90 | public BigQueryServices.QueryDataClient queryClient() { 91 | if (isTestingEnabled) { 92 | return testingServices.createQueryDataClient(bqConnectOptions.getCredentialsOptions()); 93 | } 94 | return SERVICES.createQueryDataClient(bqConnectOptions.getCredentialsOptions()); 95 | } 96 | 97 | @VisibleForTesting 98 | BigQueryServicesFactory withTestingServices(BigQueryServices testingServices) { 99 | Preconditions.checkNotNull(testingServices); 100 | isTestingEnabled = true; 101 | this.testingServices = testingServices; 102 | return this; 103 | } 104 | 105 | /** 106 | * Returns the factory instance, with its default implementation (using GCP BigQuery). 107 | * 108 | * @return A factory instance in its default state. 
109 | */ 110 | public BigQueryServicesFactory defaultImplementation() { 111 | isTestingEnabled = false; 112 | this.testingServices = null; 113 | return this; 114 | } 115 | 116 | public Boolean getIsTestingEnabled() { 117 | return isTestingEnabled; 118 | } 119 | 120 | public BigQueryServices getTestingServices() { 121 | return testingServices; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/main/java/com/google/cloud/flink/bigquery/services/PartitionIdWithInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.services; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | 21 | import java.util.Objects; 22 | 23 | /** */ 24 | @Internal 25 | public class PartitionIdWithInfo { 26 | private final String partitionId; 27 | private final TablePartitionInfo info; 28 | 29 | public PartitionIdWithInfo(String partitionId, TablePartitionInfo info) { 30 | this.partitionId = partitionId; 31 | this.info = info; 32 | } 33 | 34 | public String getPartitionId() { 35 | return partitionId; 36 | } 37 | 38 | public TablePartitionInfo getInfo() { 39 | return info; 40 | } 41 | 42 | @Override 43 | public int hashCode() { 44 | return Objects.hash(this.partitionId, this.info); 45 | } 46 | 47 | @Override 48 | public boolean equals(Object obj) { 49 | if (this == obj) { 50 | return true; 51 | } 52 | if (obj == null) { 53 | return false; 54 | } 55 | if (getClass() != obj.getClass()) { 56 | return false; 57 | } 58 | final PartitionIdWithInfo other = (PartitionIdWithInfo) obj; 59 | return Objects.equals(this.getPartitionId(), other.getPartitionId()) 60 | && Objects.equals(this.getInfo(), other.getInfo()); 61 | } 62 | 63 | @Override 64 | public String toString() { 65 | return "PartitionIdWithInfo{" + "partitionId=" + partitionId + ", info=" + info + '}'; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/main/java/com/google/cloud/flink/bigquery/services/TablePartitionInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.services; 18 | 19 | import org.apache.flink.annotation.Internal; 20 | 21 | import com.google.cloud.bigquery.StandardSQLTypeName; 22 | import com.google.cloud.flink.bigquery.common.utils.BigQueryPartitionUtils.PartitionType; 23 | 24 | import java.time.Instant; 25 | import java.util.Arrays; 26 | import java.util.List; 27 | import java.util.Objects; 28 | import java.util.Optional; 29 | import java.util.stream.Collectors; 30 | 31 | /** Represents the information of the BigQuery table's partition. */ 32 | @Internal 33 | public class TablePartitionInfo { 34 | 35 | private final String columnName; 36 | private final StandardSQLTypeName columnType; 37 | private final PartitionType partitionType; 38 | private final Instant streamingBufferOldestEntryTime; 39 | 40 | public TablePartitionInfo( 41 | String columnName, 42 | PartitionType partitionType, 43 | StandardSQLTypeName columnType, 44 | Instant streamingBufferOldestEntryTime) { 45 | this.columnName = columnName; 46 | this.columnType = columnType; 47 | this.partitionType = partitionType; 48 | this.streamingBufferOldestEntryTime = streamingBufferOldestEntryTime; 49 | } 50 | 51 | public String getColumnName() { 52 | return columnName; 53 | } 54 | 55 | public StandardSQLTypeName getColumnType() { 56 | return columnType; 57 | } 58 | 59 | public PartitionType getPartitionType() { 60 | return partitionType; 61 | } 62 | 63 | public Instant getStreamingBufferOldestEntryTime() { 64 | return streamingBufferOldestEntryTime; 65 | } 66 | 67 | public List toPartitionsWithInfo(List partitionIds) { 68 | return Optional.ofNullable(partitionIds) 69 | .map( 70 | ps -> 71 | ps.stream() 72 | .map(id -> new PartitionIdWithInfo(id, this)) 73 | .collect(Collectors.toList())) 74 | .orElse(Arrays.asList()); 75 | } 76 | 77 | @Override 78 | public int hashCode() { 79 | return Objects.hash( 80 | this.columnName, 81 | this.columnType, 82 | this.partitionType, 83 | this.streamingBufferOldestEntryTime); 84 | } 85 | 86 | @Override 87 | public boolean equals(Object obj) { 88 | if (this == obj) { 89 | return true; 90 | } 91 | if (obj == null) { 92 | return false; 93 | } 94 | if (getClass() != obj.getClass()) { 95 | return false; 96 | } 97 | final TablePartitionInfo other = (TablePartitionInfo) obj; 98 | return Objects.equals(this.columnName, other.columnName) 99 | && this.columnType == other.columnType 100 | && this.partitionType == other.partitionType 101 | && Objects.equals( 102 | this.streamingBufferOldestEntryTime, other.streamingBufferOldestEntryTime); 103 | } 104 | 105 | @Override 106 | public String toString() { 107 | return "TablePartitionInfo{" 108 | + "columnName=" 109 | + columnName 110 | + ", columnType=" 111 | + columnType 112 | + ", partitionType=" 113 | + partitionType 114 | + ", streamingBufferOldestEntryTime=" 115 | + streamingBufferOldestEntryTime 116 | + '}'; 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/main/java/com/google/cloud/flink/bigquery/source/split/SplitDiscoveryScheduler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. 
You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.source.split; 18 | 19 | import java.util.concurrent.Callable; 20 | import java.util.function.BiConsumer; 21 | 22 | /** Defines the behavior of a scheduler used for asynchronous BigQuery source split discovery. */ 23 | public interface SplitDiscoveryScheduler { 24 | 25 | /** 26 | * Schedules the next execution of the split discovery process and chains the results handler. 27 | * 28 | * @param <T> The type of the discovery results 29 | * @param callable The discovery process reference 30 | * @param handler The discovery result handler reference 31 | * @param initialDelayMillis The initial delay for the next execution 32 | * @param periodMillis The frequency of the async execution. 33 | */ 34 | <T> void schedule( 35 | Callable<T> callable, 36 | BiConsumer<T, Throwable> handler, 37 | long initialDelayMillis, 38 | long periodMillis); 39 | 40 | /** Called as soon as new splits have been discovered and stored in the assigner's state. */ 41 | void notifySplits(); 42 | } 43 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/test/java/com/google/cloud/flink/bigquery/common/utils/BigQueryStateSerdeTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License.
15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.common.utils; 18 | 19 | import org.junit.Test; 20 | 21 | import java.io.ByteArrayInputStream; 22 | import java.io.ByteArrayOutputStream; 23 | import java.io.DataInput; 24 | import java.io.DataInputStream; 25 | import java.io.DataOutputStream; 26 | import java.io.IOException; 27 | import java.util.Arrays; 28 | import java.util.HashMap; 29 | import java.util.List; 30 | import java.util.Map; 31 | 32 | import static com.google.common.truth.Truth.assertThat; 33 | 34 | /** */ 35 | public class BigQueryStateSerdeTest { 36 | 37 | @Test 38 | public void testListSerDe() throws IOException { 39 | try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); 40 | DataOutputStream out = new DataOutputStream(baos)) { 41 | 42 | List<String> original = Arrays.asList("first", "second", "third", "fourth"); 43 | BigQueryStateSerde.serializeList(out, original, DataOutputStream::writeUTF); 44 | out.flush(); 45 | byte[] serialized = baos.toByteArray(); 46 | 47 | try (ByteArrayInputStream bais = new ByteArrayInputStream(serialized); 48 | DataInputStream in = new DataInputStream(bais)) { 49 | 50 | List<String> deserialized = 51 | BigQueryStateSerde.deserializeList(in, DataInput::readUTF); 52 | 53 | assertThat(original).isEqualTo(deserialized); 54 | } 55 | } 56 | } 57 | 58 | @Test 59 | public void testMapSerDe() throws IOException { 60 | try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); 61 | DataOutputStream out = new DataOutputStream(baos)) { 62 | 63 | Map<String, String> original = new HashMap<>(); 64 | original.put("key1", "value1"); 65 | original.put("key2", "value2"); 66 | original.put("key3", "value3"); 67 | BigQueryStateSerde.serializeMap( 68 | out, original, DataOutputStream::writeUTF, DataOutputStream::writeUTF); 69 | out.flush(); 70 | byte[] serialized = baos.toByteArray(); 71 | 72 | try (ByteArrayInputStream bais = new ByteArrayInputStream(serialized); 73 | DataInputStream in = new DataInputStream(bais)) { 74 | 75 | Map<String, String> deserialized = 76 | BigQueryStateSerde.deserializeMap( 77 | in, DataInput::readUTF, DataInput::readUTF); 78 | 79 | assertThat(original).isEqualTo(deserialized); 80 | } 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/test/java/com/google/cloud/flink/bigquery/fakes/StorageClientFaker.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.fakes; 18 | 19 | import com.google.api.services.bigquery.model.TableFieldSchema; 20 | import com.google.api.services.bigquery.model.TableSchema; 21 | 22 | import java.util.Arrays; 23 | 24 | /** Utility class to generate mocked objects for the BQ storage client.
*/ 25 | public class StorageClientFaker { 26 | public static final TableSchema SIMPLE_BQ_TABLE_SCHEMA = 27 | new TableSchema() 28 | .setFields( 29 | Arrays.asList( 30 | new TableFieldSchema() 31 | .setName("name") 32 | .setType("STRING") 33 | .setMode("REQUIRED"), 34 | new TableFieldSchema() 35 | .setName("number") 36 | .setType("INTEGER") 37 | .setMode("REQUIRED"), 38 | new TableFieldSchema() 39 | .setName("ts") 40 | .setType("TIMESTAMP") 41 | .setMode("REQUIRED"))); 42 | } 43 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/test/java/com/google/cloud/flink/bigquery/services/BigQueryServicesTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.services; 18 | 19 | import org.apache.flink.util.function.SerializableSupplier; 20 | 21 | import com.google.cloud.flink.bigquery.common.config.BigQueryConnectOptions; 22 | import com.google.cloud.flink.bigquery.common.config.CredentialsOptions; 23 | import org.junit.Test; 24 | 25 | import java.io.IOException; 26 | 27 | import static com.google.common.truth.Truth.assertThat; 28 | 29 | /** Tests for {@link BigQueryServices}. 
*/ 30 | public class BigQueryServicesTest { 31 | @Test 32 | public void testFactoryWithTestServices() throws IOException { 33 | SerializableSupplier dummyServices = 34 | () -> 35 | new BigQueryServices() { 36 | @Override 37 | public BigQueryServices.QueryDataClient createQueryDataClient( 38 | CredentialsOptions credentialsOptions) { 39 | return null; 40 | } 41 | 42 | @Override 43 | public BigQueryServices.StorageReadClient createStorageReadClient( 44 | CredentialsOptions credentialsOptions) throws IOException { 45 | return null; 46 | } 47 | 48 | @Override 49 | public BigQueryServices.StorageWriteClient createStorageWriteClient( 50 | CredentialsOptions credentialsOptions) throws IOException { 51 | return null; 52 | } 53 | }; 54 | BigQueryServicesFactory original = 55 | BigQueryServicesFactory.instance( 56 | BigQueryConnectOptions.builderForQuerySource() 57 | .setTestingBigQueryServices(dummyServices) 58 | .build()); 59 | 60 | assertThat(original.getIsTestingEnabled()).isTrue(); 61 | assertThat(original.getTestingServices()).isNotNull(); 62 | assertThat(original.queryClient()).isNull(); 63 | assertThat(original.storageRead()).isNull(); 64 | assertThat(original.storageWrite()).isNull(); 65 | 66 | original.defaultImplementation(); 67 | 68 | assertThat(original.getIsTestingEnabled()).isFalse(); 69 | assertThat(original.getTestingServices()).isNull(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/test/java/com/google/cloud/flink/bigquery/services/BigQueryUtilsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.services; 18 | 19 | import com.google.api.services.bigquery.Bigquery; 20 | import com.google.api.services.bigquery.model.Dataset; 21 | import com.google.api.services.bigquery.model.Job; 22 | import com.google.api.services.bigquery.model.JobConfigurationQuery; 23 | import com.google.api.services.bigquery.model.Table; 24 | import org.junit.Test; 25 | import org.mockito.ArgumentMatchers; 26 | import org.mockito.Mockito; 27 | 28 | import java.io.IOException; 29 | 30 | /** */ 31 | public class BigQueryUtilsTest { 32 | 33 | @Test 34 | public void testRetries() throws IOException, InterruptedException { 35 | 36 | Bigquery client = Mockito.mock(Bigquery.class); 37 | Bigquery.Jobs jobs = Mockito.mock(Bigquery.Jobs.class); 38 | Bigquery.Jobs.Insert insert = Mockito.mock(Bigquery.Jobs.Insert.class); 39 | Mockito.when(client.jobs()).thenReturn(jobs); 40 | Mockito.when(jobs.insert(ArgumentMatchers.any(), ArgumentMatchers.any())) 41 | .thenReturn(insert); 42 | Mockito.when(insert.setPrettyPrint(false)).thenReturn(insert); 43 | Mockito.when(insert.execute()).thenThrow(new IOException("Expected")); 44 | 45 | try { 46 | BigQueryUtils.maxRetryCount = 2; 47 | BigQueryUtils.dryRunQuery(client, "", null, ""); 48 | } catch (Exception ex) { 49 | // swallow the expected error 50 | } 51 | // check there was a retry because we always fail 52 | Mockito.verify(insert, Mockito.times(2)).execute(); 53 | } 54 | 55 | @Test 56 | public void testNoRetriesJob() throws IOException, InterruptedException { 57 | 58 | Bigquery client = Mockito.mock(Bigquery.class); 59 | Bigquery.Jobs jobs = Mockito.mock(Bigquery.Jobs.class); 60 | Bigquery.Jobs.Insert insert = Mockito.mock(Bigquery.Jobs.Insert.class); 61 | 62 | Mockito.when(client.jobs()).thenReturn(jobs); 63 | Mockito.when(jobs.insert(ArgumentMatchers.any(), ArgumentMatchers.any())) 64 | .thenReturn(insert); 65 | Mockito.when(insert.setPrettyPrint(false)).thenReturn(insert); 66 | Mockito.when(insert.execute()).thenReturn(new Job()); 67 | 68 | BigQueryUtils.maxRetryCount = 5; 69 | BigQueryUtils.runQuery(client, "", new JobConfigurationQuery(), ""); 70 | 71 | // check there was only one request, since no errors occurred 72 | Mockito.verify(insert, Mockito.times(1)).execute(); 73 | } 74 | 75 | @Test 76 | public void testNoRetriesDataset() throws IOException, InterruptedException { 77 | Bigquery client = Mockito.mock(Bigquery.class); 78 | 79 | Bigquery.Datasets datasets = Mockito.mock(Bigquery.Datasets.class); 80 | Bigquery.Datasets.Get got = Mockito.mock(Bigquery.Datasets.Get.class); 81 | Mockito.when(client.datasets()).thenReturn(datasets); 82 | Mockito.when(datasets.get(ArgumentMatchers.any(), ArgumentMatchers.any())).thenReturn(got); 83 | Mockito.when(got.setPrettyPrint(false)).thenReturn(got); 84 | Mockito.when(got.execute()).thenReturn(new Dataset()); 85 | 86 | BigQueryUtils.maxRetryCount = 100; 87 | BigQueryUtils.datasetInfo(client, "", ""); 88 | // check no retries either 89 | Mockito.verify(got, Mockito.times(1)).execute(); 90 | } 91 | 92 | @Test 93 | public void testNoRetriesTable() throws IOException, InterruptedException { 94 | Bigquery client = Mockito.mock(Bigquery.class); 95 | 96 | Bigquery.Tables tables = Mockito.mock(Bigquery.Tables.class); 97 | Bigquery.Tables.Get got = Mockito.mock(Bigquery.Tables.Get.class); 98 | Mockito.when(client.tables()).thenReturn(tables); 99 | Mockito.when( 100 | tables.get( 101 | ArgumentMatchers.anyString(), 102 | ArgumentMatchers.anyString(), 103 | ArgumentMatchers.anyString())) 
104 | .thenReturn(got); 105 | Mockito.when(got.setPrettyPrint(false)).thenReturn(got); 106 | Mockito.when(got.execute()).thenReturn(new Table()); 107 | 108 | BigQueryUtils.maxRetryCount = 100; 109 | BigQueryUtils.tableInfo(client, "", "", ""); 110 | // check no retries either 111 | Mockito.verify(got, Mockito.times(1)).execute(); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/test/java/com/google/cloud/flink/bigquery/services/TablePartitionInfoTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | package com.google.cloud.flink.bigquery.services; 18 | 19 | import com.google.cloud.bigquery.StandardSQLTypeName; 20 | import com.google.cloud.flink.bigquery.common.utils.BigQueryPartitionUtils; 21 | import org.junit.Test; 22 | 23 | import java.time.Instant; 24 | 25 | import static com.google.common.truth.Truth.assertThat; 26 | 27 | /** */ 28 | public class TablePartitionInfoTest { 29 | 30 | @Test 31 | public void testEquality() { 32 | String columnName = "someName"; 33 | Instant now = Instant.now(); 34 | TablePartitionInfo info1 = 35 | new TablePartitionInfo( 36 | columnName, 37 | BigQueryPartitionUtils.PartitionType.MONTH, 38 | StandardSQLTypeName.TIMESTAMP, 39 | now); 40 | TablePartitionInfo info2 = 41 | new TablePartitionInfo( 42 | columnName, 43 | BigQueryPartitionUtils.PartitionType.INT_RANGE, 44 | StandardSQLTypeName.INT64, 45 | Instant.now()); 46 | 47 | assertThat(info1).isNotEqualTo(info2); 48 | 49 | TablePartitionInfo info3 = 50 | new TablePartitionInfo( 51 | columnName, 52 | BigQueryPartitionUtils.PartitionType.MONTH, 53 | StandardSQLTypeName.TIMESTAMP, 54 | now); 55 | 56 | assertThat(info1).isEqualTo(info3); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /flink-connector-bigquery-common/src/test/resources/log4j2-test.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # Set root logger level to OFF to not flood build logs 20 | # set manually to INFO for debugging purposes 21 | rootLogger.level = OFF 22 | rootLogger.appenderRef.test.ref = TestLogger 23 | 24 | appender.testlogger.name = TestLogger 25 | appender.testlogger.type = CONSOLE 26 | appender.testlogger.target = SYSTEM_ERR 27 | appender.testlogger.layout.type = PatternLayout 28 | appender.testlogger.layout.pattern = %-4r [%t] %-5p %c %x - %m%n 29 | -------------------------------------------------------------------------------- /tools/ci/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | rootLogger.level = INFO 20 | rootLogger.appenderRef.out.ref = ConsoleAppender 21 | 22 | # ----------------------------------------------------------------------------- 23 | # Console (use 'console') 24 | # ----------------------------------------------------------------------------- 25 | 26 | appender.console.name = ConsoleAppender 27 | appender.console.type = CONSOLE 28 | appender.console.layout.type = PatternLayout 29 | appender.console.layout.pattern = %d{HH:mm:ss,SSS} [%20t] %-5p %-60c %x - %m%n 30 | 31 | # ----------------------------------------------------------------------------- 32 | # File (use 'file') 33 | # ----------------------------------------------------------------------------- 34 | appender.file.name = FileAppender 35 | appender.file.type = FILE 36 | appender.file.fileName = ${sys:log.dir}/mvn-${sys:mvn.forkNumber:-output}.log 37 | appender.file.layout.type = PatternLayout 38 | appender.file.layout.pattern = %d{HH:mm:ss,SSS} [%20t] %-5p %-60c %x - %m%n 39 | appender.file.createOnDemand = true 40 | 41 | # suppress the irrelevant (wrong) warnings from the netty channel handler 42 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 43 | logger.netty.level = ERROR 44 | -------------------------------------------------------------------------------- /tools/maven/clover.xml: -------------------------------------------------------------------------------- 1 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /tools/maven/suppressions.xml: 
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------