├── .changes ├── 0.0.0.md ├── README.md ├── header.tpl.md └── unreleased │ ├── .gitkeep │ ├── Features-20241202-223835.yaml │ ├── Fixes-20241120-163101.yaml │ ├── Fixes-20241204-105846.yaml │ ├── Fixes-20241205-133606.yaml │ ├── Fixes-20241211-144752.yaml │ └── Under the Hood-20241117-194746.yaml ├── .changie.yaml ├── .dockerignore ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ └── config.yml ├── pull_request_template.md ├── scripts │ ├── env-setup.sh │ ├── integration-test-matrix.js │ ├── update_dependencies.sh │ ├── update_dev_dependency_branches.sh │ └── update_release_branch.sh └── workflows │ ├── backport.yml │ ├── bot-changelog.yml │ ├── changelog-existence.yml │ ├── cut-release-branch.yml │ ├── docs-issue.yml │ ├── integration.yml │ ├── main.yml │ ├── nightly-release.yml │ ├── release-branch-tests.yml │ ├── release-internal.yml │ ├── release.yml │ ├── release_prep_hatch.yml │ ├── repository-cleanup.yml │ ├── stale.yml │ └── triage-labels.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── dbt ├── __init__.py ├── adapters │ └── bigquery │ │ ├── __init__.py │ │ ├── __version__.py │ │ ├── clients.py │ │ ├── column.py │ │ ├── connections.py │ │ ├── credentials.py │ │ ├── dataset.py │ │ ├── impl.py │ │ ├── python_submissions.py │ │ ├── relation.py │ │ ├── relation_configs │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── _cluster.py │ │ ├── _materialized_view.py │ │ ├── _options.py │ │ ├── _partition.py │ │ └── _policies.py │ │ ├── retry.py │ │ └── utility.py └── include │ └── bigquery │ ├── __init__.py │ ├── dbt_project.yml │ ├── macros │ ├── adapters.sql │ ├── adapters │ │ ├── apply_grants.sql │ │ └── columns.sql │ ├── catalog │ │ ├── by_relation.sql │ │ ├── by_schema.sql │ │ └── catalog.sql │ ├── etc.sql │ ├── materializations │ │ ├── clone.sql │ │ ├── copy.sql │ │ ├── incremental.sql │ │ ├── incremental_strategy │ │ │ ├── common.sql │ │ │ ├── insert_overwrite.sql │ │ │ ├── merge.sql │ │ │ ├── microbatch.sql │ │ │ └── time_ingestion_tables.sql │ │ ├── seed.sql │ │ ├── snapshot.sql │ │ ├── table.sql │ │ └── view.sql │ ├── python_model │ │ └── python.sql │ ├── relations │ │ ├── cluster.sql │ │ ├── drop.sql │ │ ├── materialized_view │ │ │ ├── alter.sql │ │ │ ├── create.sql │ │ │ ├── drop.sql │ │ │ ├── refresh.sql │ │ │ └── replace.sql │ │ ├── options.sql │ │ ├── partition.sql │ │ ├── rename.sql │ │ ├── table │ │ │ ├── drop.sql │ │ │ ├── options.sql │ │ │ └── rename.sql │ │ └── view │ │ │ ├── drop.sql │ │ │ ├── options.sql │ │ │ ├── rename.sql │ │ │ └── replace.sql │ └── utils │ │ ├── array_append.sql │ │ ├── array_concat.sql │ │ ├── array_construct.sql │ │ ├── bool_or.sql │ │ ├── date.sql │ │ ├── date_trunc.sql │ │ ├── dateadd.sql │ │ ├── datediff.sql │ │ ├── escape_single_quotes.sql │ │ ├── except.sql │ │ ├── get_columns_spec_ddl.sql │ │ ├── hash.sql │ │ ├── intersect.sql │ │ ├── listagg.sql │ │ ├── position.sql │ │ ├── right.sql │ │ ├── safe_cast.sql │ │ ├── split_part.sql │ │ ├── string_literal.sql │ │ └── timestamps.sql │ └── profile_template.yml ├── docker ├── Dockerfile ├── README.md └── dev.Dockerfile ├── hatch.toml ├── pyproject.toml ├── test.env.example ├── tests ├── __init__.py ├── boundary │ └── test_bigquery_sdk.py ├── conftest.py ├── functional │ ├── adapter │ │ ├── catalog_tests │ │ │ ├── files.py │ │ │ └── test_relation_types.py │ │ ├── column_types │ │ │ ├── fixtures.py │ │ │ ├── test_alter_column_types.py │ │ │ └── test_column_types.py │ │ ├── constraints │ │ │ ├── fixtures.py │ │ │ └── test_constraints.py │ │ 
├── dbt_clone │ │ │ └── test_dbt_clone.py │ │ ├── dbt_show │ │ │ └── test_dbt_show.py │ │ ├── describe_relation │ │ │ ├── _files.py │ │ │ └── test_describe_relation.py │ │ ├── empty │ │ │ └── test_empty.py │ │ ├── expected_stats.py │ │ ├── hooks │ │ │ ├── data │ │ │ │ ├── seed_model.sql │ │ │ │ └── seed_run.sql │ │ │ ├── test_model_hooks.py │ │ │ └── test_run_hooks.py │ │ ├── incremental │ │ │ ├── incremental_strategy_fixtures.py │ │ │ ├── seeds.py │ │ │ ├── test_incremental_merge_exclude_columns.py │ │ │ ├── test_incremental_microbatch.py │ │ │ ├── test_incremental_on_schema_change.py │ │ │ ├── test_incremental_predicates.py │ │ │ ├── test_incremental_strategies.py │ │ │ └── test_incremental_unique_id.py │ │ ├── materialized_view_tests │ │ │ ├── _files.py │ │ │ ├── _mixin.py │ │ │ ├── test_materialized_view.py │ │ │ ├── test_materialized_view_changes.py │ │ │ ├── test_materialized_view_cluster_changes.py │ │ │ └── test_materialized_view_partition_changes.py │ │ ├── query_comment_test │ │ │ ├── test_job_label.py │ │ │ └── test_query_comment.py │ │ ├── simple_bigquery_view │ │ │ ├── fixtures.py │ │ │ ├── seeds.py │ │ │ └── test_simple_bigquery_view.py │ │ ├── simple_copy │ │ │ ├── fixtures.py │ │ │ └── test_simple_copy.py │ │ ├── sources_freshness_tests │ │ │ ├── files.py │ │ │ └── test_get_relation_last_modified.py │ │ ├── store_test_failures_tests │ │ │ └── test_store_test_failures.py │ │ ├── test_aliases.py │ │ ├── test_basic.py │ │ ├── test_changing_relation_type.py │ │ ├── test_copy_materialization.py │ │ ├── test_dbt_debug.py │ │ ├── test_grant_access_to.py │ │ ├── test_grants.py │ │ ├── test_json_keyfile.py │ │ ├── test_persist_docs.py │ │ ├── test_python_model.py │ │ ├── test_simple_seed.py │ │ ├── test_simple_snaphot.py │ │ ├── test_string_literal_macro.py │ │ ├── unit_testing │ │ │ └── test_unit_testing.py │ │ ├── upload_file_tests │ │ │ ├── data │ │ │ │ ├── csv │ │ │ │ │ └── source.csv │ │ │ │ ├── ndjson │ │ │ │ │ └── source.ndjson │ │ │ │ └── parquet │ │ │ │ │ └── source.parquet │ │ │ └── test_upload_file.py │ │ └── utils │ │ │ ├── fixture_array_append.py │ │ │ ├── fixture_array_concat.py │ │ │ ├── fixture_array_construct.py │ │ │ ├── fixture_get_intervals_between.py │ │ │ ├── test_data_types.py │ │ │ ├── test_timestamps.py │ │ │ └── test_utils.py │ ├── python_model_tests │ │ ├── __init__.py │ │ ├── files.py │ │ └── test_list_inference.py │ ├── test_cancel.py │ ├── test_changing_partitions.py │ ├── test_delete_column_policy.py │ ├── test_drop_temp_relation.py │ ├── test_get_columns_incomplete_database.py │ ├── test_hours_to_expiration.py │ ├── test_incremental_materialization.py │ ├── test_job_timeout.py │ ├── test_location_change.py │ ├── test_override_database │ │ ├── fixtures.py │ │ └── test_override_database.py │ ├── test_quota_project.py │ ├── test_update_column_policy.py │ └── test_update_field_description.py └── unit │ ├── __init__.py │ ├── mock_adapter.py │ ├── test_bigquery_adapter.py │ ├── test_bigquery_connection_manager.py │ ├── test_column.py │ ├── test_configure_dataproc_batch.py │ ├── test_dataset.py │ ├── test_renamed_relations.py │ └── utils.py └── third-party-stubs └── agate ├── __init__.pyi └── data_types.pyi /.changes/0.0.0.md: -------------------------------------------------------------------------------- 1 | ## Previous Releases 2 | For information on prior major and minor releases, see their changelogs: 3 | - [1.6](https://github.com/dbt-labs/dbt-bigquery/blob/1.6.latest/CHANGELOG.md) 4 | - 
[1.5](https://github.com/dbt-labs/dbt-bigquery/blob/1.5.latest/CHANGELOG.md) 5 | - [1.4](https://github.com/dbt-labs/dbt-bigquery/blob/1.4.latest/CHANGELOG.md) 6 | - [1.3](https://github.com/dbt-labs/dbt-bigquery/blob/1.3.latest/CHANGELOG.md) 7 | - [1.2](https://github.com/dbt-labs/dbt-bigquery/blob/1.2.latest/CHANGELOG.md) 8 | - [1.1](https://github.com/dbt-labs/dbt-bigquery/blob/1.1.latest/CHANGELOG.md) 9 | - [1.0](https://github.com/dbt-labs/dbt-bigquery/blob/1.0.latest/CHANGELOG.md) 10 | -------------------------------------------------------------------------------- /.changes/README.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | To view information about the changelog operation we suggest reading this [README](https://github.com/dbt-labs/dbt-bigquery/blob/main/.changes/README.md) found in `dbt-bigquery`. 4 | -------------------------------------------------------------------------------- /.changes/header.tpl.md: -------------------------------------------------------------------------------- 1 | # dbt-bigquery Changelog 2 | 3 | - This file provides a full account of all changes to `dbt-bigquery`. 4 | - Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases. 5 | - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version. 6 | - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-bigquery/blob/main/CONTRIBUTING.md#adding-changelog-entry) 7 | -------------------------------------------------------------------------------- /.changes/unreleased/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-bigquery/0995665e490cdee9c408d26aac8e1c19fefaebe0/.changes/unreleased/.gitkeep -------------------------------------------------------------------------------- /.changes/unreleased/Features-20241202-223835.yaml: -------------------------------------------------------------------------------- 1 | kind: Features 2 | body: Allow copy_partitions in microbatch 3 | time: 2024-12-02T22:38:35.479052Z 4 | custom: 5 | Author: borjavb 6 | Issue: "1414" 7 | -------------------------------------------------------------------------------- /.changes/unreleased/Fixes-20241120-163101.yaml: -------------------------------------------------------------------------------- 1 | kind: Fixes 2 | body: Fix issue where dbt-bigquery was not retrying in certain retryable scenarios, 3 | e.g. 
503's 4 | time: 2024-11-20T16:31:01.60689-05:00 5 | custom: 6 | Author: mikealfare 7 | Issue: "682" 8 | -------------------------------------------------------------------------------- /.changes/unreleased/Fixes-20241204-105846.yaml: -------------------------------------------------------------------------------- 1 | kind: Fixes 2 | body: Cast `event_time` to a timestamp prior to comparing against microbatch start/end 3 | time 4 | time: 2024-12-04T10:58:46.573608-05:00 5 | custom: 6 | Author: michelleark 7 | Issue: "1422" 8 | -------------------------------------------------------------------------------- /.changes/unreleased/Fixes-20241205-133606.yaml: -------------------------------------------------------------------------------- 1 | kind: Fixes 2 | body: Fix issue where rate limit errors on table service calls are not retried 3 | time: 2024-12-05T13:36:06.436005-05:00 4 | custom: 5 | Author: mikealfare 6 | Issue: "1423" 7 | -------------------------------------------------------------------------------- /.changes/unreleased/Fixes-20241211-144752.yaml: -------------------------------------------------------------------------------- 1 | kind: Fixes 2 | body: Fix retry scenarios so that dbt always retries when BigQuery recommends a retry 3 | time: 2024-12-11T14:47:52.36905-05:00 4 | custom: 5 | Author: mikealfare 6 | Issue: "263" 7 | -------------------------------------------------------------------------------- /.changes/unreleased/Under the Hood-20241117-194746.yaml: -------------------------------------------------------------------------------- 1 | kind: Under the Hood 2 | body: Move from setup.py to pyproject.toml and to hatch as a dev tool 3 | time: 2024-11-17T19:47:46.341-05:00 4 | custom: 5 | Author: mikealfare 6 | Issue: "1407" 7 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !docker_dev 3 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # This codeowners file is used to ensure all PRs require reviews from the adapters team 2 | 3 | * @dbt-labs/adapters 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Ask the community for help 4 | url: https://github.com/dbt-labs/docs.getdbt.com/discussions 5 | about: Need help troubleshooting? Check out our guide on how to ask 6 | - name: Contact dbt Cloud support 7 | url: mailto:support@getdbt.com 8 | about: Are you using dbt Cloud? Contact our support team for help! 9 | - name: Participate in Discussions 10 | url: https://github.com/dbt-labs/dbt-adapters/discussions 11 | about: Do you have a Big Idea for dbt-bigquery? 
Read open discussions, or start a new one 12 | - name: Create an issue for dbt-bigquery 13 | url: https://github.com/dbt-labs/dbt-adapters/issues/new/choose 14 | about: Report a bug or request a feature for dbt-bigquery 15 | - name: Create an issue for dbt-core 16 | url: https://github.com/dbt-labs/dbt-core/issues/new/choose 17 | about: Report a bug or request a feature for dbt-core 18 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 4 | -------------------------------------------------------------------------------- /.github/scripts/env-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Set TOXENV environment variable for subsequent steps 3 | echo "TOXENV=integration-bigquery" >> $GITHUB_ENV 4 | # Set INTEGRATION_TESTS_SECRETS_PREFIX environment variable for subsequent steps 5 | # All GH secrets that have this prefix will be set as environment variables 6 | echo "INTEGRATION_TESTS_SECRETS_PREFIX=BIGQUERY_TEST" >> $GITHUB_ENV 7 | # Set environment variables required for integration tests 8 | echo "DBT_TEST_USER_1=group:buildbot@dbtlabs.com" >> $GITHUB_ENV 9 | echo "DBT_TEST_USER_2=group:engineering-core-team@dbtlabs.com" >> $GITHUB_ENV 10 | echo "DBT_TEST_USER_3=serviceAccount:dbt-integration-test-user@dbt-test-env.iam.gserviceaccount.com" >> $GITHUB_ENV 11 | echo "DATAPROC_REGION=us-central1" >> $GITHUB_ENV 12 | echo "DATAPROC_CLUSTER_NAME=dbt-test-1" >> $GITHUB_ENV 13 | echo "GCS_BUCKET=dbt-ci" >> $GITHUB_ENV 14 | -------------------------------------------------------------------------------- /.github/scripts/integration-test-matrix.js: -------------------------------------------------------------------------------- 1 | module.exports = ({ context }) => { 2 | const defaultPythonVersion = "3.9"; 3 | const supportedPythonVersions = ["3.9", "3.10", "3.11", "3.12"]; 4 | const supportedAdapters = ["bigquery"]; 5 | 6 | // if PR, generate matrix based on files changed and PR labels 7 | if (context.eventName.includes("pull_request")) { 8 | // `changes` is a list of adapter names that have related 9 | // file changes in the PR 10 | // ex: ['postgres', 'snowflake'] 11 | const changes = JSON.parse(process.env.CHANGES); 12 | const labels = context.payload.pull_request.labels.map(({ name }) => name); 13 | console.log("labels", labels); 14 | console.log("changes", changes); 15 | const testAllLabel = labels.includes("test all"); 16 | const include = []; 17 | 18 | for (const adapter of supportedAdapters) { 19 | if ( 20 | changes.includes(adapter) || 21 | testAllLabel || 22 | labels.includes(`test ${adapter}`) 23 | ) { 24 | for (const pythonVersion of supportedPythonVersions) { 25 | if ( 26 | pythonVersion === defaultPythonVersion || 27 | labels.includes(`test python${pythonVersion}`) || 28 | testAllLabel 29 | ) { 30 | // always run tests on ubuntu by default 31 | include.push({ 32 | os: "ubuntu-latest", 33 | adapter, 34 | "python-version": pythonVersion, 35 | }); 36 | 37 | if (labels.includes("test windows") || testAllLabel) { 38 | include.push({ 39 | os: "windows-latest", 40 | adapter, 41 | "python-version": pythonVersion, 42 | }); 43 | } 44 | 45 | if (labels.includes("test macos") || testAllLabel) { 46 | include.push({ 47 | os: "macos-14", 48 | adapter, 49 | "python-version": pythonVersion, 50 | }); 51 | } 52 | } 53 | } 54 | } 55 | } 56 | 57 | console.log("matrix", { include }); 58 | 59 | 
return { 60 | include, 61 | }; 62 | } 63 | // if not PR, generate matrix of python version, adapter, and operating 64 | // system to run integration tests on 65 | 66 | const include = []; 67 | // run for all adapters and python versions on ubuntu 68 | for (const adapter of supportedAdapters) { 69 | for (const pythonVersion of supportedPythonVersions) { 70 | include.push({ 71 | os: 'ubuntu-latest', 72 | adapter: adapter, 73 | "python-version": pythonVersion, 74 | }); 75 | } 76 | } 77 | 78 | // additionally include runs for all adapters, on macos and windows, 79 | // but only for the default python version 80 | for (const adapter of supportedAdapters) { 81 | for (const operatingSystem of ["windows-latest", "macos-14"]) { 82 | include.push({ 83 | os: operatingSystem, 84 | adapter: adapter, 85 | "python-version": defaultPythonVersion, 86 | }); 87 | } 88 | } 89 | 90 | console.log("matrix", { include }); 91 | 92 | return { 93 | include, 94 | }; 95 | }; 96 | -------------------------------------------------------------------------------- /.github/scripts/update_dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | set -e 3 | 4 | git_branch=$1 5 | target_req_file="hatch.toml" 6 | core_req_sed_pattern="s|dbt-core.git.*#subdirectory=core|dbt-core.git@${git_branch}#subdirectory=core|g" 7 | tests_req_sed_pattern="s|dbt-adapters.git.*#subdirectory=dbt-tests-adapter|dbt-adapters.git@${git_branch}#subdirectory=dbt-tests-adapter|g" 8 | if [[ "$OSTYPE" == darwin* ]]; then 9 | # mac ships with a different version of sed that requires a delimiter arg 10 | sed -i "" "$core_req_sed_pattern" $target_req_file 11 | sed -i "" "$tests_req_sed_pattern" $target_req_file 12 | else 13 | sed -i "$core_req_sed_pattern" $target_req_file 14 | sed -i "$tests_req_sed_pattern" $target_req_file 15 | fi 16 | -------------------------------------------------------------------------------- /.github/scripts/update_dev_dependency_branches.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | set -e 3 | 4 | 5 | dbt_adapters_branch=$1 6 | dbt_core_branch=$2 7 | dbt_common_branch=$3 8 | target_req_file="hatch.toml" 9 | core_req_sed_pattern="s|dbt-core.git.*#subdirectory=core|dbt-core.git@${dbt_core_branch}#subdirectory=core|g" 10 | adapters_req_sed_pattern="s|dbt-adapters.git|dbt-adapters.git@${dbt_adapters_branch}|g" 11 | common_req_sed_pattern="s|dbt-common.git|dbt-common.git@${dbt_common_branch}|g" 12 | if [[ "$OSTYPE" == darwin* ]]; then 13 | # mac ships with a different version of sed that requires a delimiter arg 14 | sed -i "" "$adapters_req_sed_pattern" $target_req_file 15 | sed -i "" "$core_req_sed_pattern" $target_req_file 16 | sed -i "" "$common_req_sed_pattern" $target_req_file 17 | else 18 | sed -i "$adapters_req_sed_pattern" $target_req_file 19 | sed -i "$core_req_sed_pattern" $target_req_file 20 | sed -i "$common_req_sed_pattern" $target_req_file 21 | fi 22 | -------------------------------------------------------------------------------- /.github/scripts/update_release_branch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | set -e 3 | 4 | release_branch=$1 5 | target_req_file=".github/workflows/nightly-release.yml" 6 | if [[ "$OSTYPE" == darwin* ]]; then 7 | # mac ships with a different version of sed that requires a delimiter arg 8 | sed -i "" "s|[0-9].[0-9].latest|$release_branch|" $target_req_file 9 | else 10 | sed -i 
"s|[0-9].[0-9].latest|$release_branch|" $target_req_file 11 | fi 12 | -------------------------------------------------------------------------------- /.github/workflows/backport.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # When a PR is merged, if it has the backport label, it will create 3 | # a new PR to backport those changes to the given branch. If it can't 4 | # cleanly do a backport, it will comment on the merged PR of the failure. 5 | # 6 | # Label naming convention: "backport " 7 | # Example: backport 1.0.latest 8 | # 9 | # You MUST "Squash and merge" the original PR or this won't work. 10 | 11 | # **why?** 12 | # Changes sometimes need to be backported to release branches. 13 | # This automates the backporting process 14 | 15 | # **when?** 16 | # Once a PR is "Squash and merge"'d, by adding a backport label, this is triggered 17 | 18 | name: Backport 19 | on: 20 | pull_request: 21 | types: 22 | - labeled 23 | 24 | permissions: 25 | contents: write 26 | pull-requests: write 27 | 28 | jobs: 29 | backport: 30 | name: Backport 31 | runs-on: ubuntu-latest 32 | # Only react to merged PRs for security reasons. 33 | # See https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target. 34 | if: > 35 | github.event.pull_request.merged 36 | && contains(github.event.label.name, 'backport') 37 | steps: 38 | - uses: tibdex/backport@v2 39 | with: 40 | github_token: ${{ secrets.GITHUB_TOKEN }} 41 | -------------------------------------------------------------------------------- /.github/workflows/bot-changelog.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # When bots create a PR, this action will add a corresponding changie yaml file to that 3 | # PR when a specific label is added. 4 | # 5 | # The file is created off a template: 6 | # 7 | # kind: 8 | # body: 9 | # time: 10 | # custom: 11 | # Author: 12 | # PR: 13 | # 14 | # **why?** 15 | # Automate changelog generation for more visability with automated bot PRs. 16 | # 17 | # **when?** 18 | # Once a PR is created, label should be added to PR before or after creation. You can also 19 | # manually trigger this by adding the appropriate label at any time. 20 | # 21 | # **how to add another bot?** 22 | # Add the label and changie kind to the include matrix. That's it! 
23 | # 24 | 25 | name: Bot Changelog 26 | 27 | on: 28 | pull_request: 29 | # catch when the PR is opened with the label or when the label is added 30 | types: [opened, labeled] 31 | 32 | permissions: 33 | contents: write 34 | pull-requests: read 35 | 36 | jobs: 37 | generate_changelog: 38 | strategy: 39 | matrix: 40 | include: 41 | - label: "dependencies" 42 | changie_kind: "Dependencies" 43 | - label: "snyk" 44 | changie_kind: "Security" 45 | runs-on: ubuntu-latest 46 | 47 | steps: 48 | 49 | - name: Create and commit changelog on bot PR 50 | if: "contains(github.event.pull_request.labels.*.name, ${{ matrix.label }})" 51 | id: bot_changelog 52 | uses: emmyoop/changie_bot@v1 53 | with: 54 | GITHUB_TOKEN: ${{ secrets.FISHTOWN_BOT_PAT }} 55 | commit_author_name: "Github Build Bot" 56 | commit_author_email: "" 57 | commit_message: "Add automated changelog yaml from template for bot PR" 58 | changie_kind: ${{ matrix.changie_kind }} 59 | label: ${{ matrix.label }} 60 | custom_changelog_string: "custom:\n Author: ${{ github.event.pull_request.user.login }}\n PR: ${{ github.event.pull_request.number }}" 61 | -------------------------------------------------------------------------------- /.github/workflows/changelog-existence.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # Checks that a file has been committed under the /.changes directory 3 | # as a new CHANGELOG entry. Cannot check for a specific filename as 4 | # it is dynamically generated by change type and timestamp. 5 | # This workflow should not require any secrets since it runs for PRs 6 | # from forked repos. 7 | # By default, secrets are not passed to workflows running from 8 | # a forked repo. 9 | 10 | # **why?** 11 | # Ensure code changes get reflected in the CHANGELOG. 12 | 13 | # **when?** 14 | # This will run for all PRs going into main and *.latest. It will 15 | # run when they are opened, reopened, when any label is added or removed 16 | # and when new code is pushed to the branch. The action will then get 17 | # skipped if the 'Skip Changelog' label is present in any of the labels. 18 | 19 | name: Check Changelog Entry 20 | 21 | on: 22 | pull_request: 23 | types: [opened, reopened, labeled, unlabeled, synchronize] 24 | workflow_dispatch: 25 | 26 | defaults: 27 | run: 28 | shell: bash 29 | 30 | permissions: 31 | contents: read 32 | pull-requests: write 33 | 34 | 35 | jobs: 36 | changelog: 37 | uses: dbt-labs/actions/.github/workflows/changelog-existence.yml@main 38 | with: 39 | changelog_comment: 'Thank you for your pull request! We could not find a changelog entry for this change. For details on how to document a change, see the [dbt-bigquery contributing guide](https://github.com/dbt-labs/dbt-bigquery/blob/main/CONTRIBUTING.md).' 40 | skip_label: 'Skip Changelog' 41 | secrets: inherit # this is only acceptable because we own the action we're calling 42 | -------------------------------------------------------------------------------- /.github/workflows/cut-release-branch.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # Calls a centralized workflow that will: 3 | # 1. Cut a new branch (generally `*.latest`) 4 | # 2. Also cleans up all files in `.changes/unreleased` and `.changes/previous version` on 5 | # `main` and bumps `main` to the input version. 6 | 7 | # **why?** 8 | # Generally reduces the workload of engineers and reduces errors. Allows automation.
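A brief aside on the Check Changelog Entry workflow above: the actual check lives in the reusable `dbt-labs/actions` workflow, and PRs labeled 'Skip Changelog' bypass it. Conceptually, the rule it enforces is just that the PR adds some file under `.changes/unreleased/`. A rough, hypothetical sketch of that rule (the function name and inputs are illustrative, not the real implementation):

```python
# Sketch of the rule enforced by the changelog-existence check; illustrative only.
import fnmatch
from typing import List


def has_changelog_entry(changed_files: List[str]) -> bool:
    """True if the PR adds any file under .changes/unreleased/, whatever its generated name."""
    return any(fnmatch.fnmatch(path, ".changes/unreleased/*.yaml") for path in changed_files)


# has_changelog_entry([".changes/unreleased/Fixes-20241211-144752.yaml"]) -> True
# has_changelog_entry(["dbt/adapters/bigquery/impl.py"]) -> False
```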
9 | 10 | # **when?** 11 | # This will run when called manually. 12 | 13 | name: Cut new release branch 14 | 15 | on: 16 | workflow_dispatch: 17 | inputs: 18 | version_to_bump_main: 19 | description: 'The alpha version main should bump to (ex. 1.6.0a1)' 20 | required: true 21 | new_branch_name: 22 | description: 'The full name of the new branch (ex. 1.5.latest)' 23 | required: true 24 | 25 | defaults: 26 | run: 27 | shell: bash 28 | 29 | permissions: 30 | contents: write 31 | 32 | jobs: 33 | cut_branch: 34 | name: "Cut branch and clean up main for dbt-bigquery" 35 | uses: dbt-labs/actions/.github/workflows/cut-release-branch.yml@main 36 | with: 37 | version_to_bump_main: ${{ inputs.version_to_bump_main }} 38 | new_branch_name: ${{ inputs.new_branch_name }} 39 | PR_title: "Cleanup main after cutting new ${{ inputs.new_branch_name }} branch" 40 | PR_body: "This PR will fail CI until the dbt-core PR has been merged due to release version conflicts." 41 | secrets: 42 | FISHTOWN_BOT_PAT: ${{ secrets.FISHTOWN_BOT_PAT }} 43 | -------------------------------------------------------------------------------- /.github/workflows/docs-issue.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # Open an issue in docs.getdbt.com when an issue is labeled `user docs` and closed as completed 3 | 4 | # **why?** 5 | # To reduce barriers for keeping docs up to date 6 | 7 | # **when?** 8 | # When an issue is labeled `user docs` and is closed as completed. Can be labeled before or after the issue is closed. 9 | 10 | 11 | name: Open issues in docs.getdbt.com repo when an issue is labeled 12 | run-name: "Open an issue in docs.getdbt.com for issue #${{ github.event.issue.number }}" 13 | 14 | on: 15 | issues: 16 | types: [labeled, closed] 17 | 18 | defaults: 19 | run: 20 | shell: bash 21 | 22 | permissions: 23 | issues: write # comments on issues 24 | 25 | jobs: 26 | open_issues: 27 | # we only want to run this when the issue is closed as completed and the label `user docs` has been assigned. 28 | # If this logic does not exist in this workflow, it runs the 29 | # risk of duplication of issues being created due to merge and label both triggering this workflow to run and neither having 30 | # generated the comment before the other runs. This lives here instead of the shared workflow because this is where we 31 | # decide if it should run or not. 32 | if: | 33 | (github.event.issue.state == 'closed' && github.event.issue.state_reason == 'completed') && ( 34 | (github.event.action == 'closed' && contains(github.event.issue.labels.*.name, 'user docs')) || 35 | (github.event.action == 'labeled' && github.event.label.name == 'user docs')) 36 | uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main 37 | with: 38 | issue_repository: "dbt-labs/docs.getdbt.com" 39 | issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}" 40 | issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated." 41 | secrets: inherit 42 | -------------------------------------------------------------------------------- /.github/workflows/release-branch-tests.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # The purpose of this workflow is to trigger CI to run for each 3 | # release branch and main branch on a regular cadence.
If the CI workflow 4 | # fails for a branch, it will post to dev-core-alerts to raise awareness. 5 | 6 | # **why?** 7 | # Ensures release branches and main are always shippable and not broken. 8 | # Also, can catch any dependencies shifting beneath us that might 9 | # introduce breaking changes (could also impact Cloud). 10 | 11 | # **when?** 12 | # Mainly on a schedule of 9:00, 13:00, 18:00 UTC everyday. 13 | # Manual trigger can also test on demand 14 | 15 | name: Release branch scheduled testing 16 | 17 | on: 18 | schedule: 19 | - cron: '0 9,13,18 * * *' # 9:00, 13:00, 18:00 UTC 20 | 21 | workflow_dispatch: # for manual triggering 22 | 23 | # no special access is needed 24 | permissions: read-all 25 | 26 | jobs: 27 | run_tests: 28 | uses: dbt-labs/actions/.github/workflows/release-branch-tests.yml@main 29 | with: 30 | workflows_to_run: '["main.yml", "integration.yml"]' 31 | include_main: false 32 | secrets: inherit 33 | -------------------------------------------------------------------------------- /.github/workflows/release-internal.yml: -------------------------------------------------------------------------------- 1 | # What? 2 | # 3 | # Tag and release an arbitrary ref. Uploads to an internal archive for further processing. 4 | # 5 | # How? 6 | # 7 | # After checking out and testing the provided ref, the image is built and uploaded. 8 | # 9 | # When? 10 | # 11 | # Manual trigger. 12 | 13 | name: "Release to Cloud" 14 | run-name: "Release to Cloud off of ${{ inputs.ref }}" 15 | 16 | on: 17 | workflow_dispatch: 18 | inputs: 19 | ref: 20 | description: "The ref (sha or branch name) to use" 21 | type: string 22 | default: "main" 23 | required: true 24 | package_test_command: 25 | description: "Package test command" 26 | type: string 27 | default: "python -c \"import dbt.adapters.bigquery\"" 28 | required: true 29 | skip_tests: 30 | description: "Should the tests be skipped? (default to false)" 31 | type: boolean 32 | required: true 33 | default: false 34 | 35 | defaults: 36 | run: 37 | shell: bash 38 | 39 | jobs: 40 | invoke-reusable-workflow: 41 | name: "Build and Release Internally" 42 | 43 | uses: "dbt-labs/dbt-release/.github/workflows/internal-archive-release.yml@main" 44 | 45 | with: 46 | package_test_command: "${{ inputs.package_test_command }}" 47 | dbms_name: "bigquery" 48 | ref: "${{ inputs.ref }}" 49 | skip_tests: "${{ inputs.skip_tests }}" 50 | 51 | secrets: "inherit" 52 | -------------------------------------------------------------------------------- /.github/workflows/repository-cleanup.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # Cleanup branches left over from automation and testing. Also cleanup 3 | # draft releases from release testing. 4 | 5 | # **why?** 6 | # The automations are leaving behind branches and releases that clutter 7 | # the repository. Sometimes we need them to debug processes so we don't 8 | # want them immediately deleted. Running on Saturday to avoid running 9 | # at the same time as an actual release to prevent breaking a release 10 | # mid-release. 11 | 12 | # **when?** 13 | # Mainly on a schedule of 12:00 Saturday. 
14 | # Manual trigger can also run on demand 15 | 16 | name: Repository Cleanup 17 | 18 | on: 19 | schedule: 20 | - cron: '0 12 * * SAT' # At 12:00 on Saturday - details in `why` above 21 | 22 | workflow_dispatch: # for manual triggering 23 | 24 | permissions: 25 | contents: write 26 | 27 | jobs: 28 | cleanup-repo: 29 | uses: dbt-labs/actions/.github/workflows/repository-cleanup.yml@main 30 | secrets: inherit 31 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: "Close stale issues and PRs" 2 | on: 3 | schedule: 4 | - cron: "30 1 * * *" 5 | 6 | permissions: 7 | issues: write 8 | pull-requests: write 9 | 10 | jobs: 11 | stale: 12 | uses: dbt-labs/actions/.github/workflows/stale-bot-matrix.yml@main 13 | -------------------------------------------------------------------------------- /.github/workflows/triage-labels.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # When the core team triages, we sometimes need more information from the issue creator. In 3 | # those cases we remove the `triage` label and add the `awaiting_response` label. Once we 4 | # receive a response in the form of a comment, we want the `awaiting_response` label removed 5 | # in favor of the `triage` label so we are aware that the issue needs action. 6 | 7 | # **why?** 8 | # To help with our team's triage issue tracking 9 | 10 | # **when?** 11 | # This will run when a comment is added to an issue and that issue has the `awaiting_response` label. 12 | 13 | name: Update Triage Label 14 | 15 | on: issue_comment 16 | 17 | defaults: 18 | run: 19 | shell: bash 20 | 21 | permissions: 22 | issues: write 23 | 24 | jobs: 25 | triage_label: 26 | if: contains(github.event.issue.labels.*.name, 'awaiting_response') 27 | uses: dbt-labs/actions/.github/workflows/swap-labels.yml@main 28 | with: 29 | add_label: "triage" 30 | remove_label: "awaiting_response" 31 | secrets: inherit # this is only acceptable because we own the action we're calling 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env*/ 12 | dbt_env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | *.mypy_cache/ 28 | logs/ 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | .env 47 | nosetests.xml 48 | coverage.xml 49 | *,cover 50 | .hypothesis/ 51 | test.env 52 | *.pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | 61 | # Sphinx documentation 62 | docs/_build/ 63 | 64 | # PyBuilder 65 | target/ 66 | 67 | # Ipython Notebook 68 | .ipynb_checkpoints 69 | 70 | # Emacs 71 | *~ 72 | 73 | # Sublime Text 74 | *.sublime-* 75 | 76 | # Vim 77 | *.sw* 78 | 79 | # Pyenv 80 | .python-version 81 | 82 | # Vim 83 | *.sw* 84 | 85 | # pycharm 86 | .idea/ 87 | venv/ 88 | 89 | # AWS credentials 90 | .aws/ 91 | 92 | # MacOS 93 | .DS_Store 94 | 95 | # vscode 96 | .vscode/ 97 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # For more on configuring pre-commit hooks (see https://pre-commit.com/) 2 | default_language_version: 3 | python: python3 4 | 5 | repos: 6 | - repo: https://github.com/pre-commit/pre-commit-hooks 7 | rev: v4.6.0 8 | hooks: 9 | - id: check-yaml 10 | args: [--unsafe] 11 | - id: check-json 12 | - id: end-of-file-fixer 13 | - id: trailing-whitespace 14 | - id: check-case-conflict 15 | 16 | - repo: https://github.com/dbt-labs/pre-commit-hooks 17 | rev: v0.1.0a1 18 | hooks: 19 | - id: dbt-core-in-adapters-check 20 | 21 | - repo: https://github.com/psf/black 22 | rev: 24.4.2 23 | hooks: 24 | - id: black 25 | args: 26 | - --line-length=99 27 | - --target-version=py39 28 | - --target-version=py310 29 | - --target-version=py311 30 | - --target-version=py312 31 | additional_dependencies: [flaky] 32 | 33 | - repo: https://github.com/pycqa/flake8 34 | rev: 7.0.0 35 | hooks: 36 | - id: flake8 37 | exclude: tests/ 38 | args: 39 | - --max-line-length=99 40 | - --select=E,F,W 41 | - --ignore=E203,E501,E741,W503,W504 42 | - --per-file-ignores=*/__init__.py:F401 43 | 44 | - repo: https://github.com/pre-commit/mirrors-mypy 45 | rev: v1.10.0 46 | hooks: 47 | - id: mypy 48 | args: 49 | - --explicit-package-bases 50 | - --ignore-missing-imports 51 | - --pretty 52 | - --show-error-codes 53 | - --warn-unused-ignores 54 | files: ^dbt/adapters/bigquery 55 | additional_dependencies: 56 | - types-protobuf 57 | - types-pytz 58 | - types-requests 59 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # dbt-bigquery Changelog 2 | 3 | - This file provides a full account of all changes to `dbt-bigquery`. 4 | - Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases. 5 | - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version. 6 | - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). 
For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-bigquery/blob/main/CONTRIBUTING.md#adding-changelog-entry) 7 | 8 | ## Previous Releases 9 | For information on prior major and minor releases, see their changelogs: 10 | - [1.6](https://github.com/dbt-labs/dbt-bigquery/blob/1.6.latest/CHANGELOG.md) 11 | - [1.5](https://github.com/dbt-labs/dbt-bigquery/blob/1.5.latest/CHANGELOG.md) 12 | - [1.4](https://github.com/dbt-labs/dbt-bigquery/blob/1.4.latest/CHANGELOG.md) 13 | - [1.3](https://github.com/dbt-labs/dbt-bigquery/blob/1.3.latest/CHANGELOG.md) 14 | - [1.2](https://github.com/dbt-labs/dbt-bigquery/blob/1.2.latest/CHANGELOG.md) 15 | - [1.1](https://github.com/dbt-labs/dbt-bigquery/blob/1.1.latest/CHANGELOG.md) 16 | - [1.0](https://github.com/dbt-labs/dbt-bigquery/blob/1.0.latest/CHANGELOG.md) 17 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to `dbt-bigquery` 2 | 3 | This repository has moved into [dbt-labs/dbt-adapters](https://www.github.com/dbt-labs/dbt-adapters). 4 | Please refer to that repo for a guide on how to contribute to `dbt-bigquery`. 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | [dbt logo image] 3 |
4 | 5 | This repository has moved into [dbt-labs/dbt-adapters](https://www.github.com/dbt-labs/dbt-adapters). 6 | Please refer to that repo for information about `dbt-bigquery`. 7 | -------------------------------------------------------------------------------- /dbt/__init__.py: -------------------------------------------------------------------------------- 1 | from pkgutil import extend_path 2 | 3 | __path__ = extend_path(__path__, __name__) 4 | -------------------------------------------------------------------------------- /dbt/adapters/bigquery/__init__.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.bigquery.column import BigQueryColumn 2 | from dbt.adapters.bigquery.connections import BigQueryConnectionManager 3 | from dbt.adapters.bigquery.credentials import BigQueryCredentials 4 | from dbt.adapters.bigquery.impl import BigQueryAdapter, GrantTarget, PartitionConfig 5 | from dbt.adapters.bigquery.relation import BigQueryRelation 6 | 7 | from dbt.adapters.base import AdapterPlugin 8 | from dbt.include import bigquery 9 | 10 | Plugin = AdapterPlugin( 11 | adapter=BigQueryAdapter, credentials=BigQueryCredentials, include_path=bigquery.PACKAGE_PATH 12 | ) 13 | -------------------------------------------------------------------------------- /dbt/adapters/bigquery/__version__.py: -------------------------------------------------------------------------------- 1 | version = "1.10.0a1" 2 | -------------------------------------------------------------------------------- /dbt/adapters/bigquery/clients.py: -------------------------------------------------------------------------------- 1 | from google.api_core.client_info import ClientInfo 2 | from google.api_core.client_options import ClientOptions 3 | from google.auth.exceptions import DefaultCredentialsError 4 | from google.cloud.bigquery import Client as BigQueryClient, DEFAULT_RETRY as BQ_DEFAULT_RETRY 5 | from google.cloud.dataproc_v1 import BatchControllerClient, JobControllerClient 6 | from google.cloud.storage import Client as StorageClient 7 | from google.cloud.storage.retry import DEFAULT_RETRY as GCS_DEFAULT_RETRY 8 | 9 | from dbt.adapters.events.logging import AdapterLogger 10 | 11 | import dbt.adapters.bigquery.__version__ as dbt_version 12 | from dbt.adapters.bigquery.credentials import ( 13 | BigQueryCredentials, 14 | create_google_credentials, 15 | set_default_credentials, 16 | ) 17 | 18 | 19 | _logger = AdapterLogger("BigQuery") 20 | 21 | 22 | def create_bigquery_client(credentials: BigQueryCredentials) -> BigQueryClient: 23 | try: 24 | return _create_bigquery_client(credentials) 25 | except DefaultCredentialsError: 26 | _logger.info("Please log into GCP to continue") 27 | set_default_credentials() 28 | return _create_bigquery_client(credentials) 29 | 30 | 31 | @GCS_DEFAULT_RETRY 32 | def create_gcs_client(credentials: BigQueryCredentials) -> StorageClient: 33 | return StorageClient( 34 | project=credentials.execution_project, 35 | credentials=create_google_credentials(credentials), 36 | ) 37 | 38 | 39 | # dataproc does not appear to have a default retry like BQ and GCS 40 | def create_dataproc_job_controller_client(credentials: BigQueryCredentials) -> JobControllerClient: 41 | return JobControllerClient( 42 | credentials=create_google_credentials(credentials), 43 | client_options=ClientOptions(api_endpoint=_dataproc_endpoint(credentials)), 44 | ) 45 | 46 | 47 | # dataproc does not appear to have a default retry like BQ and GCS 48 | def 
create_dataproc_batch_controller_client( 49 | credentials: BigQueryCredentials, 50 | ) -> BatchControllerClient: 51 | return BatchControllerClient( 52 | credentials=create_google_credentials(credentials), 53 | client_options=ClientOptions(api_endpoint=_dataproc_endpoint(credentials)), 54 | ) 55 | 56 | 57 | @BQ_DEFAULT_RETRY 58 | def _create_bigquery_client(credentials: BigQueryCredentials) -> BigQueryClient: 59 | return BigQueryClient( 60 | credentials.execution_project, 61 | create_google_credentials(credentials), 62 | location=getattr(credentials, "location", None), 63 | client_info=ClientInfo(user_agent=f"dbt-bigquery-{dbt_version.version}"), 64 | client_options=ClientOptions(quota_project_id=credentials.quota_project), 65 | ) 66 | 67 | 68 | def _dataproc_endpoint(credentials: BigQueryCredentials) -> str: 69 | return f"{credentials.dataproc_region}-dataproc.googleapis.com:443" 70 | -------------------------------------------------------------------------------- /dbt/adapters/bigquery/dataset.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from google.cloud.bigquery import AccessEntry, Dataset 4 | 5 | from dbt.adapters.events.logging import AdapterLogger 6 | 7 | 8 | logger = AdapterLogger("BigQuery") 9 | 10 | 11 | def is_access_entry_in_dataset(dataset: Dataset, access_entry: AccessEntry) -> bool: 12 | """Check if the access entry already exists in the dataset. 13 | 14 | Args: 15 | dataset (Dataset): the dataset to be updated 16 | access_entry (AccessEntry): the access entry to be added to the dataset 17 | 18 | Returns: 19 | bool: True if entry exists in dataset, False otherwise 20 | """ 21 | access_entries: List[AccessEntry] = dataset.access_entries 22 | # we can't simply check if an access entry is in the list as the current equality check 23 | # does not work because the locally created AccessEntry can have extra properties. 24 | for existing_entry in access_entries: 25 | role_match = existing_entry.role == access_entry.role 26 | entity_type_match = existing_entry.entity_type == access_entry.entity_type 27 | property_match = existing_entry._properties.items() <= access_entry._properties.items() 28 | if role_match and entity_type_match and property_match: 29 | return True 30 | return False 31 | 32 | 33 | def add_access_entry_to_dataset(dataset: Dataset, access_entry: AccessEntry) -> Dataset: 34 | """Adds an access entry to a dataset. Always use is_access_entry_in_dataset to check 35 | if the access entry already exists before calling this function.
36 | 37 | Args: 38 | dataset (Dataset): the dataset to be updated 39 | access_entry (AccessEntry): the access entry to be added to the dataset 40 | 41 | Returns: 42 | Dataset: the updated dataset 43 | """ 44 | access_entries: List[AccessEntry] = dataset.access_entries 45 | access_entries.append(access_entry) 46 | dataset.access_entries = access_entries 47 | return dataset 48 | -------------------------------------------------------------------------------- /dbt/adapters/bigquery/relation_configs/__init__.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.bigquery.relation_configs._base import BigQueryBaseRelationConfig 2 | from dbt.adapters.bigquery.relation_configs._cluster import ( 3 | BigQueryClusterConfig, 4 | BigQueryClusterConfigChange, 5 | ) 6 | from dbt.adapters.bigquery.relation_configs._materialized_view import ( 7 | BigQueryMaterializedViewConfig, 8 | BigQueryMaterializedViewConfigChangeset, 9 | ) 10 | from dbt.adapters.bigquery.relation_configs._options import ( 11 | BigQueryOptionsConfig, 12 | BigQueryOptionsConfigChange, 13 | ) 14 | from dbt.adapters.bigquery.relation_configs._partition import ( 15 | PartitionConfig, 16 | BigQueryPartitionConfigChange, 17 | ) 18 | from dbt.adapters.bigquery.relation_configs._policies import ( 19 | BigQueryIncludePolicy, 20 | BigQueryQuotePolicy, 21 | ) 22 | -------------------------------------------------------------------------------- /dbt/adapters/bigquery/relation_configs/_base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional, Dict, TYPE_CHECKING 3 | 4 | from dbt.adapters.base.relation import Policy 5 | from dbt.adapters.relation_configs import RelationConfigBase 6 | from google.cloud.bigquery import Table as BigQueryTable 7 | from typing_extensions import Self 8 | 9 | from dbt.adapters.bigquery.relation_configs._policies import ( 10 | BigQueryIncludePolicy, 11 | BigQueryQuotePolicy, 12 | ) 13 | from dbt.adapters.contracts.relation import ComponentName, RelationConfig 14 | 15 | if TYPE_CHECKING: 16 | # Indirectly imported via agate_helper, which is lazy loaded further downfile. 17 | # Used by mypy for earlier type hints. 
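An aside on the `dataset.py` helpers shown above: per the docstring, the membership check is meant to run before the append, so the dataset is only updated when the grant is actually missing. A minimal, hypothetical usage sketch (the client setup and the project, dataset, and view names are placeholders, not adapter code):

```python
# Hypothetical usage of is_access_entry_in_dataset / add_access_entry_to_dataset.
from google.cloud.bigquery import AccessEntry, Client

from dbt.adapters.bigquery.dataset import (
    add_access_entry_to_dataset,
    is_access_entry_in_dataset,
)

client = Client()  # assumes application-default credentials
dataset = client.get_dataset("my-project.my_dataset")  # placeholder dataset id

# Grant an authorized view access to the dataset; the entity_id shape follows the BigQuery API.
entry = AccessEntry(
    role=None,
    entity_type="view",
    entity_id={"projectId": "my-project", "datasetId": "my_dataset", "tableId": "my_view"},
)

# Append and persist only when the entry is not already present, avoiding redundant updates.
if not is_access_entry_in_dataset(dataset, entry):
    dataset = add_access_entry_to_dataset(dataset, entry)
    client.update_dataset(dataset, ["access_entries"])
```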
18 | import agate 19 | 20 | 21 | @dataclass(frozen=True, eq=True, unsafe_hash=True) 22 | class BigQueryBaseRelationConfig(RelationConfigBase): 23 | @classmethod 24 | def include_policy(cls) -> Policy: 25 | return BigQueryIncludePolicy() 26 | 27 | @classmethod 28 | def quote_policy(cls) -> Policy: 29 | return BigQueryQuotePolicy() 30 | 31 | @classmethod 32 | def from_relation_config(cls, relation_config: RelationConfig) -> Self: 33 | relation_config_dict = cls.parse_relation_config(relation_config) 34 | relation = cls.from_dict(relation_config_dict) 35 | return relation 36 | 37 | @classmethod 38 | def parse_relation_config(cls, relation_config: RelationConfig) -> Dict: 39 | raise NotImplementedError( 40 | "`parse_model_node()` needs to be implemented on this RelationConfigBase instance" 41 | ) 42 | 43 | @classmethod 44 | def from_bq_table(cls, table: BigQueryTable) -> Self: 45 | relation_config = cls.parse_bq_table(table) 46 | relation = cls.from_dict(relation_config) 47 | return relation 48 | 49 | @classmethod 50 | def parse_bq_table(cls, table: BigQueryTable) -> Dict: 51 | raise NotImplementedError("`parse_bq_table()` is not implemented for this relation type") 52 | 53 | @classmethod 54 | def _render_part(cls, component: ComponentName, value: Optional[str]) -> Optional[str]: 55 | if cls.include_policy().get_part(component) and value: 56 | if cls.quote_policy().get_part(component): 57 | return f'"{value}"' 58 | return value.lower() 59 | return None 60 | 61 | @classmethod 62 | def _get_first_row(cls, results: "agate.Table") -> "agate.Row": 63 | try: 64 | return results.rows[0] 65 | except IndexError: 66 | import agate 67 | 68 | return agate.Row(values=set()) 69 | -------------------------------------------------------------------------------- /dbt/adapters/bigquery/relation_configs/_cluster.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any, Dict, FrozenSet, Optional 3 | 4 | from dbt.adapters.relation_configs import RelationConfigChange 5 | from dbt.adapters.contracts.relation import RelationConfig 6 | from google.cloud.bigquery import Table as BigQueryTable 7 | from typing_extensions import Self 8 | 9 | from dbt.adapters.bigquery.relation_configs._base import BigQueryBaseRelationConfig 10 | 11 | 12 | @dataclass(frozen=True, eq=True, unsafe_hash=True) 13 | class BigQueryClusterConfig(BigQueryBaseRelationConfig): 14 | """ 15 | This config manages table options supporting clustering. 
See the following for more information: 16 | - https://docs.getdbt.com/reference/resource-configs/bigquery-configs#using-table-partitioning-and-clustering 17 | - https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#clustering_column_list 18 | 19 | - fields: set of columns to cluster on 20 | - Note: can contain up to four columns 21 | """ 22 | 23 | fields: FrozenSet[str] 24 | 25 | @classmethod 26 | def from_dict(cls, config_dict: Dict[str, Any]) -> Self: 27 | kwargs_dict = {"fields": config_dict.get("fields")} 28 | return super().from_dict(kwargs_dict) 29 | 30 | @classmethod 31 | def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any]: 32 | config_dict = {} 33 | 34 | if cluster_by := relation_config.config.extra.get("cluster_by"): 35 | # users may input a single field as a string 36 | if isinstance(cluster_by, str): 37 | cluster_by = [cluster_by] 38 | config_dict.update({"fields": frozenset(cluster_by)}) 39 | 40 | return config_dict 41 | 42 | @classmethod 43 | def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: 44 | config_dict = {"fields": frozenset(table.clustering_fields)} 45 | return config_dict 46 | 47 | 48 | @dataclass(frozen=True, eq=True, unsafe_hash=True) 49 | class BigQueryClusterConfigChange(RelationConfigChange): 50 | context: Optional[BigQueryClusterConfig] 51 | 52 | @property 53 | def requires_full_refresh(self) -> bool: 54 | return True 55 | -------------------------------------------------------------------------------- /dbt/adapters/bigquery/relation_configs/_policies.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from dbt.adapters.base.relation import Policy 4 | 5 | 6 | class BigQueryIncludePolicy(Policy): 7 | database: bool = True 8 | schema: bool = True 9 | identifier: bool = True 10 | 11 | 12 | @dataclass 13 | class BigQueryQuotePolicy(Policy): 14 | database: bool = True 15 | schema: bool = True 16 | identifier: bool = True 17 | -------------------------------------------------------------------------------- /dbt/adapters/bigquery/utility.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Any, Optional 3 | 4 | import dbt_common.exceptions 5 | 6 | 7 | def bool_setting(value: Optional[Any] = None) -> Optional[bool]: 8 | if value is None: 9 | return None 10 | elif isinstance(value, bool): 11 | return value 12 | elif isinstance(value, str): 13 | # don't do bool(value) as that is equivalent to: len(value) > 0 14 | if value.lower() == "true": 15 | return True 16 | elif value.lower() == "false": 17 | return False 18 | else: 19 | raise ValueError( 20 | f"Invalid input, " 21 | f"expecting `bool` or `str` ex. 
(True, False, 'true', 'False'), received: {value}" 22 | ) 23 | else: 24 | raise TypeError( 25 | f"Invalid type for bool evaluation, " 26 | f"expecting `bool` or `str`, received: {type(value)}" 27 | ) 28 | 29 | 30 | def float_setting(value: Optional[Any] = None) -> Optional[float]: 31 | if value is None: 32 | return None 33 | elif any(isinstance(value, i) for i in [int, float, str]): 34 | return float(value) 35 | else: 36 | raise TypeError( 37 | f"Invalid type for float evaluation, " 38 | f"expecting `int`, `float`, or `str`, received: {type(value)}" 39 | ) 40 | 41 | 42 | def sql_escape(string): 43 | if not isinstance(string, str): 44 | raise dbt_common.exceptions.CompilationError(f"cannot escape a non-string: {string}") 45 | return json.dumps(string)[1:-1] 46 | -------------------------------------------------------------------------------- /dbt/include/bigquery/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | PACKAGE_PATH = os.path.dirname(__file__) 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/dbt_project.yml: -------------------------------------------------------------------------------- 1 | config-version: 2 2 | name: dbt_bigquery 3 | version: 1.0 4 | 5 | macro-paths: ["macros"] 6 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/adapters/apply_grants.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__get_show_grant_sql(relation) %} 2 | {% set location = adapter.get_dataset_location(relation) %} 3 | {% set relation = relation.incorporate(location=location) %} 4 | 5 | select privilege_type, grantee 6 | from {{ relation.information_schema("OBJECT_PRIVILEGES") }} 7 | where object_schema = "{{ relation.dataset }}" 8 | and object_name = "{{ relation.identifier }}" 9 | -- filter out current user 10 | and split(grantee, ':')[offset(1)] != session_user() 11 | {% endmacro %} 12 | 13 | 14 | {%- macro bigquery__get_grant_sql(relation, privilege, grantee) -%} 15 | grant `{{ privilege }}` on {{ relation.type }} {{ relation }} to {{ '\"' + grantee|join('\", \"') + '\"' }} 16 | {%- endmacro -%} 17 | 18 | {%- macro bigquery__get_revoke_sql(relation, privilege, grantee) -%} 19 | revoke `{{ privilege }}` on {{ relation.type }} {{ relation }} from {{ '\"' + grantee|join('\", \"') + '\"' }} 20 | {%- endmacro -%} 21 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/adapters/columns.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__get_empty_subquery_sql(select_sql, select_sql_header=none) %} 2 | {%- if select_sql_header is not none -%} 3 | {{ select_sql_header }} 4 | {%- endif -%} 5 | select * from ( 6 | {{ select_sql }} 7 | ) as __dbt_sbq 8 | where false and current_timestamp() = current_timestamp() 9 | limit 0 10 | {% endmacro %} 11 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/catalog/by_relation.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__get_catalog_relations(information_schema, relations) -%} 2 | 3 | {%- if (relations | length) == 0 -%} 4 | {# Hopefully nothing cares about the columns we return when there are no rows #} 5 | {%- set query = "select 1 as id limit 0" -%} 6 | 7 | {%- else -%} 8 | {%- set query 
-%} 9 | with 10 | table_shards_stage as ({{ _bigquery__get_table_shards_sql(information_schema) }}), 11 | table_shards as ( 12 | select * from table_shards_stage 13 | where ( 14 | {%- for relation in relations -%} 15 | ( 16 | upper(table_schema) = upper('{{ relation.schema }}') 17 | and upper(table_name) = upper('{{ relation.identifier }}') 18 | ) 19 | {%- if not loop.last %} or {% endif -%} 20 | {%- endfor -%} 21 | ) 22 | ), 23 | tables as ({{ _bigquery__get_tables_sql() }}), 24 | table_stats as ({{ _bigquery__get_table_stats_sql() }}), 25 | 26 | columns as ({{ _bigquery__get_columns_sql(information_schema) }}), 27 | column_stats as ({{ _bigquery__get_column_stats_sql() }}) 28 | 29 | {{ _bigquery__get_extended_catalog_sql() }} 30 | {%- endset -%} 31 | 32 | {%- endif -%} 33 | 34 | {{ return(run_query(query)) }} 35 | 36 | {%- endmacro %} 37 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/catalog/by_schema.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__get_catalog(information_schema, schemas) -%} 2 | 3 | {%- if (schemas | length) == 0 -%} 4 | {# Hopefully nothing cares about the columns we return when there are no rows #} 5 | {%- set query = "select 1 as id limit 0" -%} 6 | 7 | {%- else -%} 8 | {%- set query -%} 9 | with 10 | table_shards as ( 11 | {{ _bigquery__get_table_shards_sql(information_schema) }} 12 | where ( 13 | {%- for schema in schemas -%} 14 | upper(tables.dataset_id) = upper('{{ schema }}') 15 | {%- if not loop.last %} or {% endif -%} 16 | {%- endfor -%} 17 | ) 18 | ), 19 | tables as ({{ _bigquery__get_tables_sql() }}), 20 | table_stats as ({{ _bigquery__get_table_stats_sql() }}), 21 | 22 | columns as ({{ _bigquery__get_columns_sql(information_schema) }}), 23 | column_stats as ({{ _bigquery__get_column_stats_sql() }}) 24 | 25 | {{ _bigquery__get_extended_catalog_sql() }} 26 | {%- endset -%} 27 | 28 | {%- endif -%} 29 | 30 | {{ return(run_query(query)) }} 31 | 32 | {%- endmacro %} 33 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/etc.sql: -------------------------------------------------------------------------------- 1 | {% macro date_sharded_table(base_name) %} 2 | {{ return(base_name ~ "[DBT__PARTITION_DATE]") }} 3 | {% endmacro %} 4 | 5 | {% macro grant_access_to(entity, entity_type, role, grant_target_dict) -%} 6 | {% do adapter.grant_access_to(entity, entity_type, role, grant_target_dict) %} 7 | {% endmacro %} 8 | 9 | {%- macro get_partitions_metadata(table) -%} 10 | {%- if execute -%} 11 | {%- set res = adapter.get_partitions_metadata(table) -%} 12 | {{- return(res) -}} 13 | {%- endif -%} 14 | {{- return(None) -}} 15 | {%- endmacro -%} 16 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/materializations/clone.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__can_clone_table() %} 2 | {{ return(True) }} 3 | {% endmacro %} 4 | 5 | {% macro bigquery__create_or_replace_clone(this_relation, defer_relation) %} 6 | create or replace 7 | table {{ this_relation }} 8 | clone {{ defer_relation }} 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/materializations/copy.sql: -------------------------------------------------------------------------------- 1 | {% materialization copy, 
adapter='bigquery' -%} 2 | 3 | {# Setup #} 4 | {{ run_hooks(pre_hooks) }} 5 | 6 | {% set destination = this.incorporate(type='table') %} 7 | 8 | {# there can be several ref() or source() according to BQ copy API docs #} 9 | {# cycle over ref() and source() to create source tables array #} 10 | {% set source_array = [] %} 11 | {% for ref_table in model.refs %} 12 | {{ source_array.append(ref(ref_table.get('package'), ref_table.name, version=ref_table.get('version'))) }} 13 | {% endfor %} 14 | 15 | {% for src_table in model.sources %} 16 | {{ source_array.append(source(*src_table)) }} 17 | {% endfor %} 18 | 19 | {# Call adapter copy_table function #} 20 | {%- set result_str = adapter.copy_table( 21 | source_array, 22 | destination, 23 | config.get('copy_materialization', default = 'table')) -%} 24 | 25 | {{ store_result('main', response=result_str) }} 26 | 27 | {# Clean up #} 28 | {{ run_hooks(post_hooks) }} 29 | {%- do apply_grants(target_relation, grant_config) -%} 30 | {{ adapter.commit() }} 31 | 32 | {{ return({'relations': [destination]}) }} 33 | {%- endmaterialization %} 34 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/materializations/incremental_strategy/common.sql: -------------------------------------------------------------------------------- 1 | {% macro declare_dbt_max_partition(relation, partition_by, compiled_code, language='sql') %} 2 | 3 | {#-- TODO: revisit partitioning with python models --#} 4 | {%- if '_dbt_max_partition' in compiled_code and language == 'sql' -%} 5 | 6 | declare _dbt_max_partition {{ partition_by.data_type_for_partition() }} default ( 7 | select max({{ partition_by.field }}) from {{ this }} 8 | where {{ partition_by.field }} is not null 9 | ); 10 | 11 | {%- endif -%} 12 | 13 | {% endmacro %} 14 | 15 | {% macro predicate_for_avoid_require_partition_filter(target='DBT_INTERNAL_DEST') %} 16 | 17 | {%- set raw_partition_by = config.get('partition_by', none) -%} 18 | {%- set partition_config = adapter.parse_partition_by(raw_partition_by) -%} 19 | {%- set predicate = none -%} 20 | 21 | {% if partition_config and config.get('require_partition_filter') -%} 22 | {%- set partition_field = partition_config.time_partitioning_field() if partition_config.time_ingestion_partitioning else partition_config.field -%} 23 | {% set predicate %} 24 | ( 25 | `{{ target }}`.`{{ partition_field }}` is null 26 | or `{{ target }}`.`{{ partition_field }}` is not null 27 | ) 28 | {% endset %} 29 | {%- endif -%} 30 | 31 | {{ return(predicate) }} 32 | 33 | {% endmacro %} 34 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/materializations/incremental_strategy/merge.sql: -------------------------------------------------------------------------------- 1 | {% macro bq_generate_incremental_merge_build_sql( 2 | tmp_relation, target_relation, sql, unique_key, partition_by, dest_columns, tmp_relation_exists, incremental_predicates 3 | ) %} 4 | {%- set source_sql -%} 5 | {%- if tmp_relation_exists -%} 6 | ( 7 | select 8 | {% if partition_by.time_ingestion_partitioning -%} 9 | {{ partition_by.insertable_time_partitioning_field() }}, 10 | {%- endif -%} 11 | * from {{ tmp_relation }} 12 | ) 13 | {%- else -%} {#-- wrap sql in parens to make it a subquery --#} 14 | ( 15 | {%- if partition_by.time_ingestion_partitioning -%} 16 | {{ wrap_with_time_ingestion_partitioning_sql(partition_by, sql, True) }} 17 | {%- else -%} 18 | {{sql}} 19 | {%- endif %} 20 | ) 21 | {%- endif 
-%} 22 | {%- endset -%} 23 | 24 | {%- set predicates = [] if incremental_predicates is none else [] + incremental_predicates -%} 25 | {%- set avoid_require_partition_filter = predicate_for_avoid_require_partition_filter() -%} 26 | {%- if avoid_require_partition_filter is not none -%} 27 | {% do predicates.append(avoid_require_partition_filter) %} 28 | {%- endif -%} 29 | 30 | {% set build_sql = get_merge_sql(target_relation, source_sql, unique_key, dest_columns, predicates) %} 31 | 32 | {{ return(build_sql) }} 33 | 34 | {% endmacro %} 35 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/materializations/incremental_strategy/microbatch.sql: -------------------------------------------------------------------------------- 1 | {% macro bq_validate_microbatch_config(config) %} 2 | {% if config.get("partition_by") is none %} 3 | {% set missing_partition_msg -%} 4 | The 'microbatch' strategy requires a `partition_by` config. 5 | {%- endset %} 6 | {% do exceptions.raise_compiler_error(missing_partition_msg) %} 7 | {% endif %} 8 | 9 | {% if config.get("partition_by").granularity != config.get('batch_size') %} 10 | {% set invalid_partition_by_granularity_msg -%} 11 | The 'microbatch' strategy requires a `partition_by` config with the same granularity as its configured `batch_size`. 12 | Got: 13 | `batch_size`: {{ config.get('batch_size') }} 14 | `partition_by.granularity`: {{ config.get("partition_by").granularity }} 15 | {%- endset %} 16 | {% do exceptions.raise_compiler_error(invalid_partition_by_granularity_msg) %} 17 | {% endif %} 18 | {% endmacro %} 19 | 20 | {% macro bq_generate_microbatch_build_sql( 21 | tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, tmp_relation_exists, copy_partitions 22 | ) %} 23 | {% set build_sql = bq_insert_overwrite_sql( 24 | tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, tmp_relation_exists, copy_partitions 25 | ) %} 26 | 27 | {{ return(build_sql) }} 28 | {% endmacro %} 29 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/materializations/incremental_strategy/time_ingestion_tables.sql: -------------------------------------------------------------------------------- 1 | {% macro wrap_with_time_ingestion_partitioning_sql(partition_by, sql, is_nested) %} 2 | 3 | select TIMESTAMP({{ partition_by.field }}) as {{ partition_by.insertable_time_partitioning_field() }}, * EXCEPT({{ partition_by.field }}) from ( 4 | {{ sql }} 5 | ){%- if not is_nested -%};{%- endif -%} 6 | 7 | {% endmacro %} 8 | 9 | {% macro get_quoted_with_types_csv(columns) %} 10 | {% set quoted = [] %} 11 | {% for col in columns -%} 12 | {%- do quoted.append(adapter.quote(col.name) ~ " " ~ col.data_type) -%} 13 | {%- endfor %} 14 | {%- set dest_cols_csv = quoted | join(', ') -%} 15 | {{ return(dest_cols_csv) }} 16 | 17 | {% endmacro %} 18 | 19 | {% macro columns_without_partition_fields_csv(partition_config, columns) -%} 20 | {%- set columns_no_partition = partition_config.reject_partition_field_column(columns) -%} 21 | {% set columns_names = get_quoted_with_types_csv(columns_no_partition) %} 22 | {{ return(columns_names) }} 23 | 24 | {%- endmacro -%} 25 | 26 | {% macro bq_insert_into_ingestion_time_partitioned_table_sql(target_relation, sql) -%} 27 | {%- set sql_header = config.get('sql_header', none) -%} 28 | {{ sql_header if sql_header is not none }} 29 | {%- set raw_partition_by = 
config.get('partition_by', none) -%} 30 | {%- set partition_by = adapter.parse_partition_by(raw_partition_by) -%} 31 | {% set dest_columns = adapter.get_columns_in_relation(target_relation) %} 32 | {%- set dest_columns_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%} 33 | 34 | insert into {{ target_relation }} ({{ partition_by.insertable_time_partitioning_field() }}, {{ dest_columns_csv }}) 35 | {{ wrap_with_time_ingestion_partitioning_sql(partition_by, sql, False) }} 36 | 37 | {%- endmacro -%} 38 | 39 | {% macro get_columns_with_types_in_query_sql(select_sql) %} 40 | {% set sql %} 41 | {%- set sql_header = config.get('sql_header', none) -%} 42 | {{ sql_header if sql_header is not none }} 43 | select * from ( 44 | {{ select_sql }} 45 | ) as __dbt_sbq 46 | where false 47 | limit 0 48 | {% endset %} 49 | {{ return(adapter.get_columns_in_select_sql(sql)) }} 50 | {% endmacro %} 51 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/materializations/seed.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro bigquery__create_csv_table(model, agate_table) %} 3 | -- no-op 4 | {% endmacro %} 5 | 6 | {% macro bigquery__reset_csv_table(model, full_refresh, old_relation, agate_table) %} 7 | {{ adapter.drop_relation(old_relation) }} 8 | {% endmacro %} 9 | 10 | {% macro bigquery__load_csv_rows(model, agate_table) %} 11 | 12 | {%- set column_override = model['config'].get('column_types', {}) -%} 13 | {{ adapter.load_dataframe(model['database'], model['schema'], model['alias'], 14 | agate_table, column_override, model['config']['delimiter']) }} 15 | 16 | {% call statement() %} 17 | alter table {{ this.render() }} set {{ bigquery_table_options(config, model) }} 18 | {% endcall %} 19 | 20 | {% if config.persist_relation_docs() and 'description' in model %} 21 | 22 | {{ adapter.update_table_description(model['database'], model['schema'], model['alias'], model['description']) }} 23 | {% endif %} 24 | {% endmacro %} 25 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/materializations/snapshot.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__snapshot_hash_arguments(args) -%} 2 | to_hex(md5(concat({%- for arg in args -%} 3 | coalesce(cast({{ arg }} as string), ''){% if not loop.last %}, '|',{% endif -%} 4 | {%- endfor -%} 5 | ))) 6 | {%- endmacro %} 7 | 8 | {% macro bigquery__create_columns(relation, columns) %} 9 | {{ adapter.alter_table_add_columns(relation, columns) }} 10 | {% endmacro %} 11 | 12 | {% macro bigquery__post_snapshot(staging_relation) %} 13 | -- Clean up the snapshot temp table 14 | {% do drop_relation(staging_relation) %} 15 | {% endmacro %} 16 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/materializations/view.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro bigquery__handle_existing_table(full_refresh, old_relation) %} 3 | {%- if full_refresh -%} 4 | {{ adapter.drop_relation(old_relation) }} 5 | {%- else -%} 6 | {{ exceptions.relation_wrong_type(old_relation, 'view') }} 7 | {%- endif -%} 8 | {% endmacro %} 9 | 10 | 11 | {% materialization view, adapter='bigquery' -%} 12 | -- grab current tables grants config for comparision later on 13 | {% set grant_config = config.get('grants') %} 14 | 15 | {% set to_return = 
bigquery__create_or_replace_view() %} 16 | 17 | {% set target_relation = this.incorporate(type='view') %} 18 | 19 | {% do persist_docs(target_relation, model) %} 20 | 21 | {% if config.get('grant_access_to') %} 22 | {% for grant_target_dict in config.get('grant_access_to') %} 23 | {% do adapter.grant_access_to(this, 'view', None, grant_target_dict) %} 24 | {% endfor %} 25 | {% endif %} 26 | 27 | {% do return(to_return) %} 28 | 29 | {%- endmaterialization %} 30 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/python_model/python.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__resolve_model_name(input_model_name) -%} 2 | {{ input_model_name | string | replace('`', '') | replace('"', '\"') }} 3 | {%- endmacro -%} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/cluster.sql: -------------------------------------------------------------------------------- 1 | {% macro cluster_by(raw_cluster_by) %} 2 | {%- if raw_cluster_by is not none -%} 3 | cluster by {% if raw_cluster_by is string -%} 4 | {% set raw_cluster_by = [raw_cluster_by] %} 5 | {%- endif -%} 6 | {%- for cluster in raw_cluster_by -%} 7 | {{ cluster }} 8 | {%- if not loop.last -%}, {% endif -%} 9 | {%- endfor -%} 10 | 11 | {% endif %} 12 | 13 | {%- endmacro -%} 14 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/drop.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__drop_relation(relation) -%} 2 | {% do adapter.drop_relation(relation) %} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/materialized_view/alter.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__get_alter_materialized_view_as_sql( 2 | relation, 3 | configuration_changes, 4 | sql, 5 | existing_relation, 6 | backup_relation, 7 | intermediate_relation 8 | ) %} 9 | 10 | {% if configuration_changes.requires_full_refresh %} 11 | {{ get_replace_sql(existing_relation, relation, sql) }} 12 | {% else %} 13 | 14 | alter materialized view {{ relation }} 15 | set {{ bigquery_options(configuration_changes.options.context.as_ddl_dict()) }} 16 | 17 | {%- endif %} 18 | 19 | {% endmacro %} 20 | 21 | {% macro bigquery__get_materialized_view_configuration_changes(existing_relation, new_config) %} 22 | {% set _existing_materialized_view = adapter.describe_relation(existing_relation) %} 23 | {% set _configuration_changes = existing_relation.materialized_view_config_changeset(_existing_materialized_view, new_config.model) %} 24 | {% do return(_configuration_changes) %} 25 | {% endmacro %} 26 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/materialized_view/create.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__get_create_materialized_view_as_sql(relation, sql) %} 2 | 3 | {%- set materialized_view = adapter.Relation.materialized_view_from_relation_config(config.model) -%} 4 | 5 | create materialized view if not exists {{ relation }} 6 | {% if materialized_view.partition %}{{ partition_by(materialized_view.partition) }}{% endif %} 7 | {% if materialized_view.cluster %}{{ 
cluster_by(materialized_view.cluster.fields) }}{% endif %} 8 | {{ bigquery_options(materialized_view.options.as_ddl_dict()) }} 9 | as {{ sql }} 10 | 11 | {% endmacro %} 12 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/materialized_view/drop.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__drop_materialized_view(relation) %} 2 | drop materialized view if exists {{ relation }} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/materialized_view/refresh.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__refresh_materialized_view(relation) %} 2 | call bq.refresh_materialized_view('{{ relation.database }}.{{ relation.schema }}.{{ relation.identifier }}') 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/materialized_view/replace.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__get_replace_materialized_view_as_sql(relation, sql) %} 2 | 3 | {%- set materialized_view = adapter.Relation.materialized_view_from_relation_config(config.model) -%} 4 | 5 | create or replace materialized view if not exists {{ relation }} 6 | {% if materialized_view.partition %}{{ partition_by(materialized_view.partition) }}{% endif %} 7 | {% if materialized_view.cluster %}{{ cluster_by(materialized_view.cluster.fields) }}{% endif %} 8 | {{ bigquery_options(materialized_view.options.as_ddl_dict()) }} 9 | as {{ sql }} 10 | 11 | {% endmacro %} 12 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/options.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery_options(opts) %} 2 | {% set options -%} 3 | OPTIONS({% for opt_key, opt_val in opts.items() %} 4 | {{ opt_key }}={{ opt_val }}{{ "," if not loop.last }} 5 | {% endfor %}) 6 | {%- endset %} 7 | {%- do return(options) -%} 8 | {%- endmacro -%} 9 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/partition.sql: -------------------------------------------------------------------------------- 1 | {% macro partition_by(partition_config) -%} 2 | {%- if partition_config is none -%} 3 | {% do return('') %} 4 | {%- elif partition_config.time_ingestion_partitioning -%} 5 | partition by {{ partition_config.render_wrapped() }} 6 | {%- elif partition_config.data_type | lower in ('date','timestamp','datetime') -%} 7 | partition by {{ partition_config.render() }} 8 | {%- elif partition_config.data_type | lower in ('int64') -%} 9 | {%- set range = partition_config.range -%} 10 | partition by range_bucket( 11 | {{ partition_config.field }}, 12 | generate_array({{ range.start}}, {{ range.end }}, {{ range.interval }}) 13 | ) 14 | {%- endif -%} 15 | {%- endmacro -%} 16 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/rename.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__rename_relation(from_relation, to_relation) -%} 2 | {% do adapter.rename_relation(from_relation, to_relation) %} 3 | {% endmacro %} 4 | 
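{#
  Illustrative sketch (assumed example values, not from this repository): given the cluster_by(),
  partition_by(), and bigquery_options() macros above, a model configured with the hypothetical
  settings partition_by={'field': 'created_at', 'data_type': 'date'} and
  cluster_by=['customer_id', 'order_id'] would have its DDL clauses rendered roughly as below.
  The project, dataset, table name, and description are assumptions for the example only.

  create table `my-project`.`my_dataset`.`my_model`
  partition by created_at
  cluster by customer_id, order_id
  OPTIONS(
      description="""an example table"""
  )
  as (
      select 1 as customer_id, 101 as order_id, current_date() as created_at
  );
#}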
-------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/table/drop.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__drop_table(relation) %} 2 | drop table if exists {{ relation }} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/table/options.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery_table_options(config, node, temporary) %} 2 | {% set opts = adapter.get_table_options(config, node, temporary) %} 3 | {%- do return(bigquery_options(opts)) -%} 4 | {%- endmacro -%} 5 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/table/rename.sql: -------------------------------------------------------------------------------- 1 | {%- macro bigquery__get_rename_table_sql(relation, new_name) -%} 2 | alter table {{ relation }} rename to {{ new_name }} 3 | {%- endmacro -%} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/view/drop.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__drop_view(relation) %} 2 | drop view if exists {{ relation }} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/view/options.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery_view_options(config, node) %} 2 | {% set opts = adapter.get_view_options(config, node) %} 3 | {%- do return(bigquery_options(opts)) -%} 4 | {%- endmacro -%} 5 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/view/rename.sql: -------------------------------------------------------------------------------- 1 | {%- macro bigquery__get_rename_view_sql(relation, new_name) -%} 2 | alter view {{ relation }} rename to {{ new_name }} 3 | {%- endmacro -%} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/relations/view/replace.sql: -------------------------------------------------------------------------------- 1 | /* {# 2 | Core materialization implementation. BigQuery and Snowflake are similar 3 | because both can use `create or replace view` where the resulting view's columns 4 | are not necessarily the same as those of the existing view. On Redshift, this would 5 | result in: ERROR: cannot change number of columns in view 6 | 7 | This implementation is superior to the create_temp, swap_with_existing, drop_old 8 | paradigm because transactions don't run DDL queries atomically on Snowflake. By using 9 | `create or replace view`, the materialization becomes atomic in nature. 
10 | #} */ 11 | 12 | {% macro bigquery__create_or_replace_view() %} 13 | {%- set identifier = model['alias'] -%} 14 | 15 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} 16 | {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%} 17 | 18 | {%- set target_relation = api.Relation.create( 19 | identifier=identifier, schema=schema, database=database, 20 | type='view') -%} 21 | {% set grant_config = config.get('grants') %} 22 | 23 | {{ run_hooks(pre_hooks) }} 24 | 25 | -- If there's a table with the same name and we weren't told to full refresh, 26 | -- that's an error. If we were told to full refresh, drop it. This behavior differs 27 | -- for Snowflake and BigQuery, so multiple dispatch is used. 28 | {%- if old_relation is not none and not old_relation.is_view -%} 29 | {{ handle_existing_table(should_full_refresh(), old_relation) }} 30 | {%- endif -%} 31 | 32 | -- build model 33 | {% call statement('main') -%} 34 | {{ get_create_view_as_sql(target_relation, sql) }} 35 | {%- endcall %} 36 | 37 | {% set should_revoke = should_revoke(exists_as_view, full_refresh_mode=True) %} 38 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} 39 | 40 | {{ run_hooks(post_hooks) }} 41 | 42 | {{ return({'relations': [target_relation]}) }} 43 | 44 | {% endmacro %} 45 | 46 | 47 | {% macro handle_existing_table(full_refresh, old_relation) %} 48 | {{ adapter.dispatch('handle_existing_table', 'dbt')(full_refresh, old_relation) }} 49 | {% endmacro %} 50 | 51 | {% macro default__handle_existing_table(full_refresh, old_relation) %} 52 | {{ log("Dropping relation " ~ old_relation ~ " because it is of type " ~ old_relation.type) }} 53 | {{ adapter.drop_relation(old_relation) }} 54 | {% endmacro %} 55 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/array_append.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__array_append(array, new_element) -%} 2 | {{ array_concat(array, array_construct([new_element])) }} 3 | {%- endmacro %} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/array_concat.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__array_concat(array_1, array_2) -%} 2 | array_concat({{ array_1 }}, {{ array_2 }}) 3 | {%- endmacro %} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/array_construct.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__array_construct(inputs, data_type) -%} 2 | {% if inputs|length > 0 %} 3 | [ {{ inputs|join(' , ') }} ] 4 | {% else %} 5 | ARRAY<{{data_type}}>[] 6 | {% endif %} 7 | {%- endmacro %} 8 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/bool_or.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__bool_or(expression) -%} 2 | 3 | logical_or({{ expression }}) 4 | 5 | {%- endmacro %} 6 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/date.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__date(year, month, day) -%} 2 | date({{ year }}, {{ 
month }}, {{ day }}) 3 | {%- endmacro %} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/date_trunc.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__date_trunc(datepart, date) -%} 2 | timestamp_trunc( 3 | cast({{date}} as timestamp), 4 | {{datepart}} 5 | ) 6 | 7 | {%- endmacro %} 8 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/dateadd.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__dateadd(datepart, interval, from_date_or_timestamp) %} 2 | 3 | datetime_add( 4 | cast( {{ from_date_or_timestamp }} as datetime), 5 | interval {{ interval }} {{ datepart }} 6 | ) 7 | 8 | {% endmacro %} 9 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/datediff.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__datediff(first_date, second_date, datepart) -%} 2 | 3 | {% if dbt_version[0] == 1 and dbt_version[2] >= 2 %} 4 | {{ return(dbt.datediff(first_date, second_date, datepart)) }} 5 | {% else %} 6 | 7 | datetime_diff( 8 | cast({{second_date}} as datetime), 9 | cast({{first_date}} as datetime), 10 | {{datepart}} 11 | ) 12 | 13 | {% endif %} 14 | 15 | {%- endmacro %} 16 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/escape_single_quotes.sql: -------------------------------------------------------------------------------- 1 | {# /*BigQuery uses a single backslash: they're -> they\'re. The second backslash is to escape it from Jinja */ #} 2 | {% macro bigquery__escape_single_quotes(expression) -%} 3 | {{ expression | replace("'", "\\'") }} 4 | {%- endmacro %} 5 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/except.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__except() %} 2 | 3 | except distinct 4 | 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/get_columns_spec_ddl.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__format_column(column) -%} 2 | {% set data_type = column.data_type %} 3 | {% set formatted = column.column.lower() ~ " " ~ data_type %} 4 | {{ return({'name': column.name, 'data_type': data_type, 'formatted': formatted}) }} 5 | {%- endmacro -%} 6 | 7 | {% macro bigquery__get_empty_schema_sql(columns) %} 8 | {%- set col_err = [] -%} 9 | {% for col in columns.values() %} 10 | {%- if col['data_type'] is not defined -%} 11 | {{ col_err.append(col['name']) }} 12 | {%- endif -%} 13 | {%- endfor -%} 14 | {%- if (col_err | length) > 0 -%} 15 | {{ exceptions.column_type_missing(column_names=col_err) }} 16 | {%- endif -%} 17 | 18 | {%- set columns = adapter.nest_column_data_types(columns) -%} 19 | {{ return(dbt.default__get_empty_schema_sql(columns)) }} 20 | {% endmacro %} 21 | 22 | {% macro bigquery__get_select_subquery(sql) %} 23 | select {{ adapter.dispatch('get_column_names')() }} 24 | from ( 25 | {{ sql }} 26 | ) as model_subq 27 | {%- endmacro %} 28 | 29 | {% macro bigquery__get_column_names() %} 30 | {#- loop through nested user_provided_columns to get column names -#} 31 | 
{%- set user_provided_columns = adapter.nest_column_data_types(model['columns']) -%} 32 | {%- for i in user_provided_columns %} 33 | {%- set col = user_provided_columns[i] -%} 34 | {%- set col_name = adapter.quote(col['name']) if col.get('quote') else col['name'] -%} 35 | {{ col_name }}{{ ", " if not loop.last }} 36 | {%- endfor -%} 37 | {% endmacro %} 38 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/hash.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__hash(field) -%} 2 | to_hex({{dbt.default__hash(field)}}) 3 | {%- endmacro %} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/intersect.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__intersect() %} 2 | 3 | intersect distinct 4 | 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/listagg.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__listagg(measure, delimiter_text, order_by_clause, limit_num) -%} 2 | 3 | string_agg( 4 | {{ measure }}, 5 | {{ delimiter_text }} 6 | {% if order_by_clause -%} 7 | {{ order_by_clause }} 8 | {%- endif %} 9 | {% if limit_num -%} 10 | limit {{ limit_num }} 11 | {%- endif %} 12 | ) 13 | 14 | {%- endmacro %} 15 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/position.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__position(substring_text, string_text) %} 2 | 3 | strpos( 4 | {{ string_text }}, 5 | {{ substring_text }} 6 | 7 | ) 8 | 9 | {%- endmacro -%} 10 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/right.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__right(string_text, length_expression) %} 2 | 3 | case when {{ length_expression }} = 0 4 | then '' 5 | else 6 | substr( 7 | {{ string_text }}, 8 | -1 * ({{ length_expression }}) 9 | ) 10 | end 11 | 12 | {%- endmacro -%} 13 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/safe_cast.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__safe_cast(field, type) %} 2 | {%- if type.lower().startswith('array') and field is iterable and (field is not string and field is not mapping) and field | length > 0 -%} 3 | {#-- Extract nested type from 'array' --#} 4 | {% set nested_type = type.lower()[6:-1] %} 5 | {#-- BigQuery does not support direct casts to arrays. 
instead, each element must be cast individually + reaggregated into an array --#} 6 | {%- if cast_from_string_unsupported_for(nested_type) %} 7 | (select array_agg(safe_cast(i as {{ nested_type }})) from unnest([ 8 | {%- for nested_field in field %} 9 | {{ nested_field.strip('"').strip("'") }}{{ ',' if not loop.last }} 10 | {%- endfor %} 11 | ]) i) 12 | {%- else -%} 13 | (select array_agg(safe_cast(i as {{nested_type}})) from unnest({{field}}) i) 14 | {%- endif -%} 15 | 16 | {%- elif type.lower() == 'json' and field is mapping -%} 17 | safe_cast(json {{ dbt.string_literal(tojson(field)) }} as json) 18 | {%- elif cast_from_string_unsupported_for(type) and field is string -%} 19 | safe_cast({{field.strip('"').strip("'")}} as {{type}}) 20 | {%- else -%} 21 | safe_cast({{field}} as {{type}}) 22 | {%- endif -%} 23 | {% endmacro %} 24 | 25 | {% macro cast_from_string_unsupported_for(type) %} 26 | {{ return(type.lower().startswith('struct') or type.lower() == 'geography') }} 27 | {% endmacro %} 28 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/split_part.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__split_part(string_text, delimiter_text, part_number) %} 2 | 3 | {% if part_number >= 0 %} 4 | split( 5 | {{ string_text }}, 6 | {{ delimiter_text }} 7 | )[safe_offset({{ part_number - 1 }})] 8 | {% else %} 9 | split( 10 | {{ string_text }}, 11 | {{ delimiter_text }} 12 | )[safe_offset( 13 | length({{ string_text }}) 14 | - length( 15 | replace({{ string_text }}, {{ delimiter_text }}, '') 16 | ) + 1 + {{ part_number }} 17 | )] 18 | {% endif %} 19 | 20 | {% endmacro %} 21 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/string_literal.sql: -------------------------------------------------------------------------------- 1 | {%- macro bigquery__string_literal(value) -%} 2 | '''{{ value }}''' 3 | {%- endmacro -%} 4 | -------------------------------------------------------------------------------- /dbt/include/bigquery/macros/utils/timestamps.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__current_timestamp() -%} 2 | current_timestamp() 3 | {%- endmacro %} 4 | 5 | {% macro bigquery__snapshot_string_as_time(timestamp) -%} 6 | {%- set result = 'TIMESTAMP("' ~ timestamp ~ '")' -%} 7 | {{ return(result) }} 8 | {%- endmacro %} 9 | 10 | {% macro bigquery__current_timestamp_backcompat() -%} 11 | current_timestamp 12 | {%- endmacro %} 13 | -------------------------------------------------------------------------------- /dbt/include/bigquery/profile_template.yml: -------------------------------------------------------------------------------- 1 | fixed: 2 | type: bigquery 3 | priority: interactive 4 | job_retries: 1 5 | prompts: 6 | _choose_authentication_method: 7 | oauth: 8 | _fixed_method: oauth 9 | service_account: 10 | _fixed_method: service-account 11 | keyfile: 12 | hint: '/path/to/bigquery/keyfile.json' 13 | project: 14 | hint: 'GCP project id' 15 | dataset: 16 | hint: 'the name of your dbt dataset' 17 | threads: 18 | hint: '1 or more' 19 | type: 'int' 20 | job_execution_timeout_seconds: 21 | default: 300 22 | type: 'int' 23 | _choose_location: 24 | US: 25 | _fixed_location: US 26 | EU: 27 | _fixed_location: EU 28 | -------------------------------------------------------------------------------- /docker/Dockerfile: 
-------------------------------------------------------------------------------- 1 | # this image gets published to GHCR for production use 2 | ARG py_version=3.11.2 3 | 4 | FROM python:$py_version-slim-bullseye AS base 5 | 6 | RUN apt-get update \ 7 | && apt-get dist-upgrade -y \ 8 | && apt-get install -y --no-install-recommends \ 9 | build-essential=12.9 \ 10 | ca-certificates=20210119 \ 11 | git=1:2.30.2-1+deb11u2 \ 12 | libpq-dev=13.18-0+deb11u1 \ 13 | make=4.3-4.1 \ 14 | openssh-client=1:8.4p1-5+deb11u3 \ 15 | software-properties-common=0.96.20.2-2.1 \ 16 | && apt-get clean \ 17 | && rm -rf \ 18 | /var/lib/apt/lists/* \ 19 | /tmp/* \ 20 | /var/tmp/* 21 | 22 | ENV PYTHONIOENCODING=utf-8 23 | ENV LANG=C.UTF-8 24 | 25 | RUN python -m pip install --upgrade "pip==24.0" "setuptools==69.2.0" "wheel==0.43.0" --no-cache-dir 26 | 27 | 28 | FROM base AS dbt-bigquery 29 | 30 | ARG commit_ref=main 31 | 32 | HEALTHCHECK CMD dbt --version || exit 1 33 | 34 | WORKDIR /usr/app/dbt/ 35 | ENTRYPOINT ["dbt"] 36 | 37 | RUN python -m pip install --no-cache-dir "dbt-bigquery @ git+https://github.com/dbt-labs/dbt-bigquery@${commit_ref}" 38 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Docker for dbt 2 | This Dockerfile is suitable for building dbt Docker images locally or with CI/CD to automate populating a container registry. 3 | 4 | 5 | ## Building an image: 6 | This Dockerfile can create images for the following target: `dbt-bigquery` 7 | 8 | To build a new image, run the following docker command: 9 | ```shell 10 | docker build --tag <your_image_name> --target dbt-bigquery <path/to/dockerfile> 11 | ``` 12 | --- 13 | > **Note:** Docker must be configured to use [BuildKit](https://docs.docker.com/develop/develop-images/build_enhancements/) in order for images to build properly! 14 | 15 | --- 16 | 17 | By default, the image will be populated with the latest version of `dbt-bigquery` on `main`. 18 | If you need to use a different version you can specify it by git ref using the `--build-arg` flag: 19 | ```shell 20 | docker build --tag <your_image_name> \ 21 | --target dbt-bigquery \ 22 | --build-arg commit_ref=<commit_ref> \ 23 | <path/to/dockerfile> 24 | ``` 25 | 26 | ### Examples: 27 | To build an image named "my-dbt" that supports BigQuery using the latest releases: 28 | ```shell 29 | cd dbt-bigquery/docker 30 | docker build --tag my-dbt --target dbt-bigquery . 31 | ``` 32 | 33 | To build an image named "my-other-dbt" that supports BigQuery using the adapter version 1.0.0b1: 34 | ```shell 35 | cd dbt-bigquery/docker 36 | docker build \ 37 | --tag my-other-dbt \ 38 | --target dbt-bigquery \ 39 | --build-arg commit_ref=v1.0.0b1 \ 40 | . 41 | ``` 42 | 43 | ## Running an image in a container: 44 | The `ENTRYPOINT` for this Dockerfile is the command `dbt`, so you can bind-mount your project to `/usr/app` and use dbt as normal: 45 | ```shell 46 | docker run \ 47 | --network=host \ 48 | --mount type=bind,source=path/to/project,target=/usr/app \ 49 | --mount type=bind,source=path/to/profiles.yml,target=/root/.dbt/profiles.yml \ 50 | my-dbt \ 51 | ls 52 | ``` 53 | --- 54 | **Notes:** 55 | * Bind-mount sources _must_ be absolute paths 56 | * You may need to make adjustments to the docker networking settings depending on the specifics of your data warehouse/database host. 
57 | 58 | --- 59 | -------------------------------------------------------------------------------- /docker/dev.Dockerfile: -------------------------------------------------------------------------------- 1 | # this image does not get published, it is intended for local development only, see `Makefile` for usage 2 | FROM ubuntu:24.04 AS base 3 | 4 | # prevent python installation from asking for time zone region 5 | ARG DEBIAN_FRONTEND=noninteractive 6 | 7 | # add python repository 8 | RUN apt-get update \ 9 | && apt-get install -y software-properties-common=0.99.48 \ 10 | && add-apt-repository -y ppa:deadsnakes/ppa \ 11 | && apt-get clean \ 12 | && rm -rf \ 13 | /var/lib/apt/lists/* \ 14 | /tmp/* \ 15 | /var/tmp/* 16 | 17 | # install python 18 | RUN apt-get update \ 19 | && apt-get install -y --no-install-recommends \ 20 | build-essential=12.10ubuntu1 \ 21 | git-all=1:2.43.0-1ubuntu7.1 \ 22 | python3.9=3.9.20-1+noble1 \ 23 | python3.9-dev=3.9.20-1+noble1 \ 24 | python3.9-distutils=3.9.20-1+noble1 \ 25 | python3.9-venv=3.9.20-1+noble1 \ 26 | python3-pip=24.0+dfsg-1ubuntu1 \ 27 | python3-wheel=0.42.0-2 \ 28 | && apt-get clean \ 29 | && rm -rf \ 30 | /var/lib/apt/lists/* \ 31 | /tmp/* \ 32 | /var/tmp/* 33 | 34 | # update the default system interpreter to the newly installed version 35 | RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1 36 | 37 | 38 | FROM base AS dbt-bigquery-dev 39 | 40 | HEALTHCHECK CMD python --version || exit 1 41 | 42 | # send stdout/stderr to terminal 43 | ENV PYTHONUNBUFFERED=1 44 | 45 | # setup mount for local code 46 | WORKDIR /opt/code 47 | VOLUME /opt/code 48 | 49 | # create a virtual environment 50 | RUN python3 -m venv /opt/venv 51 | -------------------------------------------------------------------------------- /hatch.toml: -------------------------------------------------------------------------------- 1 | [version] 2 | path = "dbt/adapters/bigquery/__version__.py" 3 | 4 | [build.targets.sdist] 5 | packages = ["dbt"] 6 | 7 | [build.targets.wheel] 8 | packages = ["dbt"] 9 | 10 | [envs.default] 11 | python = "3.9" 12 | dependencies = [ 13 | "dbt-adapters @ git+https://github.com/dbt-labs/dbt-adapters.git", 14 | "dbt-common @ git+https://github.com/dbt-labs/dbt-common.git", 15 | "dbt-tests-adapter @ git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter", 16 | "dbt-core @ git+https://github.com/dbt-labs/dbt-core.git#subdirectory=core", 17 | "ddtrace==2.3.0", 18 | "ipdb~=0.13.13", 19 | "pre-commit==3.7.0", 20 | "freezegun", 21 | "pytest>=7.0,<8.0", 22 | "pytest-csv~=3.0", 23 | "pytest-dotenv", 24 | "pytest-logbook~=1.2", 25 | "pytest-mock", 26 | "pytest-xdist", 27 | ] 28 | 29 | [envs.default.scripts] 30 | setup = "pre-commit install" 31 | code-quality = "pre-commit run --all-files" 32 | unit-tests = "python -m pytest {args:tests/unit}" 33 | integration-tests = "python -m pytest --profile service_account {args:tests/functional}" 34 | docker-dev = [ 35 | "docker build -f docker/dev.Dockerfile -t dbt-bigquery-dev .", 36 | "docker run --rm -it --name dbt-bigquery-dev -v $(shell pwd):/opt/code dbt-bigquery-dev", 37 | ] 38 | 39 | [envs.build] 40 | detached = true 41 | dependencies = [ 42 | "wheel", 43 | "twine", 44 | "check-wheel-contents", 45 | ] 46 | 47 | [envs.build.scripts] 48 | check-all = [ 49 | "- check-wheel", 50 | "- check-sdist", 51 | ] 52 | check-wheel = [ 53 | "twine check dist/*", 54 | "find ./dist/dbt_bigquery-*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/", 
55 | "pip freeze | grep dbt-bigquery", 56 | ] 57 | check-sdist = [ 58 | "check-wheel-contents dist/*.whl --ignore W007,W008", 59 | "find ./dist/dbt_bigquery-*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/", 60 | "pip freeze | grep dbt-bigquery", 61 | ] 62 | docker-prod = "docker build -f docker/Dockerfile -t dbt-bigquery ." 63 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | dynamic = ["version"] 7 | name = "dbt-bigquery" 8 | description = "The BigQuery adapter plugin for dbt" 9 | readme = "README.md" 10 | keywords = ["dbt", "adapter", "adapters", "database", "elt", "dbt-core", "dbt Core", "dbt Cloud", "dbt Labs", "bigquery", "google"] 11 | requires-python = ">=3.9.0" 12 | authors = [{ name = "dbt Labs", email = "info@dbtlabs.com" }] 13 | maintainers = [{ name = "dbt Labs", email = "info@dbtlabs.com" }] 14 | classifiers = [ 15 | "Development Status :: 5 - Production/Stable", 16 | "License :: OSI Approved :: Apache Software License", 17 | "Operating System :: MacOS :: MacOS X", 18 | "Operating System :: Microsoft :: Windows", 19 | "Operating System :: POSIX :: Linux", 20 | "Programming Language :: Python :: 3.9", 21 | "Programming Language :: Python :: 3.10", 22 | "Programming Language :: Python :: 3.11", 23 | "Programming Language :: Python :: 3.12", 24 | ] 25 | dependencies = [ 26 | "dbt-common>=1.10,<2.0", 27 | "dbt-adapters>=1.7,<2.0", 28 | # 3.20 introduced pyarrow>=3.0 under the `pandas` extra 29 | "google-cloud-bigquery[pandas]>=3.0,<4.0", 30 | "google-cloud-storage~=2.4", 31 | "google-cloud-dataproc~=5.0", 32 | # ---- 33 | # Expect compatibility with all new versions of these packages, so lower bounds only. 34 | "google-api-core>=2.11.0", 35 | # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency 36 | "dbt-core>=1.8.0", 37 | ] 38 | 39 | [project.urls] 40 | Homepage = "https://github.com/dbt-labs/dbt-bigquery" 41 | Documentation = "https://docs.getdbt.com" 42 | Repository = "https://github.com/dbt-labs/dbt-bigquery.git" 43 | Issues = "https://github.com/dbt-labs/dbt-bigquery/issues" 44 | Changelog = "https://github.com/dbt-labs/dbt-bigquery/blob/main/CHANGELOG.md" 45 | 46 | [tool.mypy] 47 | mypy_path = "third-party-stubs/" 48 | 49 | [tool.pytest.ini_options] 50 | testpaths = ["tests/functional", "tests/unit"] 51 | env_files = ["test.env"] 52 | addopts = "-v -n auto" 53 | color = true 54 | filterwarnings = [ 55 | "ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning", 56 | "ignore:unclosed file .*:ResourceWarning", 57 | ] 58 | -------------------------------------------------------------------------------- /test.env.example: -------------------------------------------------------------------------------- 1 | # Note: These values will come from your BigQuery account and GCP projects. 2 | 3 | # Test Environment field definitions 4 | # Name of a GCP project you don't have access to query. 5 | BIGQUERY_TEST_NO_ACCESS_DATABASE= 6 | # Authentication method required to hookup to BigQuery via client library. 
7 | BIGQUERY_TEST_SERVICE_ACCOUNT_JSON='{}' 8 | 9 | # tests for local ci/cd 10 | DBT_TEST_USER_1="group:buildbot@dbtlabs.com" 11 | DBT_TEST_USER_2="group:engineering-core-team@dbtlabs.com" 12 | DBT_TEST_USER_3="serviceAccount:dbt-integration-test-user@dbt-test-env.iam.gserviceaccount.com" 13 | 14 | # only needed for python model 15 | DATAPROC_REGION=us- 16 | DATAPROC_CLUSTER_NAME= 17 | GCS_BUCKET= 18 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-bigquery/0995665e490cdee9c408d26aac8e1c19fefaebe0/tests/__init__.py -------------------------------------------------------------------------------- /tests/boundary/test_bigquery_sdk.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.util import get_connection 4 | from google.cloud.bigquery import Client, DatasetReference, TableReference 5 | from google.api_core.exceptions import NotFound 6 | 7 | 8 | @pytest.mark.parametrize("table_name", ["this_table_does_not_exist"]) 9 | def test_get_table_does_not_exist(project, table_name): 10 | """ 11 | TODO: replace dbt project methods with direct connection instantiation 12 | """ 13 | with get_connection(project.adapter) as conn: 14 | client: Client = conn.handle 15 | dataset_ref = DatasetReference(project.database, project.test_schema) 16 | table_ref = TableReference(dataset_ref, table_name) 17 | with pytest.raises(NotFound): 18 | client.get_table(table_ref) 19 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | import json 4 | from dbt.adapters.bigquery.credentials import _is_base64, _base64_to_string 5 | 6 | # Import the functional fixtures as a plugin 7 | # Note: fixtures with session scope need to be local 8 | 9 | pytest_plugins = ["dbt.tests.fixtures.project"] 10 | 11 | 12 | def pytest_addoption(parser): 13 | parser.addoption("--profile", action="store", default="oauth", type=str) 14 | 15 | 16 | @pytest.fixture(scope="class") 17 | def dbt_profile_target(request): 18 | profile_type = request.config.getoption("--profile") 19 | if profile_type == "oauth": 20 | target = oauth_target() 21 | elif profile_type == "service_account": 22 | target = service_account_target() 23 | else: 24 | raise ValueError(f"Invalid profile type '{profile_type}'") 25 | return target 26 | 27 | 28 | def oauth_target(): 29 | return { 30 | "type": "bigquery", 31 | "method": "oauth", 32 | "threads": 1, 33 | "job_retries": 2, 34 | "dataproc_region": os.getenv("DATAPROC_REGION"), 35 | "dataproc_cluster_name": os.getenv("DATAPROC_CLUSTER_NAME"), 36 | "gcs_bucket": os.getenv("GCS_BUCKET"), 37 | } 38 | 39 | 40 | def service_account_target(): 41 | credentials_json_str = os.getenv("BIGQUERY_TEST_SERVICE_ACCOUNT_JSON").replace("'", "") 42 | if _is_base64(credentials_json_str): 43 | credentials_json_str = _base64_to_string(credentials_json_str) 44 | credentials = json.loads(credentials_json_str) 45 | project_id = credentials.get("project_id") 46 | return { 47 | "type": "bigquery", 48 | "method": "service-account-json", 49 | "threads": 1, 50 | "job_retries": 2, 51 | "project": project_id, 52 | "keyfile_json": credentials, 53 | # following 3 for python model 54 | "dataproc_region": os.getenv("DATAPROC_REGION"), 55 | 
"dataproc_cluster_name": os.getenv( 56 | "DATAPROC_CLUSTER_NAME" 57 | ), # only needed for cluster submission method 58 | "gcs_bucket": os.getenv("GCS_BUCKET"), 59 | } 60 | -------------------------------------------------------------------------------- /tests/functional/adapter/catalog_tests/files.py: -------------------------------------------------------------------------------- 1 | MY_SEED = """ 2 | id,value,record_valid_date 3 | 1,100,2023-01-01 00:00:00 4 | 2,200,2023-01-02 00:00:00 5 | 3,300,2023-01-02 00:00:00 6 | """.strip() 7 | 8 | 9 | MY_TABLE = """ 10 | {{ config( 11 | materialized='table', 12 | ) }} 13 | select * 14 | from {{ ref('my_seed') }} 15 | """ 16 | 17 | 18 | MY_VIEW = """ 19 | {{ config( 20 | materialized='view', 21 | ) }} 22 | select * 23 | from {{ ref('my_seed') }} 24 | """ 25 | 26 | 27 | MY_MATERIALIZED_VIEW = """ 28 | {{ config( 29 | materialized='materialized_view', 30 | ) }} 31 | select * 32 | from {{ ref('my_table') }} 33 | """ 34 | -------------------------------------------------------------------------------- /tests/functional/adapter/catalog_tests/test_relation_types.py: -------------------------------------------------------------------------------- 1 | from dbt.contracts.results import CatalogArtifact 2 | from dbt.tests.util import run_dbt 3 | import pytest 4 | 5 | from tests.functional.adapter.catalog_tests import files 6 | 7 | 8 | class TestCatalogRelationTypes: 9 | @pytest.fixture(scope="class", autouse=True) 10 | def seeds(self): 11 | return {"my_seed.csv": files.MY_SEED} 12 | 13 | @pytest.fixture(scope="class", autouse=True) 14 | def models(self): 15 | yield { 16 | "my_table.sql": files.MY_TABLE, 17 | "my_view.sql": files.MY_VIEW, 18 | "my_materialized_view.sql": files.MY_MATERIALIZED_VIEW, 19 | } 20 | 21 | @pytest.fixture(scope="class", autouse=True) 22 | def docs(self, project): 23 | run_dbt(["seed"]) 24 | run_dbt(["run"]) 25 | yield run_dbt(["docs", "generate"]) 26 | 27 | @pytest.mark.parametrize( 28 | "node_name,relation_type", 29 | [ 30 | ("seed.test.my_seed", "table"), 31 | ("model.test.my_table", "table"), 32 | ("model.test.my_view", "view"), 33 | ("model.test.my_materialized_view", "materialized view"), 34 | ], 35 | ) 36 | def test_relation_types_populate_correctly( 37 | self, docs: CatalogArtifact, node_name: str, relation_type: str 38 | ): 39 | """ 40 | This test addresses: https://github.com/dbt-labs/dbt-bigquery/issues/995 41 | """ 42 | assert node_name in docs.nodes 43 | node = docs.nodes[node_name] 44 | assert node.metadata.type == relation_type 45 | -------------------------------------------------------------------------------- /tests/functional/adapter/column_types/fixtures.py: -------------------------------------------------------------------------------- 1 | _MACRO_TEST_ALTER_COLUMN_TYPE = """ 2 | {% macro test_alter_column_type(model_name, column_name, new_column_type) %} 3 | {% set relation = ref(model_name) %} 4 | {{ alter_column_type(relation, column_name, new_column_type) }} 5 | {% endmacro %} 6 | """ 7 | 8 | _MODEL_SQL = """ 9 | select 10 | CAST(1 as int64) as int64_col, 11 | CAST(2.0 as float64) as float64_col, 12 | CAST(3.0 as numeric) as numeric_col, 13 | CAST('3' as string) as string_col, 14 | """ 15 | 16 | _MODEL_ALT_SQL = """ 17 | {{ config(materialized='table') }} 18 | select 19 | CAST(1 as int64) as int64_col, 20 | CAST(2.0 as float64) as float64_col, 21 | CAST(3.0 as numeric) as numeric_col, 22 | CAST('3' as string) as string_col, 23 | """ 24 | 25 | _SCHEMA_YML = """ 26 | version: 2 27 | models: 28 | - name: model 
29 | data_tests: 30 | - is_type: 31 | column_map: 32 | int64_col: ['integer', 'number'] 33 | float64_col: ['float', 'number'] 34 | numeric_col: ['numeric', 'number'] 35 | string_col: ['string', 'not number'] 36 | """ 37 | 38 | _ALT_SCHEMA_YML = """ 39 | version: 2 40 | models: 41 | - name: model 42 | data_tests: 43 | - is_type: 44 | column_map: 45 | int64_col: ['string', 'not number'] 46 | float64_col: ['float', 'number'] 47 | numeric_col: ['numeric', 'number'] 48 | string_col: ['string', 'not number'] 49 | """ 50 | -------------------------------------------------------------------------------- /tests/functional/adapter/column_types/test_alter_column_types.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import yaml 3 | from dbt.tests.util import run_dbt 4 | from dbt.tests.adapter.column_types.test_column_types import BaseColumnTypes 5 | from dbt.tests.adapter.column_types.fixtures import macro_test_is_type_sql 6 | from tests.functional.adapter.column_types.fixtures import ( 7 | _MACRO_TEST_ALTER_COLUMN_TYPE, 8 | _MODEL_ALT_SQL, 9 | _ALT_SCHEMA_YML, 10 | ) 11 | 12 | 13 | class BaseAlterColumnTypes(BaseColumnTypes): 14 | @pytest.fixture(scope="class") 15 | def macros(self): 16 | return { 17 | "test_is_type.sql": macro_test_is_type_sql, 18 | "test_alter_column_type.sql": _MACRO_TEST_ALTER_COLUMN_TYPE, 19 | } 20 | 21 | def run_and_alter_and_test(self, alter_column_type_args): 22 | results = run_dbt(["run"]) 23 | assert len(results) == 1 24 | run_dbt(["run-operation", "test_alter_column_type", "--args", alter_column_type_args]) 25 | results = run_dbt(["test"]) 26 | assert len(results) == 1 27 | 28 | 29 | class TestBigQueryAlterColumnTypes(BaseAlterColumnTypes): 30 | @pytest.fixture(scope="class") 31 | def models(self): 32 | return {"model.sql": _MODEL_ALT_SQL, "schema.yml": _ALT_SCHEMA_YML} 33 | 34 | def test_bigquery_alter_column_types(self, project): 35 | alter_column_type_args = yaml.safe_dump( 36 | {"model_name": "model", "column_name": "int64_col", "new_column_type": "string"} 37 | ) 38 | 39 | self.run_and_alter_and_test(alter_column_type_args) 40 | -------------------------------------------------------------------------------- /tests/functional/adapter/column_types/test_column_types.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.column_types.test_column_types import BaseColumnTypes 3 | from tests.functional.adapter.column_types.fixtures import _MODEL_SQL, _SCHEMA_YML 4 | 5 | 6 | class TestBigQueryColumnTypes(BaseColumnTypes): 7 | @pytest.fixture(scope="class") 8 | def models(self): 9 | return {"model.sql": _MODEL_SQL, "schema.yml": _SCHEMA_YML} 10 | 11 | def test_run_and_test(self, project): 12 | self.run_and_test() 13 | -------------------------------------------------------------------------------- /tests/functional/adapter/constraints/fixtures.py: -------------------------------------------------------------------------------- 1 | my_model_struct_wrong_data_type_sql = """ 2 | {{ config(materialized = "table") }} 3 | 4 | select 5 | STRUCT(1 AS struct_column_being_tested, "test" AS another_struct_column) as a 6 | """ 7 | 8 | my_model_struct_correct_data_type_sql = """ 9 | {{ config(materialized = "table")}} 10 | 11 | select 12 | STRUCT("test" AS struct_column_being_tested, "test" AS b) as a 13 | """ 14 | 15 | model_struct_data_type_schema_yml = """ 16 | version: 2 17 | models: 18 | - name: contract_struct_wrong 19 | config: 20 | contract: 21 | 
enforced: true 22 | columns: 23 | - name: a.struct_column_being_tested 24 | data_type: string 25 | - name: a.b 26 | data_type: string 27 | 28 | - name: contract_struct_correct 29 | config: 30 | contract: 31 | enforced: true 32 | columns: 33 | - name: a.struct_column_being_tested 34 | data_type: string 35 | - name: a.b 36 | data_type: string 37 | """ 38 | 39 | my_model_double_struct_wrong_data_type_sql = """ 40 | {{ config(materialized = "table") }} 41 | 42 | select 43 | STRUCT( 44 | STRUCT(1 AS struct_column_being_tested, "test" AS c) as b, 45 | "test" as d 46 | ) as a 47 | """ 48 | 49 | my_model_double_struct_correct_data_type_sql = """ 50 | {{ config(materialized = "table") }} 51 | 52 | select 53 | STRUCT( 54 | STRUCT("test" AS struct_column_being_tested, "test" AS c) as b, 55 | "test" as d 56 | ) as a 57 | """ 58 | 59 | model_double_struct_data_type_schema_yml = """ 60 | version: 2 61 | models: 62 | - name: contract_struct_wrong 63 | config: 64 | contract: 65 | enforced: true 66 | columns: 67 | - name: a.b.struct_column_being_tested 68 | data_type: string 69 | - name: a.b.c 70 | data_type: string 71 | - name: a.d 72 | data_type: string 73 | 74 | - name: contract_struct_correct 75 | config: 76 | contract: 77 | enforced: true 78 | columns: 79 | - name: a.b.struct_column_being_tested 80 | data_type: string 81 | - name: a.b.c 82 | data_type: string 83 | - name: a.d 84 | data_type: string 85 | """ 86 | 87 | 88 | my_model_struct_sql = """ 89 | {{ 90 | config( 91 | materialized = "table" 92 | ) 93 | }} 94 | 95 | select STRUCT("test" as nested_column, "test" as nested_column2) as id 96 | """ 97 | 98 | 99 | model_struct_schema_yml = """ 100 | version: 2 101 | models: 102 | - name: my_model 103 | config: 104 | contract: 105 | enforced: true 106 | columns: 107 | - name: id.nested_column 108 | quote: true 109 | data_type: string 110 | description: hello 111 | constraints: 112 | - type: not_null 113 | - type: unique 114 | - name: id.nested_column2 115 | data_type: string 116 | constraints: 117 | - type: unique 118 | """ 119 | -------------------------------------------------------------------------------- /tests/functional/adapter/dbt_clone/test_dbt_clone.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.dbt_clone.test_dbt_clone import BaseClonePossible 3 | 4 | 5 | class TestBigQueryClonePossible(BaseClonePossible): 6 | @pytest.fixture(autouse=True) 7 | def clean_up(self, project): 8 | yield 9 | with project.adapter.connection_named("__test"): 10 | relation = project.adapter.Relation.create( 11 | database=project.database, schema=f"{project.test_schema}_seeds" 12 | ) 13 | project.adapter.drop_schema(relation) 14 | 15 | relation = project.adapter.Relation.create( 16 | database=project.database, schema=project.test_schema 17 | ) 18 | project.adapter.drop_schema(relation) 19 | 20 | pass 21 | -------------------------------------------------------------------------------- /tests/functional/adapter/dbt_show/test_dbt_show.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.dbt_show.test_dbt_show import ( 3 | BaseShowSqlHeader, 4 | BaseShowLimit, 5 | BaseShowDoesNotHandleDoubleLimit, 6 | ) 7 | 8 | from dbt.tests.util import run_dbt 9 | 10 | model_with_json_struct = """ 11 | select * 12 | from ( 13 | select 14 | struct< 15 | k array< 16 | struct<c1 int64, c2 json> 17 | > 18 | >( 19 | [ 20 | struct( 21 | 1 as c1, 22 | to_json(struct(1 as a)) as c2 23 | ) 24 | ] 25 | ) 26
| as v 27 | ) 28 | """ 29 | 30 | model_with_null_json_struct = """ 31 | select (struct(null)) as null_struct 32 | """ 33 | 34 | 35 | class TestBigQueryShowLimit(BaseShowLimit): 36 | pass 37 | 38 | 39 | class TestBigQueryShowSqlHeader(BaseShowSqlHeader): 40 | pass 41 | 42 | 43 | # Added to check if dbt show works with JSON struct 44 | # Addresses: https://github.com/dbt-labs/dbt-bigquery/issues/972 45 | class TestBigQueryShowSqlWorksWithJSONStruct: 46 | @pytest.fixture(scope="class") 47 | def models(self): 48 | return { 49 | "json_struct_model.sql": model_with_json_struct, 50 | "null_json_struct_model.sql": model_with_null_json_struct, 51 | } 52 | 53 | def test_sql_header(self, project): 54 | run_dbt(["show", "--select", "json_struct_model", "-d"]) 55 | 56 | def test_show_with_null_json_struct(self, project): 57 | run_dbt(["show", "--select", "null_json_struct_model"]) 58 | 59 | 60 | class TestBigQueryShowDoesNotHandleDoubleLimit(BaseShowDoesNotHandleDoubleLimit): 61 | DATABASE_ERROR_MESSAGE = "Syntax error: Expected end of input but got keyword LIMIT" 62 | -------------------------------------------------------------------------------- /tests/functional/adapter/describe_relation/_files.py: -------------------------------------------------------------------------------- 1 | MY_SEED = """ 2 | id,value,record_date 3 | 1,100,2023-01-01 12:00:00 4 | 2,200,2023-01-02 12:00:00 5 | 3,300,2023-01-02 12:00:00 6 | """.strip() 7 | 8 | 9 | MY_BASE_TABLE = """ 10 | {{ config( 11 | materialized='table', 12 | partition_by={ 13 | "field": "record_date", 14 | "data_type": "datetime", 15 | "granularity": "day" 16 | }, 17 | cluster_by=["id", "value"] 18 | ) }} 19 | select 20 | id, 21 | value, 22 | record_date 23 | from {{ ref('my_seed') }} 24 | """ 25 | 26 | 27 | MY_MATERIALIZED_VIEW = """ 28 | {{ config( 29 | materialized='materialized_view', 30 | partition_by={ 31 | "field": "record_date", 32 | "data_type": "datetime", 33 | "granularity": "day" 34 | }, 35 | cluster_by="id", 36 | ) }} 37 | select 38 | id, 39 | value, 40 | record_date 41 | from {{ ref('my_base_table') }} 42 | """ 43 | 44 | 45 | MY_OTHER_BASE_TABLE = """ 46 | {{ config( 47 | materialized='table', 48 | partition_by={ 49 | "field": "value", 50 | "data_type": "int64", 51 | "range": { 52 | "start": 0, 53 | "end": 500, 54 | "interval": 50 55 | } 56 | }, 57 | cluster_by=["id", "value"] 58 | ) }} 59 | select 60 | id, 61 | value, 62 | record_date 63 | from {{ ref('my_seed') }} 64 | """ 65 | 66 | 67 | MY_OTHER_MATERIALIZED_VIEW = """ 68 | {{ config( 69 | materialized='materialized_view', 70 | partition_by={ 71 | "field": "value", 72 | "data_type": "int64", 73 | "range": { 74 | "start": 0, 75 | "end": 500, 76 | "interval": 50 77 | } 78 | }, 79 | cluster_by="id", 80 | enable_refresh=False, 81 | refresh_interval_minutes=60 82 | ) }} 83 | select 84 | id, 85 | value, 86 | record_date 87 | from {{ ref('my_other_base_table') }} 88 | """ 89 | -------------------------------------------------------------------------------- /tests/functional/adapter/empty/test_empty.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.empty.test_empty import BaseTestEmpty, BaseTestEmptyInlineSourceRef 2 | 3 | 4 | class TestBigQueryEmpty(BaseTestEmpty): 5 | pass 6 | 7 | 8 | class TestBigQueryEmptyInlineSourceRef(BaseTestEmptyInlineSourceRef): 9 | pass 10 | -------------------------------------------------------------------------------- /tests/functional/adapter/expected_stats.py: 
-------------------------------------------------------------------------------- 1 | from dbt.tests.util import AnyString, AnyInteger 2 | 3 | 4 | def bigquery_stats(is_table, partition=None, cluster=None): 5 | stats = {} 6 | 7 | if is_table: 8 | stats.update( 9 | { 10 | "num_bytes": { 11 | "id": "num_bytes", 12 | "label": AnyString(), 13 | "value": AnyInteger(), 14 | "description": AnyString(), 15 | "include": True, 16 | }, 17 | "num_rows": { 18 | "id": "num_rows", 19 | "label": AnyString(), 20 | "value": AnyInteger(), 21 | "description": AnyString(), 22 | "include": True, 23 | }, 24 | } 25 | ) 26 | 27 | if partition is not None: 28 | stats.update( 29 | { 30 | "partitioning_type": { 31 | "id": "partitioning_type", 32 | "label": AnyString(), 33 | "value": partition, 34 | "description": AnyString(), 35 | "include": True, 36 | } 37 | } 38 | ) 39 | 40 | if cluster is not None: 41 | stats.update( 42 | { 43 | "clustering_fields": { 44 | "id": "clustering_fields", 45 | "label": AnyString(), 46 | "value": cluster, 47 | "description": AnyString(), 48 | "include": True, 49 | } 50 | } 51 | ) 52 | 53 | has_stats = { 54 | "id": "has_stats", 55 | "label": "Has Stats?", 56 | "value": bool(stats), 57 | "description": "Indicates whether there are statistics for this table", 58 | "include": False, 59 | } 60 | stats["has_stats"] = has_stats 61 | 62 | return stats 63 | -------------------------------------------------------------------------------- /tests/functional/adapter/hooks/data/seed_model.sql: -------------------------------------------------------------------------------- 1 | drop table if exists `{schema}.on_model_hook`; 2 | 3 | create table `{schema}.on_model_hook` ( 4 | test_state STRING, -- start|end 5 | target_dbname STRING, 6 | target_host STRING, 7 | target_name STRING, 8 | target_schema STRING, 9 | target_type STRING, 10 | target_user STRING, 11 | target_pass STRING, 12 | target_threads INTEGER, 13 | run_started_at STRING, 14 | invocation_id STRING, 15 | thread_id STRING 16 | ); 17 | -------------------------------------------------------------------------------- /tests/functional/adapter/hooks/data/seed_run.sql: -------------------------------------------------------------------------------- 1 | 2 | drop table if exists {schema}.on_run_hook; 3 | 4 | create table {schema}.on_run_hook ( 5 | test_state STRING, -- start|end 6 | target_dbname STRING, 7 | target_host STRING, 8 | target_name STRING, 9 | target_schema STRING, 10 | target_type STRING, 11 | target_user STRING, 12 | target_pass STRING, 13 | target_threads INTEGER, 14 | run_started_at STRING, 15 | invocation_id STRING, 16 | thread_id STRING 17 | ); 18 | -------------------------------------------------------------------------------- /tests/functional/adapter/hooks/test_model_hooks.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.hooks import test_model_hooks as core_base 2 | import pytest 3 | 4 | 5 | class TestBigQueryPrePostModelHooks(core_base.TestPrePostModelHooks): 6 | def check_hooks(self, state, project, host, count=1): 7 | self.get_ctx_vars(state, count=count, project=project) 8 | 9 | 10 | class TestBigQueryPrePostModelHooksUnderscores(core_base.TestPrePostModelHooksUnderscores): 11 | def check_hooks(self, state, project, host, count=1): 12 | self.get_ctx_vars(state, count=count, project=project) 13 | 14 | 15 | class TestBigQueryHookRefs(core_base.TestHookRefs): 16 | def check_hooks(self, state, project, host, count=1): 17 | self.get_ctx_vars(state, count=count, 
project=project) 18 | 19 | 20 | class TestBigQueryPrePostModelHooksOnSeeds(core_base.TestPrePostModelHooksOnSeeds): 21 | def check_hooks(self, state, project, host, count=1): 22 | self.get_ctx_vars(state, count=count, project=project) 23 | 24 | @pytest.fixture(scope="class") 25 | def project_config_update(self): 26 | return { 27 | "seed-paths": ["seeds"], 28 | "models": {}, 29 | "seeds": { 30 | "+post-hook": [ 31 | "alter table {{ this }} add column new_col int", 32 | "update {{ this }} set new_col = 1 where 1=1", 33 | ], 34 | "quote_columns": True, 35 | }, 36 | } 37 | -------------------------------------------------------------------------------- /tests/functional/adapter/hooks/test_run_hooks.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.hooks import test_run_hooks as core_base 3 | 4 | 5 | class TestPrePostRunHooksBigQuery(core_base.TestPrePostRunHooks): 6 | def check_hooks(self, state, project, host): 7 | self.get_ctx_vars(state, project) 8 | 9 | @pytest.fixture(scope="class") 10 | def project_config_update(self): 11 | return { 12 | # The create and drop table statements here validate that these hooks run 13 | # in the same order that they are defined. Drop before create is an error. 14 | # Also check that the table does not exist below. 15 | "on-run-start": [ 16 | "{{ custom_run_hook('start', target, run_started_at, invocation_id) }}", 17 | "create table {{ target.schema }}.start_hook_order_test ( id int )", 18 | "drop table {{ target.schema }}.start_hook_order_test", 19 | "{{ log(env_var('TERM_TEST'), info=True) }}", 20 | ], 21 | "on-run-end": [ 22 | "{{ custom_run_hook('end', target, run_started_at, invocation_id) }}", 23 | "create table {{ target.schema }}.end_hook_order_test ( id int )", 24 | "drop table {{ target.schema }}.end_hook_order_test", 25 | "create table {{ target.schema }}.schemas ( schema string )", 26 | "insert into {{ target.schema }}.schemas (schema) values {% for schema in schemas %}( '{{ schema }}' ){% if not loop.last %},{% endif %}{% endfor %}", 27 | "create table {{ target.schema }}.db_schemas ( db string, schema string )", 28 | "insert into {{ target.schema }}.db_schemas (db, schema) values {% for db, schema in database_schemas %}('{{ db }}', '{{ schema }}' ){% if not loop.last %},{% endif %}{% endfor %}", 29 | ], 30 | "seeds": { 31 | "quote_columns": False, 32 | }, 33 | } 34 | 35 | 36 | class TestAfterRunHooksBigQuery(core_base.TestAfterRunHooks): 37 | def check_hooks(self, state, project, host): 38 | self.get_ctx_vars(state, project) 39 | -------------------------------------------------------------------------------- /tests/functional/adapter/incremental/seeds.py: -------------------------------------------------------------------------------- 1 | seed_data_csv = """ 2 | id,dupe 3 | 1,a 4 | 2,a 5 | 3,a 6 | 4,a 7 | """.lstrip() 8 | 9 | seed_incremental_overwrite_date_expected_csv = """ 10 | id,date_day 11 | 10,2020-01-01 12 | 20,2020-01-01 13 | 30,2020-01-02 14 | 40,2020-01-02 15 | """.lstrip() 16 | 17 | seed_incremental_overwrite_day_expected_csv = """ 18 | id,date_time 19 | 10,2020-01-01 00:00:00 20 | 20,2020-01-01 00:00:00 21 | 30,2020-01-02 00:00:00 22 | 40,2020-01-02 00:00:00 23 | """.lstrip() 24 | 25 | seed_incremental_overwrite_range_expected_csv = """ 26 | id,date_int 27 | 10,20200101 28 | 20,20200101 29 | 30,20200102 30 | 40,20200102 31 | """.lstrip() 32 | 33 | seed_incremental_overwrite_time_expected_csv = """ 34 | id,date_hour 35 | 10,2020-01-01 01:00:00 36 | 
20,2020-01-01 01:00:00 37 | 30,2020-01-01 02:00:00 38 | 40,2020-01-01 02:00:00 39 | """.lstrip() 40 | 41 | seed_merge_expected_csv = """ 42 | id,date_time 43 | 1,2020-01-01 00:00:00 44 | 2,2020-01-01 00:00:00 45 | 3,2020-01-01 00:00:00 46 | 4,2020-01-02 00:00:00 47 | 5,2020-01-02 00:00:00 48 | 6,2020-01-02 00:00:00 49 | """.lstrip() 50 | 51 | seed_incremental_overwrite_day_with_time_partition_expected_csv = """ 52 | id 53 | 10 54 | 20 55 | 30 56 | 40 57 | """.lstrip() 58 | -------------------------------------------------------------------------------- /tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.incremental.test_incremental_merge_exclude_columns import ( 2 | BaseMergeExcludeColumns, 3 | ) 4 | 5 | 6 | class TestMergeExcludeColumns(BaseMergeExcludeColumns): 7 | pass 8 | -------------------------------------------------------------------------------- /tests/functional/adapter/incremental/test_incremental_microbatch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from unittest import mock 4 | 5 | from dbt.tests.util import run_dbt_and_capture 6 | from dbt.tests.adapter.incremental.test_incremental_microbatch import ( 7 | BaseMicrobatch, 8 | patch_microbatch_end_time, 9 | ) 10 | 11 | from tests.functional.adapter.incremental.incremental_strategy_fixtures import ( 12 | microbatch_model_no_unique_id_sql, 13 | microbatch_input_sql, 14 | microbatch_model_no_partition_by_sql, 15 | microbatch_model_invalid_partition_by_sql, 16 | microbatch_model_no_unique_id_copy_partitions_sql, 17 | microbatch_input_event_time_date_sql, 18 | microbatch_input_event_time_datetime_sql, 19 | ) 20 | 21 | 22 | class TestBigQueryMicrobatch(BaseMicrobatch): 23 | @pytest.fixture(scope="class") 24 | def microbatch_model_sql(self) -> str: 25 | return microbatch_model_no_unique_id_sql 26 | 27 | 28 | class TestBigQueryMicrobatchInputWithDate(TestBigQueryMicrobatch): 29 | @pytest.fixture(scope="class") 30 | def input_model_sql(self) -> str: 31 | return microbatch_input_event_time_date_sql 32 | 33 | @pytest.fixture(scope="class") 34 | def insert_two_rows_sql(self, project) -> str: 35 | test_schema_relation = project.adapter.Relation.create( 36 | database=project.database, schema=project.test_schema 37 | ) 38 | return f"insert into {test_schema_relation}.input_model (id, event_time) values (4, DATE '2020-01-04'), (5, DATE '2020-01-05')" 39 | 40 | 41 | class TestBigQueryMicrobatchInputWithDatetime(TestBigQueryMicrobatch): 42 | @pytest.fixture(scope="class") 43 | def input_model_sql(self) -> str: 44 | return microbatch_input_event_time_datetime_sql 45 | 46 | @pytest.fixture(scope="class") 47 | def insert_two_rows_sql(self, project) -> str: 48 | test_schema_relation = project.adapter.Relation.create( 49 | database=project.database, schema=project.test_schema 50 | ) 51 | return f"insert into {test_schema_relation}.input_model (id, event_time) values (4, DATETIME '2020-01-04'), (5, DATETIME '2020-01-05')" 52 | 53 | 54 | class TestBigQueryMicrobatchMissingPartitionBy: 55 | @pytest.fixture(scope="class") 56 | def models(self) -> str: 57 | return { 58 | "microbatch.sql": microbatch_model_no_partition_by_sql, 59 | "input_model.sql": microbatch_input_sql, 60 | } 61 | 62 | def test_execution_failure_no_partition_by(self, project): 63 | with patch_microbatch_end_time("2020-01-03 13:57:00"): 64 | _, stdout = 
run_dbt_and_capture(["run"], expect_pass=False) 65 | assert "The 'microbatch' strategy requires a `partition_by` config" in stdout 66 | 67 | 68 | class TestBigQueryMicrobatchInvalidPartitionByGranularity: 69 | @pytest.fixture(scope="class") 70 | def models(self) -> str: 71 | return { 72 | "microbatch.sql": microbatch_model_invalid_partition_by_sql, 73 | "input_model.sql": microbatch_input_sql, 74 | } 75 | 76 | def test_execution_failure_no_partition_by(self, project): 77 | with patch_microbatch_end_time("2020-01-03 13:57:00"): 78 | _, stdout = run_dbt_and_capture(["run"], expect_pass=False) 79 | assert ( 80 | "The 'microbatch' strategy requires a `partition_by` config with the same granularity as its configured `batch_size`" 81 | in stdout 82 | ) 83 | 84 | 85 | class TestBigQueryMicrobatchWithCopyPartitions(BaseMicrobatch): 86 | @pytest.fixture(scope="class") 87 | def microbatch_model_sql(self) -> str: 88 | return microbatch_model_no_unique_id_copy_partitions_sql 89 | -------------------------------------------------------------------------------- /tests/functional/adapter/incremental/test_incremental_predicates.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.incremental.test_incremental_predicates import BaseIncrementalPredicates 3 | 4 | 5 | class TestIncrementalPredicatesMergeBigQuery(BaseIncrementalPredicates): 6 | @pytest.fixture(scope="class") 7 | def project_config_update(self): 8 | return { 9 | "models": { 10 | "+incremental_predicates": ["dbt_internal_dest.id != 2"], 11 | "+incremental_strategy": "merge", 12 | } 13 | } 14 | 15 | 16 | class TestPredicatesMergeBigQuery(BaseIncrementalPredicates): 17 | @pytest.fixture(scope="class") 18 | def project_config_update(self): 19 | return { 20 | "models": { 21 | "+predicates": ["dbt_internal_dest.id != 2"], 22 | "+incremental_strategy": "merge", 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /tests/functional/adapter/incremental/test_incremental_unique_id.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.incremental.test_incremental_unique_id import BaseIncrementalUniqueKey 2 | 3 | 4 | class TestUniqueKeyBigQuery(BaseIncrementalUniqueKey): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/functional/adapter/materialized_view_tests/_files.py: -------------------------------------------------------------------------------- 1 | MY_SEED = """ 2 | id,value,record_valid_date 3 | 1,100,2023-01-01 00:00:00 4 | 2,200,2023-01-02 00:00:00 5 | 3,300,2023-01-02 00:00:00 6 | """.strip() 7 | 8 | 9 | MY_BASE_TABLE = """ 10 | {{ config( 11 | materialized='table', 12 | partition_by={ 13 | "field": "record_valid_date", 14 | "data_type": "datetime", 15 | "granularity": "day" 16 | }, 17 | cluster_by=["id", "value"] 18 | ) }} 19 | select 20 | id, 21 | value, 22 | record_valid_date 23 | from {{ ref('my_seed') }} 24 | """ 25 | 26 | 27 | # the whitespace to the left on partition matters here 28 | MY_MATERIALIZED_VIEW = """ 29 | {{ config( 30 | materialized='materialized_view', 31 | partition_by={ 32 | "field": "record_valid_date", 33 | "data_type": "datetime", 34 | "granularity": "day" 35 | }, 36 | cluster_by=["id", "value"], 37 | enable_refresh=True, 38 | refresh_interval_minutes=60, 39 | max_staleness="INTERVAL 45 MINUTE" 40 | ) }} 41 | select 42 | id, 43 | value, 44 | record_valid_date 45 | from {{ ref('my_base_table') 
}} 46 | """ 47 | 48 | 49 | # the whitespace to the left on partition matters here 50 | MY_OTHER_BASE_TABLE = """ 51 | {{ config( 52 | materialized='table', 53 | partition_by={ 54 | "field": "value", 55 | "data_type": "int64", 56 | "range": { 57 | "start": 0, 58 | "end": 500, 59 | "interval": 50 60 | } 61 | }, 62 | cluster_by=["id", "value"] 63 | ) }} 64 | select 65 | id, 66 | value, 67 | record_valid_date 68 | from {{ ref('my_seed') }} 69 | """ 70 | 71 | 72 | MY_MINIMAL_MATERIALIZED_VIEW = """ 73 | {{ 74 | config( 75 | materialized = 'materialized_view', 76 | ) 77 | }} 78 | 79 | select * from {{ ref('my_seed') }} 80 | """ 81 | -------------------------------------------------------------------------------- /tests/functional/adapter/materialized_view_tests/_mixin.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | 3 | import pytest 4 | 5 | from dbt.adapters.base.relation import BaseRelation 6 | from dbt.adapters.contracts.relation import RelationType 7 | from dbt.tests.adapter.materialized_view.files import MY_TABLE, MY_VIEW 8 | from dbt.tests.util import ( 9 | get_connection, 10 | get_model_file, 11 | run_dbt, 12 | set_model_file, 13 | ) 14 | 15 | from tests.functional.adapter.materialized_view_tests import _files 16 | 17 | 18 | class BigQueryMaterializedViewMixin: 19 | @pytest.fixture(scope="class") 20 | def my_base_table(self, project) -> BaseRelation: 21 | """ 22 | The base table for a materialized view needs to be partitioned in 23 | the same way as the materialized view. So if we want to create a partitioned 24 | materialized view, we need to partition the base table. This table is a 25 | select * on the seed table, plus a partition. 26 | """ 27 | return project.adapter.Relation.create( 28 | identifier="my_base_table", 29 | schema=project.test_schema, 30 | database=project.database, 31 | type=RelationType.Table, 32 | ) 33 | 34 | @pytest.fixture(scope="class") 35 | def my_other_base_table(self, project) -> BaseRelation: 36 | """ 37 | Following the sentiment of `my_base_table` above, if we want to alter the partition 38 | on the materialized view, we either need to update the partition on the base table, 39 | or we need a second table with a different partition. 
40 | """ 41 | return project.adapter.Relation.create( 42 | identifier="my_other_base_table", 43 | schema=project.test_schema, 44 | database=project.database, 45 | type=RelationType.Table, 46 | ) 47 | 48 | @pytest.fixture(scope="function", autouse=True) 49 | def setup(self, project, my_base_table, my_other_base_table, my_materialized_view): # type: ignore 50 | run_dbt(["seed"]) 51 | run_dbt(["run", "--full-refresh"]) 52 | 53 | # the tests touch these files, store their contents in memory 54 | initial_model = get_model_file(project, my_materialized_view) 55 | 56 | yield 57 | 58 | # and then reset them after the test runs 59 | set_model_file(project, my_materialized_view, initial_model) 60 | project.run_sql(f"drop schema if exists {project.test_schema} cascade") 61 | 62 | @pytest.fixture(scope="class", autouse=True) 63 | def seeds(self): 64 | return {"my_seed.csv": _files.MY_SEED} 65 | 66 | @pytest.fixture(scope="class", autouse=True) 67 | def models(self): 68 | yield { 69 | "my_table.sql": MY_TABLE, 70 | "my_view.sql": MY_VIEW, 71 | "my_base_table.sql": _files.MY_BASE_TABLE, 72 | "my_other_base_table.sql": _files.MY_OTHER_BASE_TABLE, 73 | "my_materialized_view.sql": _files.MY_MATERIALIZED_VIEW, 74 | } 75 | 76 | @staticmethod 77 | def insert_record(project, table: BaseRelation, record: Tuple[int, int]) -> None: 78 | my_id, value = record 79 | project.run_sql(f"insert into {table} (id, value) values ({my_id}, {value})") 80 | 81 | @staticmethod 82 | def refresh_materialized_view(project, materialized_view: BaseRelation) -> None: 83 | sql = f""" 84 | call bq.refresh_materialized_view( 85 | '{materialized_view.database}.{materialized_view.schema}.{materialized_view.identifier}' 86 | ) 87 | """ 88 | project.run_sql(sql) 89 | 90 | @staticmethod 91 | def query_row_count(project, relation: BaseRelation) -> int: 92 | sql = f"select count(*) from {relation}" 93 | return project.run_sql(sql, fetch="one")[0] 94 | 95 | # look into syntax 96 | @staticmethod 97 | def query_relation_type(project, relation: BaseRelation) -> Optional[str]: 98 | with get_connection(project.adapter) as conn: 99 | table = conn.handle.get_table( 100 | project.adapter.connections.get_bq_table( 101 | relation.database, relation.schema, relation.identifier 102 | ) 103 | ) 104 | return table.table_type.lower() 105 | -------------------------------------------------------------------------------- /tests/functional/adapter/materialized_view_tests/test_materialized_view.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.util import run_dbt 4 | from dbt.tests.adapter.materialized_view.basic import MaterializedViewBasic 5 | 6 | from tests.functional.adapter.materialized_view_tests._mixin import BigQueryMaterializedViewMixin 7 | from tests.functional.adapter.materialized_view_tests import _files 8 | 9 | 10 | class TestBigqueryMaterializedViewsBasic(BigQueryMaterializedViewMixin, MaterializedViewBasic): 11 | def test_view_replaces_materialized_view(self, project, my_materialized_view): 12 | """ 13 | We don't support replacing a view with another object in dbt-bigquery unless we use --full-refresh 14 | """ 15 | run_dbt(["run", "--models", my_materialized_view.identifier]) 16 | assert self.query_relation_type(project, my_materialized_view) == "materialized_view" 17 | 18 | self.swap_materialized_view_to_view(project, my_materialized_view) 19 | 20 | # add --full-refresh 21 | run_dbt(["run", "--models", my_materialized_view.identifier, "--full-refresh"]) 22 | assert 
self.query_relation_type(project, my_materialized_view) == "view" 23 | 24 | @pytest.mark.skip( 25 | "It looks like BQ updates the materialized view almost immediately, which fails this test." 26 | ) 27 | def test_materialized_view_only_updates_after_refresh( 28 | self, project, my_materialized_view, my_seed 29 | ): 30 | pass 31 | 32 | 33 | class TestMaterializedViewRerun: 34 | """ 35 | This addresses: https://github.com/dbt-labs/dbt-bigquery/issues/1007 36 | 37 | This effectively tests that defaults get properly set so that the run is idempotent. 38 | If the defaults are not properly set, changes could appear when there are no changes 39 | and cause unexpected scenarios. 40 | """ 41 | 42 | @pytest.fixture(scope="class", autouse=True) 43 | def models(self): 44 | return {"my_minimal_materialized_view.sql": _files.MY_MINIMAL_MATERIALIZED_VIEW} 45 | 46 | @pytest.fixture(scope="class", autouse=True) 47 | def seeds(self): 48 | return {"my_seed.csv": _files.MY_SEED} 49 | 50 | def test_minimal_materialized_view_is_idempotent(self, project): 51 | run_dbt(["seed"]) 52 | run_dbt(["run"]) 53 | run_dbt(["run"]) 54 | -------------------------------------------------------------------------------- /tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.materialized_view.changes import ( 2 | MaterializedViewChanges, 3 | MaterializedViewChangesApplyMixin, 4 | MaterializedViewChangesContinueMixin, 5 | MaterializedViewChangesFailMixin, 6 | ) 7 | from dbt.tests.util import get_connection, get_model_file, set_model_file 8 | 9 | from dbt.adapters.bigquery.relation_configs import BigQueryMaterializedViewConfig 10 | 11 | from tests.functional.adapter.materialized_view_tests._mixin import BigQueryMaterializedViewMixin 12 | 13 | 14 | class BigQueryMaterializedViewChanges(BigQueryMaterializedViewMixin, MaterializedViewChanges): 15 | @staticmethod 16 | def check_start_state(project, materialized_view): 17 | with get_connection(project.adapter): 18 | results = project.adapter.describe_relation(materialized_view) 19 | assert isinstance(results, BigQueryMaterializedViewConfig) 20 | assert results.options.enable_refresh is True 21 | assert results.options.refresh_interval_minutes == 60 22 | assert results.partition.field == "record_valid_date" 23 | assert results.partition.data_type == "datetime" 24 | assert results.partition.granularity == "day" 25 | assert results.cluster.fields == frozenset({"id", "value"}) 26 | 27 | @staticmethod 28 | def change_config_via_alter(project, materialized_view): 29 | initial_model = get_model_file(project, materialized_view) 30 | new_model = initial_model.replace("enable_refresh=True", "enable_refresh=False") 31 | set_model_file(project, materialized_view, new_model) 32 | 33 | @staticmethod 34 | def check_state_alter_change_is_applied(project, materialized_view): 35 | with get_connection(project.adapter): 36 | results = project.adapter.describe_relation(materialized_view) 37 | assert isinstance(results, BigQueryMaterializedViewConfig) 38 | # these change when run manually 39 | assert results.options.enable_refresh is False 40 | assert results.options.refresh_interval_minutes == 30 # BQ returns it to the default 41 | 42 | @staticmethod 43 | def change_config_via_replace(project, materialized_view): 44 | initial_model = get_model_file(project, materialized_view) 45 | # the whitespace to the left on partition matters here 46 | old_partition = """ 47 | 
partition_by={ 48 | "field": "record_valid_date", 49 | "data_type": "datetime", 50 | "granularity": "day" 51 | },""" 52 | new_partition = """ 53 | partition_by={ 54 | "field": "value", 55 | "data_type": "int64", 56 | "range": { 57 | "start": 0, 58 | "end": 500, 59 | "interval": 50 60 | } 61 | },""" 62 | new_model = ( 63 | initial_model.replace(old_partition, new_partition) 64 | .replace("'my_base_table'", "'my_other_base_table'") 65 | .replace('cluster_by=["id", "value"]', 'cluster_by="id"') 66 | ) 67 | set_model_file(project, materialized_view, new_model) 68 | 69 | @staticmethod 70 | def check_state_replace_change_is_applied(project, materialized_view): 71 | with get_connection(project.adapter): 72 | results = project.adapter.describe_relation(materialized_view) 73 | assert isinstance(results, BigQueryMaterializedViewConfig) 74 | assert results.partition.field == "value" 75 | assert results.partition.data_type == "int64" 76 | assert results.partition.range == {"start": 0, "end": 500, "interval": 50} 77 | assert results.cluster.fields == frozenset({"id"}) 78 | 79 | 80 | class TestBigQueryMaterializedViewChangesApply( 81 | BigQueryMaterializedViewChanges, MaterializedViewChangesApplyMixin 82 | ): 83 | pass 84 | 85 | 86 | class TestBigQueryMaterializedViewChangesContinue( 87 | BigQueryMaterializedViewChanges, MaterializedViewChangesContinueMixin 88 | ): 89 | pass 90 | 91 | 92 | class TestBigQueryMaterializedViewChangesFail( 93 | BigQueryMaterializedViewChanges, MaterializedViewChangesFailMixin 94 | ): 95 | pass 96 | -------------------------------------------------------------------------------- /tests/functional/adapter/materialized_view_tests/test_materialized_view_cluster_changes.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.materialized_view.changes import ( 2 | MaterializedViewChanges, 3 | MaterializedViewChangesApplyMixin, 4 | MaterializedViewChangesContinueMixin, 5 | MaterializedViewChangesFailMixin, 6 | ) 7 | from dbt.tests.util import get_connection, get_model_file, set_model_file 8 | 9 | from dbt.adapters.bigquery.relation_configs import BigQueryMaterializedViewConfig 10 | 11 | from tests.functional.adapter.materialized_view_tests._mixin import BigQueryMaterializedViewMixin 12 | 13 | 14 | class BigQueryMaterializedViewClusterChanges( 15 | BigQueryMaterializedViewMixin, MaterializedViewChanges 16 | ): 17 | @staticmethod 18 | def check_start_state(project, materialized_view): 19 | with get_connection(project.adapter): 20 | results = project.adapter.describe_relation(materialized_view) 21 | assert isinstance(results, BigQueryMaterializedViewConfig) 22 | assert results.options.enable_refresh is True 23 | assert results.options.refresh_interval_minutes == 60 24 | assert results.cluster.fields == frozenset({"id", "value"}) 25 | 26 | @staticmethod 27 | def change_config_via_alter(project, materialized_view): 28 | initial_model = get_model_file(project, materialized_view) 29 | new_model = initial_model.replace("enable_refresh=True", "enable_refresh=False") 30 | set_model_file(project, materialized_view, new_model) 31 | 32 | @staticmethod 33 | def check_state_alter_change_is_applied(project, materialized_view): 34 | with get_connection(project.adapter): 35 | results = project.adapter.describe_relation(materialized_view) 36 | assert isinstance(results, BigQueryMaterializedViewConfig) 37 | assert results.options.enable_refresh is False 38 | assert results.options.refresh_interval_minutes == 30 # BQ returns it to the default 39 
| 40 | @staticmethod 41 | def change_config_via_replace(project, materialized_view): 42 | initial_model = get_model_file(project, materialized_view) 43 | new_model = initial_model.replace('cluster_by=["id", "value"]', 'cluster_by="id"') 44 | set_model_file(project, materialized_view, new_model) 45 | 46 | @staticmethod 47 | def check_state_replace_change_is_applied(project, materialized_view): 48 | with get_connection(project.adapter): 49 | results = project.adapter.describe_relation(materialized_view) 50 | assert isinstance(results, BigQueryMaterializedViewConfig) 51 | assert results.cluster.fields == frozenset({"id"}) 52 | 53 | 54 | class TestBigQueryMaterializedViewClusterChangesApply( 55 | BigQueryMaterializedViewClusterChanges, MaterializedViewChangesApplyMixin 56 | ): 57 | pass 58 | 59 | 60 | class TestBigQueryMaterializedViewClusterChangesContinue( 61 | BigQueryMaterializedViewClusterChanges, MaterializedViewChangesContinueMixin 62 | ): 63 | pass 64 | 65 | 66 | class TestBigQueryMaterializedViewClusterChangesFail( 67 | BigQueryMaterializedViewClusterChanges, MaterializedViewChangesFailMixin 68 | ): 69 | pass 70 | -------------------------------------------------------------------------------- /tests/functional/adapter/materialized_view_tests/test_materialized_view_partition_changes.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.materialized_view.changes import ( 2 | MaterializedViewChanges, 3 | MaterializedViewChangesApplyMixin, 4 | MaterializedViewChangesContinueMixin, 5 | MaterializedViewChangesFailMixin, 6 | ) 7 | from dbt.tests.util import get_connection, get_model_file, set_model_file 8 | 9 | from dbt.adapters.bigquery.relation_configs import BigQueryMaterializedViewConfig 10 | 11 | from tests.functional.adapter.materialized_view_tests._mixin import BigQueryMaterializedViewMixin 12 | 13 | 14 | class BigQueryMaterializedViewPartitionChanges( 15 | BigQueryMaterializedViewMixin, MaterializedViewChanges 16 | ): 17 | @staticmethod 18 | def check_start_state(project, materialized_view): 19 | with get_connection(project.adapter): 20 | results = project.adapter.describe_relation(materialized_view) 21 | assert isinstance(results, BigQueryMaterializedViewConfig) 22 | assert results.options.enable_refresh is True 23 | assert results.options.refresh_interval_minutes == 60 24 | assert results.partition.field == "record_valid_date" 25 | assert results.partition.data_type == "datetime" 26 | assert results.partition.granularity == "day" 27 | 28 | @staticmethod 29 | def change_config_via_alter(project, materialized_view): 30 | initial_model = get_model_file(project, materialized_view) 31 | new_model = initial_model.replace("enable_refresh=True", "enable_refresh=False") 32 | set_model_file(project, materialized_view, new_model) 33 | 34 | @staticmethod 35 | def check_state_alter_change_is_applied(project, materialized_view): 36 | with get_connection(project.adapter): 37 | results = project.adapter.describe_relation(materialized_view) 38 | assert isinstance(results, BigQueryMaterializedViewConfig) 39 | # these change when run manually 40 | assert results.options.enable_refresh is False 41 | assert results.options.refresh_interval_minutes == 30 # BQ returns it to the default 42 | 43 | @staticmethod 44 | def change_config_via_replace(project, materialized_view): 45 | initial_model = get_model_file(project, materialized_view) 46 | # the whitespace to the left on partition matters here 47 | old_partition = """ 48 | partition_by={ 49 | 
"field": "record_valid_date", 50 | "data_type": "datetime", 51 | "granularity": "day" 52 | },""" 53 | new_partition = """ 54 | partition_by={ 55 | "field": "value", 56 | "data_type": "int64", 57 | "range": { 58 | "start": 0, 59 | "end": 500, 60 | "interval": 50 61 | } 62 | },""" 63 | new_model = initial_model.replace(old_partition, new_partition).replace( 64 | "'my_base_table'", "'my_other_base_table'" 65 | ) 66 | set_model_file(project, materialized_view, new_model) 67 | 68 | @staticmethod 69 | def check_state_replace_change_is_applied(project, materialized_view): 70 | with get_connection(project.adapter): 71 | results = project.adapter.describe_relation(materialized_view) 72 | assert isinstance(results, BigQueryMaterializedViewConfig) 73 | assert results.partition.field == "value" 74 | assert results.partition.data_type == "int64" 75 | assert results.partition.range == {"start": 0, "end": 500, "interval": 50} 76 | 77 | 78 | class TestBigQueryMaterializedViewPartitionChangesApply( 79 | BigQueryMaterializedViewPartitionChanges, MaterializedViewChangesApplyMixin 80 | ): 81 | pass 82 | 83 | 84 | class TestBigQueryMaterializedViewPartitionChangesContinue( 85 | BigQueryMaterializedViewPartitionChanges, MaterializedViewChangesContinueMixin 86 | ): 87 | pass 88 | 89 | 90 | class TestBigQueryMaterializedViewPartitionChangesFail( 91 | BigQueryMaterializedViewPartitionChanges, MaterializedViewChangesFailMixin 92 | ): 93 | pass 94 | -------------------------------------------------------------------------------- /tests/functional/adapter/query_comment_test/test_job_label.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from google.cloud.bigquery.client import Client 4 | 5 | from dbt.tests.util import run_dbt 6 | 7 | 8 | _MACRO__BQ_LABELS = """ 9 | {% macro bq_labels() %}{ 10 | "system": "{{ env_var('LABEL_SYSTEM', 'my_system') }}", 11 | "env_type": "{{ env_var('LABEL_ENV', 'dev') }}" 12 | }{% endmacro %} 13 | """ 14 | _MODEL__MY_TABLE = """ 15 | {{ config(materialized= "table") }} 16 | select 1 as id 17 | """ 18 | 19 | 20 | class TestQueryCommentJobLabel: 21 | @pytest.fixture(scope="class") 22 | def models(self): 23 | return {"my_table.sql": _MODEL__MY_TABLE} 24 | 25 | @pytest.fixture(scope="class") 26 | def macros(self): 27 | return {"bq_labels.sql": _MACRO__BQ_LABELS} 28 | 29 | @pytest.fixture(scope="class") 30 | def project_config_update(self): 31 | return { 32 | "query-comment": { 33 | "comment": "{{ bq_labels() }}", 34 | "job-label": True, 35 | "append": True, 36 | } 37 | } 38 | 39 | def test_query_comments_displays_as_job_labels(self, project): 40 | """ 41 | Addresses this regression in dbt-bigquery 1.6: 42 | https://github.com/dbt-labs/dbt-bigquery/issues/863 43 | """ 44 | results = run_dbt(["run"]) 45 | job_id = results.results[0].adapter_response.get("job_id") 46 | with project.adapter.connection_named("_test"): 47 | client: Client = project.adapter.connections.get_thread_connection().handle 48 | job = client.get_job(job_id=job_id) 49 | 50 | # this is what should happen 51 | assert job.labels.get("system") == "my_system" 52 | assert job.labels.get("env_type") == "dev" 53 | -------------------------------------------------------------------------------- /tests/functional/adapter/query_comment_test/test_query_comment.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.query_comment.test_query_comment import ( 2 | BaseQueryComments, 3 | BaseMacroQueryComments, 4 | 
BaseMacroArgsQueryComments, 5 | BaseMacroInvalidQueryComments, 6 | BaseNullQueryComments, 7 | BaseEmptyQueryComments, 8 | ) 9 | 10 | 11 | class TestQueryCommentsBigQuery(BaseQueryComments): 12 | pass 13 | 14 | 15 | class TestMacroQueryCommentsBigQuery(BaseMacroQueryComments): 16 | pass 17 | 18 | 19 | class TestMacroArgsQueryCommentsBigQuery(BaseMacroArgsQueryComments): 20 | pass 21 | 22 | 23 | class TestMacroInvalidQueryCommentsBigQuery(BaseMacroInvalidQueryComments): 24 | pass 25 | 26 | 27 | class TestNullQueryCommentsBigQuery(BaseNullQueryComments): 28 | pass 29 | 30 | 31 | class TestEmptyQueryCommentsBigQuery(BaseEmptyQueryComments): 32 | pass 33 | -------------------------------------------------------------------------------- /tests/functional/adapter/simple_bigquery_view/seeds.py: -------------------------------------------------------------------------------- 1 | seed_data_csv = """ 2 | id,dupe 3 | 1,a 4 | 2,a 5 | 3,a 6 | 4,a 7 | """.lstrip() 8 | 9 | seed_incremental_overwrite_date_expected_csv = """ 10 | id,date_day 11 | 10,2020-01-01 12 | 20,2020-01-01 13 | 30,2020-01-02 14 | 40,2020-01-02 15 | """.lstrip() 16 | 17 | seed_incremental_overwrite_day_expected_csv = """ 18 | id,date_time 19 | 10,2020-01-01 00:00:00 20 | 20,2020-01-01 00:00:00 21 | 30,2020-01-02 00:00:00 22 | 40,2020-01-02 00:00:00 23 | """.lstrip() 24 | 25 | seed_incremental_overwrite_range_expected_csv = """ 26 | id,date_int 27 | 10,20200101 28 | 20,20200101 29 | 30,20200102 30 | 40,20200102 31 | """.lstrip() 32 | 33 | seed_incremental_overwrite_time_expected_csv = """ 34 | id,date_hour 35 | 10,2020-01-01 01:00:00 36 | 20,2020-01-01 01:00:00 37 | 30,2020-01-01 02:00:00 38 | 40,2020-01-01 02:00:00 39 | """.lstrip() 40 | 41 | seed_merge_expected_csv = """ 42 | id,date_time 43 | 1,2020-01-01 00:00:00 44 | 2,2020-01-01 00:00:00 45 | 3,2020-01-01 00:00:00 46 | 4,2020-01-02 00:00:00 47 | 5,2020-01-02 00:00:00 48 | 6,2020-01-02 00:00:00 49 | """.lstrip() 50 | 51 | seed_incremental_overwrite_day_with_time_partition_expected_csv = """ 52 | id 53 | 10 54 | 20 55 | 30 56 | 40 57 | """.lstrip() 58 | -------------------------------------------------------------------------------- /tests/functional/adapter/simple_copy/test_simple_copy.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pathlib import Path 4 | 5 | from dbt.tests.util import run_dbt, rm_file, write_file, check_relations_equal 6 | 7 | from dbt.tests.adapter.simple_copy.test_simple_copy import SimpleCopyBase 8 | 9 | from tests.functional.adapter.simple_copy.fixtures import ( 10 | _MODELS_INCREMENTAL_UPDATE_COLS, 11 | _SEEDS__SEED_MERGE_COLS_INITIAL, 12 | _SEEDS__SEED_MERGE_COLS_UPDATE, 13 | _SEEDS__SEED_MERGE_COLS_EXPECTED_RESULT, 14 | ) 15 | 16 | 17 | class TestSimpleCopyBase(SimpleCopyBase): 18 | pass 19 | 20 | 21 | class TestIncrementalMergeColumns: 22 | @pytest.fixture(scope="class") 23 | def models(self): 24 | return {"incremental_update_cols.sql": _MODELS_INCREMENTAL_UPDATE_COLS} 25 | 26 | @pytest.fixture(scope="class") 27 | def seeds(self): 28 | return {"seed.csv": _SEEDS__SEED_MERGE_COLS_INITIAL} 29 | 30 | def test_incremental_merge_columns(self, project): 31 | run_dbt(["seed"]) 32 | run_dbt(["run"]) 33 | 34 | main_seed_file = project.project_root / Path("seeds") / Path("seed.csv") 35 | expected_seed_file = project.project_root / Path("seeds") / Path("expected_result.csv") 36 | rm_file(main_seed_file) 37 | write_file(_SEEDS__SEED_MERGE_COLS_UPDATE, main_seed_file) 38 | 
write_file(_SEEDS__SEED_MERGE_COLS_EXPECTED_RESULT, expected_seed_file) 39 | 40 | run_dbt(["seed"]) 41 | run_dbt(["run"]) 42 | check_relations_equal(project.adapter, ["incremental_update_cols", "expected_result"]) 43 | -------------------------------------------------------------------------------- /tests/functional/adapter/sources_freshness_tests/files.py: -------------------------------------------------------------------------------- 1 | SCHEMA_YML = """version: 2 2 | sources: 3 | - name: test_source 4 | freshness: 5 | warn_after: {count: 10, period: hour} 6 | error_after: {count: 1, period: day} 7 | schema: "{{ env_var('DBT_GET_LAST_RELATION_TEST_SCHEMA') }}" 8 | tables: 9 | - name: test_source 10 | """ 11 | 12 | SEED_TEST_SOURCE_CSV = """ 13 | id,name 14 | 1,Martin 15 | 2,Jeter 16 | 3,Ruth 17 | 4,Gehrig 18 | 5,DiMaggio 19 | 6,Torre 20 | 7,Mantle 21 | 8,Berra 22 | 9,Maris 23 | """.strip() 24 | -------------------------------------------------------------------------------- /tests/functional/adapter/sources_freshness_tests/test_get_relation_last_modified.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | 4 | from dbt.tests.util import run_dbt 5 | 6 | from tests.functional.adapter.sources_freshness_tests import files 7 | 8 | 9 | class TestGetLastRelationModified: 10 | @pytest.fixture(scope="class") 11 | def seeds(self): 12 | return {"test_source.csv": files.SEED_TEST_SOURCE_CSV} 13 | 14 | @pytest.fixture(scope="class") 15 | def models(self): 16 | return {"schema.yml": files.SCHEMA_YML} 17 | 18 | @pytest.fixture(scope="class", autouse=True) 19 | def setup(self, project): 20 | # we need the schema name for the sources section 21 | os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"] = project.test_schema 22 | run_dbt(["seed"]) 23 | yield 24 | del os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"] 25 | 26 | def test_get_last_relation_modified(self, project): 27 | results = run_dbt(["source", "freshness"]) 28 | assert len(results) == 1 29 | result = results[0] 30 | assert result.status == "pass" 31 | -------------------------------------------------------------------------------- /tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.store_test_failures_tests import basic 4 | from dbt.tests.adapter.store_test_failures_tests.test_store_test_failures import ( 5 | StoreTestFailuresBase, 6 | ) 7 | 8 | 9 | TEST_AUDIT_SCHEMA_SUFFIX = "dbt_test__aud" 10 | 11 | 12 | class TestBigQueryStoreTestFailures(StoreTestFailuresBase): 13 | @pytest.fixture(scope="function", autouse=True) 14 | def teardown_method(self, project): 15 | yield 16 | relation = project.adapter.Relation.create( 17 | database=project.database, schema=f"{project.test_schema}_{TEST_AUDIT_SCHEMA_SUFFIX}" 18 | ) 19 | 20 | project.adapter.drop_schema(relation) 21 | 22 | def test_store_and_assert(self, project): 23 | self.run_tests_store_one_failure(project) 24 | self.run_tests_store_failures_and_assert(project) 25 | 26 | 27 | class TestStoreTestFailuresAsInteractions(basic.StoreTestFailuresAsInteractions): 28 | pass 29 | 30 | 31 | class TestStoreTestFailuresAsProjectLevelOff(basic.StoreTestFailuresAsProjectLevelOff): 32 | pass 33 | 34 | 35 | class TestStoreTestFailuresAsProjectLevelView(basic.StoreTestFailuresAsProjectLevelView): 36 | pass 37 | 38 | 39 | class TestStoreTestFailuresAsGeneric(basic.StoreTestFailuresAsGeneric): 
40 | pass 41 | 42 | 43 | class TestStoreTestFailuresAsProjectLevelEphemeral(basic.StoreTestFailuresAsProjectLevelEphemeral): 44 | pass 45 | 46 | 47 | class TestStoreTestFailuresAsExceptions(basic.StoreTestFailuresAsExceptions): 48 | pass 49 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_aliases.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | from dbt.tests.adapter.aliases.test_aliases import BaseAliases, BaseSameAliasDifferentDatabases 4 | 5 | MACROS__BIGQUERY_CAST_SQL = """ 6 | {% macro bigquery__string_literal(s) %} 7 | cast('{{ s }}' as string) 8 | {% endmacro %} 9 | """ 10 | 11 | MACROS__EXPECT_VALUE_SQL = """ 12 | -- cross-db compatible test, similar to accepted_values 13 | 14 | {% test expect_value(model, field, value) %} 15 | 16 | select * 17 | from {{ model }} 18 | where {{ field }} != '{{ value }}' 19 | 20 | {% endtest %} 21 | """ 22 | 23 | MODELS_DUPE_CUSTOM_DATABASE_A = """ 24 | select {{ string_literal(this.name) }} as tablename 25 | """ 26 | 27 | MODELS_DUPE_CUSTOM_DATABASE_B = """ 28 | select {{ string_literal(this.name) }} as tablename 29 | """ 30 | 31 | MODELS_SCHEMA_YML = """ 32 | version: 2 33 | models: 34 | - name: model_a 35 | data_tests: 36 | - expect_value: 37 | field: tablename 38 | value: duped_alias 39 | - name: model_b 40 | data_tests: 41 | - expect_value: 42 | field: tablename 43 | value: duped_alias 44 | """ 45 | 46 | 47 | class TestAliasesBigQuery(BaseAliases): 48 | @pytest.fixture(scope="class") 49 | def macros(self): 50 | return { 51 | "bigquery_cast.sql": MACROS__BIGQUERY_CAST_SQL, 52 | "expect_value.sql": MACROS__EXPECT_VALUE_SQL, 53 | } 54 | 55 | 56 | class TestSameTestSameAliasDifferentDatabasesBigQuery(BaseSameAliasDifferentDatabases): 57 | @pytest.fixture(scope="class") 58 | def project_config_update(self): 59 | return { 60 | "config-version": 2, 61 | "macro-paths": ["macros"], 62 | "models": { 63 | "test": { 64 | "alias": "duped_alias", 65 | "model_b": {"database": os.getenv("BIGQUERY_TEST_ALT_DATABASE")}, 66 | }, 67 | }, 68 | } 69 | 70 | @pytest.fixture(scope="class") 71 | def macros(self): 72 | return { 73 | "bigquery_cast.sql": MACROS__BIGQUERY_CAST_SQL, 74 | "expect_value.sql": MACROS__EXPECT_VALUE_SQL, 75 | } 76 | 77 | @pytest.fixture(scope="class") 78 | def models(self): 79 | return { 80 | "schema.yml": MODELS_SCHEMA_YML, 81 | "model_a.sql": MODELS_DUPE_CUSTOM_DATABASE_A, 82 | "model_b.sql": MODELS_DUPE_CUSTOM_DATABASE_B, 83 | } 84 | 85 | @pytest.fixture(autouse=True) 86 | def clean_up(self, project): 87 | yield 88 | with project.adapter.connection_named("__test"): 89 | relation = project.adapter.Relation.create( 90 | database=os.getenv("BIGQUERY_TEST_ALT_DATABASE"), schema=project.test_schema 91 | ) 92 | project.adapter.drop_schema(relation) 93 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_basic.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.adapter.basic.test_base import BaseSimpleMaterializations 4 | from dbt.tests.adapter.basic.test_singular_tests import BaseSingularTests 5 | from dbt.tests.adapter.basic.test_singular_tests_ephemeral import ( 6 | BaseSingularTestsEphemeral, 7 | ) 8 | from dbt.tests.adapter.basic.test_empty import BaseEmpty 9 | from dbt.tests.adapter.basic.test_ephemeral import BaseEphemeral 10 | from dbt.tests.adapter.basic.test_incremental 
import BaseIncremental 11 | from dbt.tests.adapter.basic.test_generic_tests import BaseGenericTests 12 | from dbt.tests.adapter.basic.test_snapshot_check_cols import BaseSnapshotCheckCols 13 | from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp 14 | from dbt.tests.adapter.basic.test_adapter_methods import BaseAdapterMethod 15 | from dbt.tests.adapter.basic.test_validate_connection import BaseValidateConnection 16 | from dbt.tests.adapter.basic.test_docs_generate import BaseDocsGenerate 17 | from dbt.tests.adapter.basic.expected_catalog import base_expected_catalog 18 | from tests.functional.adapter.expected_stats import bigquery_stats 19 | 20 | 21 | class TestSimpleMaterializationsBigQuery(BaseSimpleMaterializations): 22 | # This test requires a full-refresh to replace a table with a view 23 | @pytest.fixture(scope="class") 24 | def test_config(self): 25 | return {"require_full_refresh": True} 26 | 27 | 28 | class TestSingularTestsBigQuery(BaseSingularTests): 29 | pass 30 | 31 | 32 | class TestSingularTestsEphemeralBigQuery(BaseSingularTestsEphemeral): 33 | pass 34 | 35 | 36 | class TestEmptyBigQuery(BaseEmpty): 37 | pass 38 | 39 | 40 | class TestEphemeralBigQuery(BaseEphemeral): 41 | pass 42 | 43 | 44 | class TestIncrementalBigQuery(BaseIncremental): 45 | pass 46 | 47 | 48 | class TestGenericTestsBigQuery(BaseGenericTests): 49 | pass 50 | 51 | 52 | class TestSnapshotCheckColsBigQuery(BaseSnapshotCheckCols): 53 | pass 54 | 55 | 56 | class TestSnapshotTimestampBigQuery(BaseSnapshotTimestamp): 57 | pass 58 | 59 | 60 | class TestBaseAdapterMethodBigQuery(BaseAdapterMethod): 61 | pass 62 | 63 | 64 | class TestBigQueryValidateConnection(BaseValidateConnection): 65 | pass 66 | 67 | 68 | class TestDocsGenerateBigQuery(BaseDocsGenerate): 69 | @pytest.fixture(scope="class") 70 | def expected_catalog(self, project): 71 | return base_expected_catalog( 72 | project, 73 | role=None, 74 | id_type="INT64", 75 | text_type="STRING", 76 | time_type="DATETIME", 77 | view_type="view", 78 | table_type="table", 79 | model_stats=bigquery_stats(False), 80 | seed_stats=bigquery_stats(True), 81 | ) 82 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_changing_relation_type.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.relations.test_changing_relation_type import BaseChangeRelationTypeValidator 2 | 3 | 4 | class TestBigQueryChangeRelationTypes(BaseChangeRelationTypeValidator): 5 | def test_changing_materialization_changes_relation_type(self, project): 6 | self._run_and_check_materialization("view") 7 | self._run_and_check_materialization("table", extra_args=["--full-refresh"]) 8 | self._run_and_check_materialization("view", extra_args=["--full-refresh"]) 9 | self._run_and_check_materialization("incremental", extra_args=["--full-refresh"]) 10 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_copy_materialization.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pathlib import Path 3 | from dbt.tests.util import run_dbt, write_file, check_relations_equal 4 | 5 | _SEED_A = """ 6 | load_date,id,first_name,last_name,email,gender,ip_address 7 | 2021-03-05,1,Jack,Hunter,jhunter0@pbs.org,Male,59.80.20.168 8 | 2021-03-05,2,Kathryn,Walker,kwalker1@ezinearticles.com,Female,194.121.179.35 9 | 
2021-03-05,3,Gerald,Ryan,gryan2@com.com,Male,11.3.212.243 10 | """.lstrip() 11 | 12 | _SEED_B = """ 13 | load_date,id,first_name,last_name,email,gender,ip_address 14 | 2021-03-05,4,Bonnie,Spencer,bspencer3@ameblo.jp,Female,216.32.196.175 15 | 2021-03-05,5,Harold,Taylor,htaylor4@people.com.cn,Male,253.10.246.136 16 | """.lstrip() 17 | 18 | _EXPECTED_RESULT = """ 19 | load_date,id,first_name,last_name,email,gender,ip_address 20 | 2021-03-05,1,Jack,Hunter,jhunter0@pbs.org,Male,59.80.20.168 21 | 2021-03-05,2,Kathryn,Walker,kwalker1@ezinearticles.com,Female,194.121.179.35 22 | 2021-03-05,3,Gerald,Ryan,gryan2@com.com,Male,11.3.212.243 23 | 2021-03-05,4,Bonnie,Spencer,bspencer3@ameblo.jp,Female,216.32.196.175 24 | 2021-03-05,5,Harold,Taylor,htaylor4@people.com.cn,Male,253.10.246.136 25 | """.lstrip() 26 | 27 | _COPY_MODEL = """ 28 | {{ config( 29 | materialized="copy", 30 | copy_materialization="incremental", 31 | ) }} 32 | 33 | SELECT * FROM {{ ref("seed") }} 34 | """ 35 | 36 | 37 | class BaseCopyModelConfig: 38 | @pytest.fixture(scope="class") 39 | def models(self): 40 | return {"copy_model.sql": _COPY_MODEL} 41 | 42 | @pytest.fixture(scope="class") 43 | def seeds(self): 44 | return { 45 | "seed.csv": _SEED_A, 46 | "expected_result.csv": _EXPECTED_RESULT, 47 | } 48 | 49 | 50 | class TestCopyMaterialization(BaseCopyModelConfig): 51 | def test_incremental_copy(self, project): 52 | run_dbt(["seed"]) 53 | run_dbt(["run"]) 54 | 55 | # Replace original seed _SEED_A with _SEED_B 56 | seed_file = project.project_root / Path("seeds") / Path("seed.csv") 57 | write_file(_SEED_B, seed_file) 58 | 59 | run_dbt(["seed"]) 60 | run_dbt(["run"]) 61 | 62 | check_relations_equal(project.adapter, ["copy_model", "expected_result"]) 63 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_dbt_debug.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.util import run_dbt 2 | from dbt.tests.adapter.dbt_debug.test_dbt_debug import BaseDebug, BaseDebugProfileVariable 3 | 4 | 5 | class TestDebugBigQuery(BaseDebug): 6 | def test_ok_bigquery(self, project): 7 | run_dbt(["debug"]) 8 | assert "ERROR" not in self.capsys.readouterr().out 9 | 10 | 11 | class TestDebugProfileVariableBigQuery(BaseDebugProfileVariable): 12 | def test_ok_bigquery(self, project): 13 | run_dbt(["debug"]) 14 | assert "ERROR" not in self.capsys.readouterr().out 15 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_grant_access_to.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import pytest 4 | 5 | from dbt.tests.util import run_dbt 6 | 7 | 8 | def select_1(dataset: str, materialized: str): 9 | config = f"""config( 10 | materialized='{materialized}', 11 | grant_access_to=[ 12 | {{'project': 'dbt-test-env', 'dataset': '{dataset}'}}, 13 | ] 14 | )""" 15 | return ( 16 | "{{" 17 | + config 18 | + "}}" 19 | + """ 20 | SELECT 1 as one""" 21 | ) 22 | 23 | 24 | BAD_CONFIG_TABLE_NAME = "bad_view" 25 | BAD_CONFIG_TABLE = """ 26 | {{ config( 27 | materialized='view', 28 | grant_access_to=[ 29 | {'project': 'dbt-test-env', 'dataset': 'NonExistentDataset'}, 30 | ] 31 | ) }} 32 | 33 | SELECT 1 as one 34 | """ 35 | 36 | BAD_CONFIG_CHILD_TABLE = "SELECT 1 as one FROM {{ref('" + BAD_CONFIG_TABLE_NAME + "')}}" 37 | 38 | 39 | def get_schema_name(base_schema_name: str) -> str: 40 | return f"{base_schema_name}_grant_access" 41 | 42 | 43 | 
class TestAccessGrantSucceeds: 44 | @pytest.fixture(scope="class") 45 | def setup_grant_schema( 46 | self, 47 | project, 48 | unique_schema, 49 | ): 50 | with project.adapter.connection_named("__test_grants"): 51 | relation = project.adapter.Relation.create( 52 | database=project.database, 53 | schema=get_schema_name(unique_schema), 54 | identifier="grant_access", 55 | ) 56 | project.adapter.create_schema(relation) 57 | yield relation 58 | 59 | @pytest.fixture(scope="class") 60 | def teardown_grant_schema( 61 | self, 62 | project, 63 | unique_schema, 64 | ): 65 | yield 66 | with project.adapter.connection_named("__test_grants"): 67 | relation = project.adapter.Relation.create( 68 | database=project.database, schema=get_schema_name(unique_schema) 69 | ) 70 | project.adapter.drop_schema(relation) 71 | 72 | @pytest.fixture(scope="class") 73 | def models(self, unique_schema): 74 | dataset = get_schema_name(unique_schema) 75 | return { 76 | "select_1.sql": select_1(dataset=dataset, materialized="view"), 77 | "select_1_table.sql": select_1(dataset=dataset, materialized="table"), 78 | } 79 | 80 | def test_grant_access_succeeds(self, project, setup_grant_schema, teardown_grant_schema): 81 | # Need to run twice to validate idempotency 82 | results = run_dbt(["run"]) 83 | assert len(results) == 2 84 | time.sleep(10) 85 | results = run_dbt(["run"]) 86 | assert len(results) == 2 87 | 88 | 89 | class TestAccessGrantFails: 90 | @pytest.fixture(scope="class") 91 | def models(self): 92 | return { 93 | "bad_config_table_child.sql": BAD_CONFIG_CHILD_TABLE, 94 | f"{BAD_CONFIG_TABLE_NAME}.sql": BAD_CONFIG_TABLE, 95 | } 96 | 97 | def test_grant_access_fails_without_running_child_table(self, project): 98 | # The parent model fails to build, so its child should be skipped 99 | results = run_dbt(["run"], expect_pass=False) 100 | assert results[0].status == "error" 101 | assert results[1].status == "skipped" 102 | assert results[0].message.startswith("404 GET https://bigquery.googleapis.com/") 103 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_grants.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.grants.base_grants import BaseGrants 2 | from dbt.tests.adapter.grants.test_model_grants import BaseModelGrants 3 | from dbt.tests.adapter.grants.test_incremental_grants import BaseIncrementalGrants 4 | from dbt.tests.adapter.grants.test_invalid_grants import BaseInvalidGrants 5 | from dbt.tests.adapter.grants.test_seed_grants import BaseSeedGrants 6 | from dbt.tests.adapter.grants.test_snapshot_grants import BaseSnapshotGrants 7 | 8 | 9 | class BaseGrantsBigQuery(BaseGrants): 10 | def privilege_grantee_name_overrides(self): 11 | return { 12 | "select": "roles/bigquery.dataViewer", 13 | "insert": "roles/bigquery.dataEditor", 14 | "fake_privilege": "roles/invalid", 15 | "invalid_user": "user:fake@dbtlabs.com", 16 | } 17 | 18 | 19 | class TestModelGrantsBigQuery(BaseGrantsBigQuery, BaseModelGrants): 20 | pass 21 | 22 | 23 | class TestIncrementalGrantsBigQuery(BaseGrantsBigQuery, BaseIncrementalGrants): 24 | pass 25 | 26 | 27 | class TestSeedGrantsBigQuery(BaseGrantsBigQuery, BaseSeedGrants): 28 | # seeds in dbt-bigquery are always "full refreshed," in such a way that 29 | # the grants do not carry over 30 | def seeds_support_partial_refresh(self): 31 | return False 32 | 33 | 34 | class TestSnapshotGrantsBigQuery(BaseGrantsBigQuery, BaseSnapshotGrants): 35 | pass 36 | 37 | 38 | class
TestInvalidGrantsBigQuery(BaseGrantsBigQuery, BaseInvalidGrants): 39 | def grantee_does_not_exist_error(self): 40 | return "User fake@dbtlabs.com does not exist." 41 | 42 | def privilege_does_not_exist_error(self): 43 | return "Role roles/invalid is not supported for this resource." 44 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_json_keyfile.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import pytest 4 | from dbt.adapters.bigquery.credentials import _is_base64 5 | 6 | 7 | def string_to_base64(s): 8 | return base64.b64encode(s.encode("utf-8")) 9 | 10 | 11 | @pytest.fixture 12 | def example_json_keyfile(): 13 | keyfile = json.dumps( 14 | { 15 | "type": "service_account", 16 | "project_id": "", 17 | "private_key_id": "", 18 | "private_key": "-----BEGIN PRIVATE KEY----------END PRIVATE KEY-----\n", 19 | "client_email": "", 20 | "client_id": "", 21 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 22 | "token_uri": "https://oauth2.googleapis.com/token", 23 | "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", 24 | "client_x509_cert_url": "", 25 | } 26 | ) 27 | 28 | return keyfile 29 | 30 | 31 | @pytest.fixture 32 | def example_json_keyfile_b64(): 33 | keyfile = json.dumps( 34 | { 35 | "type": "service_account", 36 | "project_id": "", 37 | "private_key_id": "", 38 | "private_key": "-----BEGIN PRIVATE KEY----------END PRIVATE KEY-----\n", 39 | "client_email": "", 40 | "client_id": "", 41 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 42 | "token_uri": "https://oauth2.googleapis.com/token", 43 | "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", 44 | "client_x509_cert_url": "", 45 | } 46 | ) 47 | 48 | return string_to_base64(keyfile) 49 | 50 | 51 | def test_valid_base64_strings(example_json_keyfile_b64): 52 | valid_strings = [ 53 | "SGVsbG8gV29ybGQh", # "Hello World!" 54 | "Zm9vYmFy", # "foobar" 55 | "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2Nzg5", # A long string 56 | "", # Empty string 57 | example_json_keyfile_b64.decode("utf-8"), 58 | ] 59 | 60 | for s in valid_strings: 61 | assert _is_base64(s) is True 62 | 63 | 64 | def test_valid_base64_bytes(example_json_keyfile_b64): 65 | valid_bytes = [ 66 | b"SGVsbG8gV29ybGQh", # "Hello World!" 
67 | b"Zm9vYmFy", # "foobar" 68 | b"QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2Nzg5", # A long string 69 | b"", # Empty bytes 70 | example_json_keyfile_b64, 71 | ] 72 | for s in valid_bytes: 73 | assert _is_base64(s) is True 74 | 75 | 76 | def test_invalid_base64(example_json_keyfile): 77 | invalid_inputs = [ 78 | "This is not Base64", 79 | "SGVsbG8gV29ybGQ", # Incorrect padding 80 | "Invalid#Base64", 81 | 12345, # Not a string or bytes 82 | b"Invalid#Base64", 83 | "H\xffGVsbG8gV29ybGQh", # Contains invalid character \xff 84 | example_json_keyfile, 85 | ] 86 | for s in invalid_inputs: 87 | assert _is_base64(s) is False 88 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_string_literal_macro.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | 4 | 5 | _MODEL_SQL = """ 6 | select {{ dbt.string_literal('my multiline 7 | string') }} as test 8 | """ 9 | 10 | 11 | class TestStringLiteralQuoting: 12 | @pytest.fixture(scope="class") 13 | def models(self): 14 | return {"my_model.sql": _MODEL_SQL} 15 | 16 | def test_string_literal_quoting(self, project): 17 | run_dbt() 18 | -------------------------------------------------------------------------------- /tests/functional/adapter/unit_testing/test_unit_testing.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.unit_testing.test_types import BaseUnitTestingTypes 3 | from dbt.tests.adapter.unit_testing.test_case_insensitivity import BaseUnitTestCaseInsensivity 4 | from dbt.tests.adapter.unit_testing.test_invalid_input import BaseUnitTestInvalidInput 5 | 6 | 7 | class TestBigQueryUnitTestingTypes(BaseUnitTestingTypes): 8 | @pytest.fixture 9 | def data_types(self): 10 | # sql_value, yaml_value 11 | return [ 12 | ["1", "1"], 13 | ["'1'", "1"], 14 | ["cast('true' as boolean)", "true"], 15 | ["1.0", "1.0"], 16 | ["'string value'", "string value"], 17 | ["cast(1.0 as numeric)", "1.0"], 18 | ["cast(1 as bigint)", 1], 19 | ["cast('2019-01-01' as date)", "2019-01-01"], 20 | ["cast('2013-11-03 00:00:00-07' as timestamp)", "2013-11-03 00:00:00-07"], 21 | ["st_geogpoint(75, 45)", "'st_geogpoint(75, 45)'"], 22 | # arrays 23 | ["cast(['a','b','c'] as array)", "['a','b','c']"], 24 | ["cast([1,2,3] as array)", "[1,2,3]"], 25 | ["cast([true,true,false] as array)", "[true,true,false]"], 26 | # array of date 27 | ["[date '2019-01-01']", "['2020-01-01']"], 28 | ["[date '2019-01-01']", "[]"], 29 | ["[date '2019-01-01']", "null"], 30 | # array of timestamp 31 | ["[timestamp '2019-01-01']", "['2020-01-01']"], 32 | ["[timestamp '2019-01-01']", "[]"], 33 | ["[timestamp '2019-01-01']", "null"], 34 | # json 35 | [ 36 | """json '{"name": "Cooper", "forname": "Alice"}'""", 37 | """{"name": "Cooper", "forname": "Alice"}""", 38 | ], 39 | ["""json '{"name": "Cooper", "forname": "Alice"}'""", "{}"], 40 | # structs 41 | ["struct('Isha' as name, 22 as age)", """'struct("Isha" as name, 22 as age)'"""], 42 | [ 43 | "struct('Kipketer' AS name, [23.2, 26.1, 27.3, 29.4] AS laps)", 44 | """'struct("Kipketer" AS name, [23.2, 26.1, 27.3, 29.4] AS laps)'""", 45 | ], 46 | # struct of struct 47 | [ 48 | "struct(struct(1 as id, 'blue' as color) as my_struct)", 49 | """'struct(struct(1 as id, "blue" as color) as my_struct)'""", 50 | ], 51 | # array of struct 52 | [ 53 | "[struct(st_geogpoint(75, 45) as my_point), struct(st_geogpoint(75, 35) as my_point)]", 54 | 
"['struct(st_geogpoint(75, 45) as my_point)', 'struct(st_geogpoint(75, 35) as my_point)']", 55 | ], 56 | ] 57 | 58 | 59 | class TestBigQueryUnitTestCaseInsensitivity(BaseUnitTestCaseInsensivity): 60 | pass 61 | 62 | 63 | class TestBigQueryUnitTestInvalidInput(BaseUnitTestInvalidInput): 64 | pass 65 | -------------------------------------------------------------------------------- /tests/functional/adapter/upload_file_tests/data/parquet/source.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-bigquery/0995665e490cdee9c408d26aac8e1c19fefaebe0/tests/functional/adapter/upload_file_tests/data/parquet/source.parquet -------------------------------------------------------------------------------- /tests/functional/adapter/utils/fixture_array_append.py: -------------------------------------------------------------------------------- 1 | # array_append 2 | 3 | # EXCEPT can't be used with ARRAYs in BigQuery, so convert to a string 4 | models__array_append_expected_sql = """ 5 | select 1 as id, {{ array_to_string(array_construct([1,2,3,4])) }} as array_col union all 6 | select 2 as id, {{ array_to_string(array_construct([4])) }} as array_col 7 | """ 8 | 9 | 10 | models__array_append_actual_sql = """ 11 | select 1 as id, {{ array_to_string(array_append(array_construct([1,2,3]), 4)) }} as array_col union all 12 | select 2 as id, {{ array_to_string(array_append(array_construct([]), 4)) }} as array_col 13 | """ 14 | -------------------------------------------------------------------------------- /tests/functional/adapter/utils/fixture_array_concat.py: -------------------------------------------------------------------------------- 1 | # array_concat 2 | 3 | # EXCEPT can't be used with ARRAYs in BigQuery, so convert to a string 4 | models__array_concat_expected_sql = """ 5 | select 1 as id, {{ array_to_string(array_construct([1,2,3,4,5,6])) }} as array_col union all 6 | select 2 as id, {{ array_to_string(array_construct([2])) }} as array_col union all 7 | select 3 as id, {{ array_to_string(array_construct([3])) }} as array_col 8 | """ 9 | 10 | 11 | models__array_concat_actual_sql = """ 12 | select 1 as id, {{ array_to_string(array_concat(array_construct([1,2,3]), array_construct([4,5,6]))) }} as array_col union all 13 | select 2 as id, {{ array_to_string(array_concat(array_construct([]), array_construct([2]))) }} as array_col union all 14 | select 3 as id, {{ array_to_string(array_concat(array_construct([3]), array_construct([]))) }} as array_col 15 | """ 16 | -------------------------------------------------------------------------------- /tests/functional/adapter/utils/fixture_array_construct.py: -------------------------------------------------------------------------------- 1 | # array_construct 2 | 3 | # EXCEPT can't be used with ARRAYs in BigQuery, so convert to a string 4 | models__array_construct_expected_sql = """ 5 | select 1 as id, {{ array_to_string(array_construct([1,2,3])) }} as array_col union all 6 | select 2 as id, {{ array_to_string(array_construct([])) }} as array_col 7 | """ 8 | 9 | 10 | models__array_construct_actual_sql = """ 11 | select 1 as id, {{ array_to_string(array_construct([1,2,3])) }} as array_col union all 12 | select 2 as id, {{ array_to_string(array_construct([])) }} as array_col 13 | """ 14 | 15 | 16 | macros__array_to_string_sql = """ 17 | {% macro array_to_string(array) %} 18 | (select string_agg(cast(element as string), ',') from unnest({{ array }}) element) 19 | {% endmacro %} 20 | 
""" 21 | -------------------------------------------------------------------------------- /tests/functional/adapter/utils/fixture_get_intervals_between.py: -------------------------------------------------------------------------------- 1 | models__bq_test_get_intervals_between_sql = """ 2 | SELECT 3 | {{ get_intervals_between("'2023-09-01'", "'2023-09-12'", "day") }} as intervals, 4 | 11 as expected 5 | 6 | """ 7 | 8 | models___bq_test_get_intervals_between_yml = """ 9 | version: 2 10 | models: 11 | - name: test_get_intervals_between 12 | tests: 13 | - assert_equal: 14 | actual: intervals 15 | expected: expected 16 | """ 17 | -------------------------------------------------------------------------------- /tests/functional/adapter/utils/test_data_types.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.utils.data_types.test_type_bigint import BaseTypeBigInt 2 | from dbt.tests.adapter.utils.data_types.test_type_float import BaseTypeFloat 3 | from dbt.tests.adapter.utils.data_types.test_type_int import BaseTypeInt 4 | from dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric 5 | from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString 6 | from dbt.tests.adapter.utils.data_types.test_type_timestamp import BaseTypeTimestamp 7 | from dbt.tests.adapter.utils.data_types.test_type_boolean import BaseTypeBoolean 8 | 9 | 10 | class TestTypeBigInt(BaseTypeBigInt): 11 | pass 12 | 13 | 14 | class TestTypeFloat(BaseTypeFloat): 15 | pass 16 | 17 | 18 | class TestTypeInt(BaseTypeInt): 19 | pass 20 | 21 | 22 | class TestTypeNumeric(BaseTypeNumeric): 23 | def numeric_fixture_type(self): 24 | return "numeric" 25 | 26 | 27 | class TestTypeString(BaseTypeString): 28 | pass 29 | 30 | 31 | class TestTypeTimestamp(BaseTypeTimestamp): 32 | pass 33 | 34 | 35 | class TestTypeBoolean(BaseTypeBoolean): 36 | pass 37 | -------------------------------------------------------------------------------- /tests/functional/adapter/utils/test_timestamps.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.adapter.utils.test_timestamps import BaseCurrentTimestamps 3 | 4 | 5 | class TestCurrentTimestampBigQuery(BaseCurrentTimestamps): 6 | @pytest.fixture(scope="class") 7 | def expected_schema(self): 8 | return { 9 | "current_timestamp": "TIMESTAMP", 10 | "current_timestamp_in_utc_backcompat": "TIMESTAMP", 11 | "current_timestamp_backcompat": "TIMESTAMP", 12 | } 13 | 14 | @pytest.fixture(scope="class") 15 | def expected_sql(self): 16 | return """select current_timestamp() as current_timestamp, 17 | current_timestamp as current_timestamp_in_utc_backcompat, 18 | current_timestamp as current_timestamp_backcompat""" 19 | -------------------------------------------------------------------------------- /tests/functional/python_model_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-bigquery/0995665e490cdee9c408d26aac8e1c19fefaebe0/tests/functional/python_model_tests/__init__.py -------------------------------------------------------------------------------- /tests/functional/python_model_tests/files.py: -------------------------------------------------------------------------------- 1 | SINGLE_RECORD = """ 2 | import pandas as pd 3 | 4 | def model(dbt, session): 5 | 6 | dbt.config( 7 | submission_method="serverless", 8 | materialized="table" 9 | ) 10 | 11 | df = 
pd.DataFrame( 12 | [ 13 | {"column_name": {"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}}, 14 | ] 15 | ) 16 | 17 | return df 18 | """ 19 | 20 | 21 | MULTI_RECORD = """ 22 | import pandas as pd 23 | 24 | def model(dbt, session): 25 | 26 | dbt.config( 27 | submission_method="serverless", 28 | materialized="table", 29 | ) 30 | 31 | df = pd.DataFrame( 32 | [ 33 | {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, 34 | ] 35 | ) 36 | 37 | return df 38 | """ 39 | 40 | 41 | ORC_FORMAT = """ 42 | import pandas as pd 43 | 44 | def model(dbt, session): 45 | 46 | dbt.config( 47 | submission_method="serverless", 48 | materialized="table", 49 | intermediate_format="orc", 50 | ) 51 | 52 | df = pd.DataFrame( 53 | [ 54 | {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, 55 | ] 56 | ) 57 | 58 | return df 59 | """ 60 | 61 | 62 | ENABLE_LIST_INFERENCE = """ 63 | import pandas as pd 64 | 65 | def model(dbt, session): 66 | 67 | dbt.config( 68 | submission_method="serverless", 69 | materialized="table", 70 | enable_list_inference="true", 71 | ) 72 | 73 | df = pd.DataFrame( 74 | [ 75 | {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, 76 | ] 77 | ) 78 | 79 | return df 80 | """ 81 | 82 | 83 | ENABLE_LIST_INFERENCE_PARQUET_FORMAT = """ 84 | import pandas as pd 85 | 86 | def model(dbt, session): 87 | 88 | dbt.config( 89 | submission_method="serverless", 90 | materialized="table", 91 | enable_list_inference="true", 92 | intermediate_format="parquet", 93 | ) 94 | 95 | df = pd.DataFrame( 96 | [ 97 | {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, 98 | ] 99 | ) 100 | 101 | return df 102 | """ 103 | 104 | 105 | DISABLE_LIST_INFERENCE_ORC_FORMAT = """ 106 | import pandas as pd 107 | 108 | def model(dbt, session): 109 | 110 | dbt.config( 111 | submission_method="serverless", 112 | materialized="table", 113 | enable_list_inference="false", 114 | intermediate_format="orc", 115 | ) 116 | 117 | df = pd.DataFrame( 118 | [ 119 | {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, 120 | ] 121 | ) 122 | 123 | return df 124 | 125 | """ 126 | -------------------------------------------------------------------------------- /tests/functional/python_model_tests/test_list_inference.py: -------------------------------------------------------------------------------- 1 | """ 2 | This test case addresses this regression: https://github.com/dbt-labs/dbt-bigquery/issues/1047 3 | 4 | As the comments point out, the issue appears when the settings are: 5 | - list inference: off 6 | - intermediate format: parquet 7 | 8 | Adjusting either of these alleviates the issue. 9 | 10 | When the regression was first reported, `files.MULTI_RECORD` failed while the other models passed. 
11 | """ 12 | 13 | from dbt.tests.util import run_dbt_and_capture 14 | import pytest 15 | 16 | from tests.functional.python_model_tests import files 17 | 18 | 19 | class TestPythonListInference: 20 | @pytest.fixture(scope="class") 21 | def models(self): 22 | return { 23 | # this is what worked prior to this issue 24 | "single_record.py": files.SINGLE_RECORD, 25 | # this is the model that initially failed for this issue 26 | "multi_record.py": files.MULTI_RECORD, 27 | # these are explicit versions of the default settings 28 | "enable_list_inference.py": files.ENABLE_LIST_INFERENCE, 29 | "enable_list_inference_parquet_format.py": files.ENABLE_LIST_INFERENCE_PARQUET_FORMAT, 30 | # orc format also resolves the issue, regardless of list inference 31 | "orc_format.py": files.ORC_FORMAT, 32 | "disable_list_inference_orc_format.py": files.DISABLE_LIST_INFERENCE_ORC_FORMAT, 33 | } 34 | 35 | def test_models_success(self, project, models): 36 | result, output = run_dbt_and_capture(["run"]) 37 | assert len(result) == len(models) 38 | -------------------------------------------------------------------------------- /tests/functional/test_delete_column_policy.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt, get_connection, relation_from_name, write_config_file 3 | 4 | from dbt.adapters.bigquery import BigQueryRelation 5 | 6 | _POLICY_TAG_MODEL = """{{ 7 | config( 8 | materialized='table', 9 | persist_docs={ 'columns': true } 10 | ) 11 | }} 12 | 13 | select 14 | struct( 15 | 1 as field 16 | ) as first_struct 17 | """ 18 | 19 | _POLICY_TAG_YML = """version: 2 20 | 21 | models: 22 | - name: policy_tag_table 23 | columns: 24 | - name: first_struct 25 | - name: first_struct.field 26 | policy_tags: 27 | - '{{ var("policy_tag") }}' 28 | """ 29 | 30 | _POLICY_TAG_YML_NO_POLICY_TAGS = """version: 2 31 | 32 | models: 33 | - name: policy_tag_table 34 | columns: 35 | - name: first_struct 36 | - name: first_struct.field 37 | """ 38 | 39 | # Manually generated https://console.cloud.google.com/bigquery/policy-tags?project=dbt-test-env 40 | _POLICY_TAG = "projects/dbt-test-env/locations/us/taxonomies/5785568062805976401/policyTags/135489647357012267" 41 | _POLICY_TAG_MODEL_NAME = "policy_tag_table" 42 | 43 | 44 | class TestBigqueryDeleteColumnPolicy: 45 | """See BQ docs for more info on policy tags: 46 | https://cloud.google.com/bigquery/docs/column-level-security#work_with_policy_tags""" 47 | 48 | @pytest.fixture(scope="class") 49 | def project_config_update(self): 50 | return {"config-version": 2, "vars": {"policy_tag": _POLICY_TAG}} 51 | 52 | @pytest.fixture(scope="class") 53 | def models(self): 54 | return {f"{_POLICY_TAG_MODEL_NAME}.sql": _POLICY_TAG_MODEL, "schema.yml": _POLICY_TAG_YML} 55 | 56 | def test_bigquery_delete_column_policy_tag(self, project): 57 | results = run_dbt(["run", "-f", "--models", "policy_tag_table"]) 58 | assert len(results) == 1 59 | write_config_file( 60 | _POLICY_TAG_YML_NO_POLICY_TAGS, project.project_root + "/models", "schema.yml" 61 | ) # update the model to remove the policy tag 62 | new_results = run_dbt(["run", "-f", "--models", "policy_tag_table"]) 63 | assert len(new_results) == 1 64 | relation: BigQueryRelation = relation_from_name(project.adapter, _POLICY_TAG_MODEL_NAME) 65 | adapter = project.adapter 66 | with get_connection(project.adapter) as conn: 67 | table = conn.handle.get_table( 68 | adapter.connections.get_bq_table( 69 | relation.database, relation.schema, relation.table 70 | ) 
71 | ) 72 | for schema_field in table.schema: 73 | assert schema_field.policy_tags is None 74 | -------------------------------------------------------------------------------- /tests/functional/test_drop_temp_relation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from google.api_core.exceptions import NotFound 3 | from dbt.adapters.bigquery.relation import BigQueryRelation 4 | from dbt.tests.util import run_dbt, get_connection, relation_from_name 5 | 6 | 7 | _INCREMENTAL_MODEL = """ 8 | {{ 9 | config( 10 | materialized="incremental", 11 | on_schema_change="sync_all_columns" 12 | ) 13 | }} 14 | select 20 as id, cast('2020-01-01 01:00:00' as datetime) as date_hour union all 15 | select 40 as id, cast('2020-01-01 02:00:00' as datetime) as date_hour 16 | """ 17 | 18 | _INCREMENTAL_MODEL_YAML = """version: 2 19 | models: 20 | - name: test_drop_relation 21 | columns: 22 | - name: id 23 | type: int64 24 | - name: date_hour 25 | type: datetime 26 | """ 27 | 28 | 29 | class BaseIncrementalModelConfig: 30 | @pytest.fixture(scope="class") 31 | def models(self): 32 | return { 33 | "test_drop_relation.sql": _INCREMENTAL_MODEL, 34 | "schema.yml": _INCREMENTAL_MODEL_YAML, 35 | } 36 | 37 | 38 | class TestIncrementalModel(BaseIncrementalModelConfig): 39 | def test_incremental_model_succeeds(self, project): 40 | """ 41 | Steps: 42 | 1. Create the model 43 | 2. Merge into the model using __dbt_tmp table 44 | 3. Assert raises NotFound exception 45 | """ 46 | results = run_dbt(["run"]) 47 | assert len(results) == 1 48 | results = run_dbt(["run"]) 49 | assert len(results) == 1 50 | relation: BigQueryRelation = relation_from_name( 51 | project.adapter, "test_drop_relation__dbt_tmp" 52 | ) 53 | adapter = project.adapter 54 | with pytest.raises(NotFound): 55 | with get_connection(project.adapter) as conn: 56 | conn.handle.get_table( 57 | adapter.connections.get_bq_table( 58 | relation.database, relation.schema, relation.table 59 | ) 60 | ) 61 | -------------------------------------------------------------------------------- /tests/functional/test_get_columns_incomplete_database.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | 4 | # This is to test an edge case found in https://github.com/dbt-labs/dbt-bigquery/pull/165/files 5 | 6 | tests__get_cols_in_sql = """ 7 | {% test get_cols_in(model) %} 8 | 9 | {# The step which causes the issue #} 10 | {%- set relation = api.Relation.create(identifier=model.table) if execute -%} 11 | 12 | {% set columns = adapter.get_columns_in_relation(relation) %} 13 | 14 | select 15 | {% for col in columns %} 16 | {{ col.name }} {{ "," if not loop.last }} 17 | {% endfor %} 18 | 19 | from {{ model }} 20 | limit 0 21 | 22 | {% endtest %} 23 | """ 24 | 25 | models__my_model = """select 1 as id, 'text' as another_col 26 | """ 27 | 28 | properties__model_yml = """ 29 | version: 2 30 | models: 31 | - name: my_model 32 | tests: 33 | - get_cols_in 34 | """ 35 | 36 | 37 | class TestIncompleteRelationSetup: 38 | @pytest.fixture(scope="class") 39 | def properties(self): 40 | return {"properties__model_yml.yml": properties__model_yml} 41 | 42 | @pytest.fixture(scope="class") 43 | def macros(self): 44 | return {"get_col_in.sql": tests__get_cols_in_sql} 45 | 46 | @pytest.fixture(scope="class") 47 | def models(self): 48 | return {"my_model.sql": models__my_model} 49 | 50 | 51 | class TestIncompleteRelation(TestIncompleteRelationSetup): 52 | def
test_incomplete_relation(self, project): 53 | run_dbt(["build"]) 54 | -------------------------------------------------------------------------------- /tests/functional/test_hours_to_expiration.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt_and_capture 3 | 4 | _MODEL_SQL = """ 5 | select 1 as id 6 | """ 7 | 8 | 9 | class BaseBigQueryHoursToExpiration: 10 | @pytest.fixture(scope="class") 11 | def models(self): 12 | return { 13 | "model.sql": _MODEL_SQL, 14 | } 15 | 16 | @pytest.fixture(scope="class") 17 | def project_config_update(self): 18 | return { 19 | "models": {"test": {"materialized": "table", "model": {"hours_to_expiration": "4"}}} 20 | } 21 | 22 | 23 | class TestBigQueryHoursToExpiration(BaseBigQueryHoursToExpiration): 24 | def test_bigquery_hours_to_expiration(self, project): 25 | _, stdout = run_dbt_and_capture(["--debug", "run"]) 26 | assert "expiration_timestamp=TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL 4 hour)" in stdout 27 | -------------------------------------------------------------------------------- /tests/functional/test_incremental_materialization.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | 4 | # This is a short term hack, we need to go back 5 | # and make adapter implementations of: 6 | # https://github.com/dbt-labs/dbt-core/pull/6330 7 | 8 | _INCREMENTAL_MODEL = """ 9 | {{ 10 | config( 11 | materialized="incremental", 12 | ) 13 | }} 14 | 15 | {% if not is_incremental() %} 16 | 17 | select 10 as id, cast('2020-01-01 01:00:00' as datetime) as date_hour union all 18 | select 30 as id, cast('2020-01-01 02:00:00' as datetime) as date_hour 19 | 20 | {% else %} 21 | 22 | select 20 as id, cast('2020-01-01 01:00:00' as datetime) as date_hour union all 23 | select 40 as id, cast('2020-01-01 02:00:00' as datetime) as date_hour 24 | 25 | {% endif %} 26 | -- Test Comment To Prevent Reccurence of https://github.com/dbt-labs/dbt-core/issues/6485 27 | """ 28 | 29 | 30 | class BaseIncrementalModelConfig: 31 | @pytest.fixture(scope="class") 32 | def models(self): 33 | return {"test_incremental.sql": _INCREMENTAL_MODEL} 34 | 35 | 36 | class TestIncrementalModel(BaseIncrementalModelConfig): 37 | def test_incremental_model_succeeds(self, project): 38 | results = run_dbt(["run"]) 39 | assert len(results) == 1 40 | results = run_dbt(["run"]) 41 | assert len(results) == 1 42 | -------------------------------------------------------------------------------- /tests/functional/test_job_timeout.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dbt.tests.util import run_dbt 4 | 5 | _REASONABLE_TIMEOUT = 300 6 | _SHORT_TIMEOUT = 1 7 | 8 | _LONG_RUNNING_MODEL_SQL = """ 9 | {{ config(materialized='table') }} 10 | with array_1 as ( 11 | select generated_ids from UNNEST(GENERATE_ARRAY(1, 200000)) AS generated_ids 12 | ), 13 | array_2 as ( 14 | select generated_ids from UNNEST(GENERATE_ARRAY(2, 200000)) AS generated_ids 15 | ) 16 | 17 | SELECT array_1.generated_ids 18 | FROM array_1 19 | LEFT JOIN array_1 as jnd on 1=1 20 | LEFT JOIN array_2 as jnd2 on 1=1 21 | LEFT JOIN array_1 as jnd3 on jnd3.generated_ids >= jnd2.generated_ids 22 | """ 23 | 24 | _SHORT_RUNNING_QUERY = """ 25 | SELECT 1 as id 26 | """ 27 | 28 | 29 | class TestSuccessfulJobRun: 30 | @pytest.fixture(scope="class") 31 | def models(self): 32 | return { 33 | "model.sql": 
_SHORT_RUNNING_QUERY, 34 | } 35 | 36 | @pytest.fixture(scope="class") 37 | def profiles_config_update(self, dbt_profile_target): 38 | outputs = {"default": dbt_profile_target} 39 | outputs["default"]["job_execution_timeout_seconds"] = _REASONABLE_TIMEOUT 40 | return {"test": {"outputs": outputs, "target": "default"}} 41 | 42 | def test_bigquery_job_run_succeeds_within_timeout(self, project): 43 | result = run_dbt() 44 | assert len(result) == 1 45 | 46 | 47 | class TestJobTimeout: 48 | @pytest.fixture(scope="class") 49 | def models(self): 50 | return { 51 | "model.sql": _LONG_RUNNING_MODEL_SQL, 52 | } 53 | 54 | @pytest.fixture(scope="class") 55 | def profiles_config_update(self, dbt_profile_target): 56 | outputs = {"default": dbt_profile_target} 57 | outputs["default"]["job_execution_timeout_seconds"] = _SHORT_TIMEOUT 58 | return {"test": {"outputs": outputs, "target": "default"}} 59 | 60 | def test_job_timeout(self, project): 61 | result = run_dbt(["run"], expect_pass=False) # project setup will fail 62 | expected_error = f"Operation did not complete within the designated timeout of {_SHORT_TIMEOUT} seconds." 63 | assert expected_error in result[0].message 64 | -------------------------------------------------------------------------------- /tests/functional/test_location_change.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | from dbt.tests.util import run_dbt 4 | 5 | _MODEL_SQL = """ 6 | select 1 as id 7 | """ 8 | 9 | _INVALID_LOCATION = os.getenv("DBT_TEST_BIGQUERY_BAD_LOCATION", "northamerica-northeast1") 10 | _VALID_LOCATION = os.getenv("DBT_TEST_BIGQUERY_INITIAL_LOCATION", "US") 11 | 12 | 13 | class BaseBigQueryLocation: 14 | @pytest.fixture(scope="class") 15 | def models(self): 16 | return { 17 | "model.sql": _MODEL_SQL, 18 | } 19 | 20 | 21 | class TestBigqueryValidLocation(BaseBigQueryLocation): 22 | def test_bigquery_valid_location(self, project): 23 | results = run_dbt() 24 | for result in results: 25 | assert "US" == result.adapter_response["location"] 26 | 27 | 28 | class TestBigqueryInvalidLocation(BaseBigQueryLocation): 29 | @pytest.fixture(scope="class") 30 | def profiles_config_update(self, dbt_profile_target): 31 | outputs = {"default": dbt_profile_target} 32 | outputs["default"]["location"] = _INVALID_LOCATION 33 | yield 34 | outputs = {"default": dbt_profile_target} 35 | outputs["default"]["location"] = _VALID_LOCATION 36 | 37 | def test_bigquery_location_invalid(self, project): 38 | results = run_dbt() 39 | for result in results: 40 | assert "northamerica-northeast1" == result.adapter_response["location"] 41 | -------------------------------------------------------------------------------- /tests/functional/test_override_database/fixtures.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.fixtures.project import write_project_files 3 | 4 | 5 | models__view_2_sql = """ 6 | {%- if target.type == 'bigquery' -%} 7 | {{ config(project=var('alternate_db')) }} 8 | {%- else -%} 9 | {{ config(database=var('alternate_db')) }} 10 | {%- endif -%} 11 | select * from {{ ref('seed') }} 12 | 13 | """ 14 | 15 | models__view_1_sql = """ 16 | {# 17 | We are running against a database that must be quoted. 
18 | These calls ensure that we trigger an error if we're failing to quote at parse-time 19 | #} 20 | {% do adapter.already_exists(this.schema, this.table) %} 21 | {% do adapter.get_relation(this.database, this.schema, this.table) %} 22 | select * from {{ ref('seed') }} 23 | 24 | """ 25 | 26 | models__subfolder__view_4_sql = """ 27 | {{ 28 | config(database=var('alternate_db')) 29 | }} 30 | 31 | select * from {{ ref('seed') }} 32 | 33 | """ 34 | 35 | models__subfolder__view_3_sql = """ 36 | select * from {{ ref('seed') }} 37 | 38 | """ 39 | 40 | seeds__seed_csv = """id,name 41 | 1,a 42 | 2,b 43 | 3,c 44 | 4,d 45 | 5,e 46 | """ 47 | 48 | 49 | @pytest.fixture(scope="class") 50 | def models(): 51 | return { 52 | "view_2.sql": models__view_2_sql, 53 | "view_1.sql": models__view_1_sql, 54 | "subfolder": { 55 | "view_4.sql": models__subfolder__view_4_sql, 56 | "view_3.sql": models__subfolder__view_3_sql, 57 | }, 58 | } 59 | 60 | 61 | @pytest.fixture(scope="class") 62 | def seeds(): 63 | return {"seed.csv": seeds__seed_csv} 64 | 65 | 66 | @pytest.fixture(scope="class") 67 | def project_files( 68 | project_root, 69 | models, 70 | seeds, 71 | ): 72 | write_project_files(project_root, "models", models) 73 | write_project_files(project_root, "seeds", seeds) 74 | -------------------------------------------------------------------------------- /tests/functional/test_quota_project.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from dbt.tests.util import run_dbt 6 | 7 | _QUOTA_PROJECT = os.getenv("BIGQUERY_TEST_ALT_DATABASE") 8 | 9 | 10 | class TestNoQuotaProject: 11 | def test_no_quota_project(self, project): 12 | results = run_dbt() 13 | for result in results: 14 | assert None == result.adapter_response["quota_project"] 15 | 16 | 17 | class TestQuotaProjectOption: 18 | @pytest.fixture(scope="class") 19 | def profiles_config_update(self, dbt_profile_target): 20 | outputs = {"default": dbt_profile_target} 21 | outputs["default"]["quota_project"] = _QUOTA_PROJECT 22 | yield 23 | 24 | def test_quota_project_option(self, project): 25 | results = run_dbt() 26 | for result in results: 27 | assert _QUOTA_PROJECT == result.adapter_response["quota_project"] 28 | -------------------------------------------------------------------------------- /tests/functional/test_update_column_policy.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt, get_connection, relation_from_name 3 | 4 | from dbt.adapters.bigquery import BigQueryRelation 5 | 6 | _POLICY_TAG_MODEL = """{{ 7 | config( 8 | materialized='table', 9 | persist_docs={ 'columns': true } 10 | ) 11 | }} 12 | 13 | select 14 | 1 field 15 | """ 16 | 17 | _POLICY_TAG_YML = """version: 2 18 | 19 | models: 20 | - name: policy_tag_table 21 | columns: 22 | - name: field 23 | policy_tags: 24 | - '{{ var("policy_tag") }}' 25 | """ 26 | 27 | # Manually generated https://console.cloud.google.com/bigquery/policy-tags?project=dbt-test-env 28 | _POLICY_TAG = "projects/dbt-test-env/locations/us/taxonomies/5785568062805976401/policyTags/135489647357012267" 29 | _POLICY_TAG_MODEL_NAME = "policy_tag_table" 30 | 31 | 32 | class TestBigqueryUpdateColumnPolicy: 33 | """See BQ docs for more info on policy tags: 34 | https://cloud.google.com/bigquery/docs/column-level-security#work_with_policy_tags""" 35 | 36 | @pytest.fixture(scope="class") 37 | def project_config_update(self): 38 | return {"config-version": 2, "vars": 
{"policy_tag": _POLICY_TAG}} 39 | 40 | @pytest.fixture(scope="class") 41 | def models(self): 42 | return {f"{_POLICY_TAG_MODEL_NAME}.sql": _POLICY_TAG_MODEL, "schema.yml": _POLICY_TAG_YML} 43 | 44 | def test_bigquery_update_column_policy_tag(self, project): 45 | results = run_dbt(["run", "--models", "policy_tag_table"]) 46 | assert len(results) == 1 47 | relation: BigQueryRelation = relation_from_name(project.adapter, _POLICY_TAG_MODEL_NAME) 48 | adapter = project.adapter 49 | with get_connection(project.adapter) as conn: 50 | table = conn.handle.get_table( 51 | adapter.connections.get_bq_table( 52 | relation.database, relation.schema, relation.table 53 | ) 54 | ) 55 | for schema_field in table.schema: 56 | assert schema_field.policy_tags.names == (_POLICY_TAG,) 57 | -------------------------------------------------------------------------------- /tests/functional/test_update_field_description.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import relation_from_name, get_connection, run_dbt 3 | 4 | from dbt.adapters.bigquery import BigQueryRelation 5 | 6 | _FIELD_DESCRIPTION_MODEL = """{{ 7 | config( 8 | materialized='table', 9 | persist_docs={ 'columns': true } 10 | ) 11 | }} 12 | 13 | select 14 | 1 field 15 | """ 16 | _FIELD_DESCRIPTION_MODEL_NAME = "field_description_model" 17 | _FIELD_DESCRIPTION = "this is not a field" 18 | _FIELD_DESCRIPTION_MODEL_YML = """ 19 | version: 2 20 | 21 | models: 22 | - name: field_description_model 23 | columns: 24 | - name: field 25 | description: '{{ var("field_description") }}' 26 | """ 27 | 28 | 29 | class TestBigqueryUpdateColumnDescription: 30 | @pytest.fixture(scope="class") 31 | def project_config_update(self): 32 | return {"config-version": 2, "vars": {"field_description": _FIELD_DESCRIPTION}} 33 | 34 | @pytest.fixture(scope="class") 35 | def models(self): 36 | return { 37 | f"{_FIELD_DESCRIPTION_MODEL_NAME}.sql": _FIELD_DESCRIPTION_MODEL, 38 | "schema.yml": _FIELD_DESCRIPTION_MODEL_YML, 39 | } 40 | 41 | def test_bigquery_update_column_description(self, project): 42 | results = run_dbt(["run"]) 43 | assert len(results) == 1 44 | relation: BigQueryRelation = relation_from_name( 45 | project.adapter, _FIELD_DESCRIPTION_MODEL_NAME 46 | ) 47 | adapter = project.adapter 48 | with get_connection(project.adapter) as conn: 49 | table = conn.handle.get_table( 50 | adapter.connections.get_bq_table( 51 | relation.database, relation.schema, relation.table 52 | ) 53 | ) 54 | for schema_field in table.schema: 55 | assert schema_field.description == _FIELD_DESCRIPTION 56 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-bigquery/0995665e490cdee9c408d26aac8e1c19fefaebe0/tests/unit/__init__.py -------------------------------------------------------------------------------- /tests/unit/mock_adapter.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | from dbt.adapters.base import BaseAdapter 4 | from contextlib import contextmanager 5 | 6 | 7 | def adapter_factory(): 8 | class MockAdapter(BaseAdapter): 9 | ConnectionManager = mock.MagicMock(TYPE="mock") 10 | responder = mock.MagicMock() 11 | # some convenient defaults 12 | responder.quote.side_effect = lambda identifier: '"{}"'.format(identifier) 13 | 
responder.date_function.side_effect = lambda: "unitdate()" 14 | responder.is_cancelable.side_effect = lambda: False 15 | 16 | @contextmanager 17 | def exception_handler(self, *args, **kwargs): 18 | self.responder.exception_handler(*args, **kwargs) 19 | yield 20 | 21 | def execute(self, *args, **kwargs): 22 | return self.responder.execute(*args, **kwargs) 23 | 24 | def drop_relation(self, *args, **kwargs): 25 | return self.responder.drop_relation(*args, **kwargs) 26 | 27 | def truncate_relation(self, *args, **kwargs): 28 | return self.responder.truncate_relation(*args, **kwargs) 29 | 30 | def rename_relation(self, *args, **kwargs): 31 | return self.responder.rename_relation(*args, **kwargs) 32 | 33 | def get_columns_in_relation(self, *args, **kwargs): 34 | return self.responder.get_columns_in_relation(*args, **kwargs) 35 | 36 | def expand_column_types(self, *args, **kwargs): 37 | return self.responder.expand_column_types(*args, **kwargs) 38 | 39 | def list_relations_without_caching(self, *args, **kwargs): 40 | return self.responder.list_relations_without_caching(*args, **kwargs) 41 | 42 | def create_schema(self, *args, **kwargs): 43 | return self.responder.create_schema(*args, **kwargs) 44 | 45 | def drop_schema(self, *args, **kwargs): 46 | return self.responder.drop_schema(*args, **kwargs) 47 | 48 | @classmethod 49 | def quote(cls, identifier): 50 | return cls.responder.quote(identifier) 51 | 52 | def convert_text_type(self, *args, **kwargs): 53 | return self.responder.convert_text_type(*args, **kwargs) 54 | 55 | def convert_number_type(self, *args, **kwargs): 56 | return self.responder.convert_number_type(*args, **kwargs) 57 | 58 | def convert_integer_type(self, *args, **kwargs): 59 | return self.responder.convert_integer_type(*args, **kwargs) 60 | 61 | def convert_boolean_type(self, *args, **kwargs): 62 | return self.responder.convert_boolean_type(*args, **kwargs) 63 | 64 | def convert_datetime_type(self, *args, **kwargs): 65 | return self.responder.convert_datetime_type(*args, **kwargs) 66 | 67 | def convert_date_type(self, *args, **kwargs): 68 | return self.responder.convert_date_type(*args, **kwargs) 69 | 70 | def convert_time_type(self, *args, **kwargs): 71 | return self.responder.convert_time_type(*args, **kwargs) 72 | 73 | def list_schemas(self, *args, **kwargs): 74 | return self.responder.list_schemas(*args, **kwargs) 75 | 76 | @classmethod 77 | def date_function(cls): 78 | return cls.responder.date_function() 79 | 80 | @classmethod 81 | def is_cancelable(cls): 82 | return cls.responder.is_cancelable() 83 | 84 | return MockAdapter 85 | -------------------------------------------------------------------------------- /tests/unit/test_configure_dataproc_batch.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | from dbt.adapters.bigquery.python_submissions import _update_batch_from_config 4 | from google.cloud import dataproc_v1 5 | 6 | from .test_bigquery_adapter import BaseTestBigQueryAdapter 7 | 8 | 9 | # Test application of dataproc_batch configuration to a 10 | # google.cloud.dataproc_v1.Batch object. 
11 | # This reuses the machinery from BaseTestBigQueryAdapter to get hold of the 12 | # parsed credentials 13 | class TestConfigureDataprocBatch(BaseTestBigQueryAdapter): 14 | @patch( 15 | "dbt.adapters.bigquery.credentials._create_bigquery_defaults", 16 | return_value=("credentials", "project_id"), 17 | ) 18 | def test_update_dataproc_serverless_batch(self, mock_get_bigquery_defaults): 19 | adapter = self.get_adapter("dataproc-serverless-configured") 20 | mock_get_bigquery_defaults.assert_called_once() 21 | 22 | credentials = adapter.acquire_connection("dummy").credentials 23 | self.assertIsNotNone(credentials) 24 | 25 | batchConfig = credentials.dataproc_batch 26 | self.assertIsNotNone(batchConfig) 27 | 28 | raw_batch_config = self.raw_profile["outputs"]["dataproc-serverless-configured"][ 29 | "dataproc_batch" 30 | ] 31 | raw_environment_config = raw_batch_config["environment_config"] 32 | raw_execution_config = raw_environment_config["execution_config"] 33 | raw_labels: dict[str, any] = raw_batch_config["labels"] 34 | raw_rt_config = raw_batch_config["runtime_config"] 35 | 36 | raw_batch_config = self.raw_profile["outputs"]["dataproc-serverless-configured"][ 37 | "dataproc_batch" 38 | ] 39 | 40 | batch = dataproc_v1.Batch() 41 | 42 | batch = _update_batch_from_config(raw_batch_config, batch) 43 | 44 | def to_str_values(d): 45 | """google's protobuf types expose maps as dict[str, str]""" 46 | return dict([(k, str(v)) for (k, v) in d.items()]) 47 | 48 | self.assertEqual( 49 | batch.environment_config.execution_config.service_account, 50 | raw_execution_config["service_account"], 51 | ) 52 | self.assertFalse(batch.environment_config.execution_config.network_uri) 53 | self.assertEqual( 54 | batch.environment_config.execution_config.subnetwork_uri, 55 | raw_execution_config["subnetwork_uri"], 56 | ) 57 | self.assertEqual( 58 | batch.environment_config.execution_config.network_tags, 59 | raw_execution_config["network_tags"], 60 | ) 61 | self.assertEqual(batch.labels, to_str_values(raw_labels)) 62 | self.assertEqual( 63 | batch.runtime_config.properties, to_str_values(raw_rt_config["properties"]) 64 | ) 65 | 66 | @patch( 67 | "dbt.adapters.bigquery.credentials._create_bigquery_defaults", 68 | return_value=("credentials", "project_id"), 69 | ) 70 | def test_default_dataproc_serverless_batch(self, mock_get_bigquery_defaults): 71 | adapter = self.get_adapter("dataproc-serverless-default") 72 | mock_get_bigquery_defaults.assert_called_once() 73 | 74 | credentials = adapter.acquire_connection("dummy").credentials 75 | self.assertIsNotNone(credentials) 76 | 77 | batchConfig = credentials.dataproc_batch 78 | self.assertIsNone(batchConfig) 79 | -------------------------------------------------------------------------------- /tests/unit/test_dataset.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset 2 | from dbt.adapters.bigquery import BigQueryRelation 3 | 4 | from google.cloud.bigquery import Dataset, AccessEntry, DatasetReference 5 | 6 | 7 | def test_add_access_entry_to_dataset_updates_dataset(): 8 | database = "someDb" 9 | dataset = "someDataset" 10 | entity = BigQueryRelation.from_dict( 11 | { 12 | "type": None, 13 | "path": { 14 | "database": "test-project", 15 | "schema": "test_schema", 16 | "identifier": "my_table", 17 | }, 18 | "quote_policy": {"identifier": False}, 19 | } 20 | ).to_dict() 21 | dataset_ref = DatasetReference(project=database, 
dataset_id=dataset) 22 | dataset = Dataset(dataset_ref) 23 | access_entry = AccessEntry(None, "table", entity) 24 | dataset = add_access_entry_to_dataset(dataset, access_entry) 25 | assert access_entry in dataset.access_entries 26 | 27 | 28 | def test_add_access_entry_to_dataset_updates_with_pre_existing_entries(): 29 | database = "someOtherDb" 30 | dataset = "someOtherDataset" 31 | entity_2 = BigQueryRelation.from_dict( 32 | { 33 | "type": None, 34 | "path": { 35 | "database": "test-project", 36 | "schema": "test_schema", 37 | "identifier": "some_other_view", 38 | }, 39 | "quote_policy": {"identifier": False}, 40 | } 41 | ).to_dict() 42 | dataset_ref = DatasetReference(project=database, dataset_id=dataset) 43 | dataset = Dataset(dataset_ref) 44 | initial_entry = AccessEntry(None, "view", entity_2) 45 | initial_entry._properties.pop("role") 46 | dataset.access_entries = [initial_entry] 47 | access_entry = AccessEntry(None, "view", entity_2) 48 | dataset = add_access_entry_to_dataset(dataset, access_entry) 49 | assert len(dataset.access_entries) == 2 50 | 51 | 52 | def test_is_access_entry_in_dataset_returns_true_if_entry_in_dataset(): 53 | database = "someDb" 54 | dataset = "someDataset" 55 | entity = BigQueryRelation.from_dict( 56 | { 57 | "type": None, 58 | "path": { 59 | "database": "test-project", 60 | "schema": "test_schema", 61 | "identifier": "my_table", 62 | }, 63 | "quote_policy": {"identifier": False}, 64 | } 65 | ).to_dict() 66 | dataset_ref = DatasetReference(project=database, dataset_id=dataset) 67 | dataset = Dataset(dataset_ref) 68 | access_entry = AccessEntry(None, "table", entity) 69 | dataset = add_access_entry_to_dataset(dataset, access_entry) 70 | assert is_access_entry_in_dataset(dataset, access_entry) 71 | 72 | 73 | def test_is_access_entry_in_dataset_returns_false_if_entry_not_in_dataset(): 74 | database = "someDb" 75 | dataset = "someDataset" 76 | entity = BigQueryRelation.from_dict( 77 | { 78 | "type": None, 79 | "path": { 80 | "database": "test-project", 81 | "schema": "test_schema", 82 | "identifier": "my_table", 83 | }, 84 | "quote_policy": {"identifier": False}, 85 | } 86 | ).to_dict() 87 | dataset_ref = DatasetReference(project=database, dataset_id=dataset) 88 | dataset = Dataset(dataset_ref) 89 | access_entry = AccessEntry(None, "table", entity) 90 | assert not is_access_entry_in_dataset(dataset, access_entry) 91 | -------------------------------------------------------------------------------- /tests/unit/test_renamed_relations.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.bigquery.relation import BigQueryRelation 2 | from dbt.adapters.contracts.relation import RelationType 3 | 4 | 5 | def test_renameable_relation(): 6 | relation = BigQueryRelation.create( 7 | database="my_db", 8 | schema="my_schema", 9 | identifier="my_table", 10 | type=RelationType.Table, 11 | ) 12 | assert relation.renameable_relations == frozenset( 13 | { 14 | RelationType.Table, 15 | } 16 | ) 17 | -------------------------------------------------------------------------------- /third-party-stubs/agate/__init__.pyi: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | 3 | from typing import Any, Optional, Callable, Iterable, Dict, Union 4 | 5 | from . 
import data_types as data_types 6 | from .data_types import ( 7 | Text as Text, 8 | Number as Number, 9 | Boolean as Boolean, 10 | DateTime as DateTime, 11 | Date as Date, 12 | TimeDelta as TimeDelta, 13 | ) 14 | 15 | class MappedSequence(Sequence): 16 | def __init__(self, values: Any, keys: Optional[Any] = ...) -> None: ... 17 | def __unicode__(self): ... 18 | def __getitem__(self, key: Any): ... 19 | def __setitem__(self, key: Any, value: Any) -> None: ... 20 | def __iter__(self): ... 21 | def __len__(self): ... 22 | def __eq__(self, other: Any): ... 23 | def __ne__(self, other: Any): ... 24 | def __contains__(self, value: Any): ... 25 | def keys(self): ... 26 | def values(self): ... 27 | def items(self): ... 28 | def get(self, key: Any, default: Optional[Any] = ...): ... 29 | def dict(self): ... 30 | 31 | class Row(MappedSequence): ... 32 | 33 | class Table: 34 | def __init__( 35 | self, 36 | rows: Any, 37 | column_names: Optional[Any] = ..., 38 | column_types: Optional[Any] = ..., 39 | row_names: Optional[Any] = ..., 40 | _is_fork: bool = ..., 41 | ) -> None: ... 42 | def __len__(self): ... 43 | def __iter__(self): ... 44 | def __getitem__(self, key: Any): ... 45 | @property 46 | def column_types(self): ... 47 | @property 48 | def column_names(self): ... 49 | @property 50 | def row_names(self): ... 51 | @property 52 | def columns(self): ... 53 | @property 54 | def rows(self): ... 55 | def print_csv(self, **kwargs: Any) -> None: ... 56 | def print_json(self, **kwargs: Any) -> None: ... 57 | def where(self, test: Callable[[Row], bool]) -> "Table": ... 58 | def select(self, key: Union[Iterable[str], str]) -> "Table": ... 59 | # these definitions are much narrower than what's actually accepted 60 | @classmethod 61 | def from_object( 62 | cls, obj: Iterable[Dict[str, Any]], *, column_types: Optional["TypeTester"] = None 63 | ) -> "Table": ... 64 | @classmethod 65 | def from_csv( 66 | cls, path: Iterable[str], *, column_types: Optional["TypeTester"] = None 67 | ) -> "Table": ... 68 | @classmethod 69 | def merge(cls, tables: Iterable["Table"]) -> "Table": ... 70 | def rename( 71 | self, 72 | column_names: Optional[Iterable[str]] = None, 73 | row_names: Optional[Any] = None, 74 | slug_columns: bool = False, 75 | slug_rows: bool = False, 76 | **kwargs: Any, 77 | ) -> "Table": ... 78 | 79 | class TypeTester: 80 | def __init__( 81 | self, force: Any = ..., limit: Optional[Any] = ..., types: Optional[Any] = ... 82 | ) -> None: ... 83 | def run(self, rows: Any, column_names: Any): ... 84 | 85 | class MaxPrecision: 86 | def __init__(self, column_name: Any) -> None: ... 87 | 88 | # this is not strictly true, but it's all we care about. 89 | def aggregate(self, aggregations: MaxPrecision) -> int: ... 90 | -------------------------------------------------------------------------------- /third-party-stubs/agate/data_types.pyi: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | DEFAULT_NULL_VALUES: Any 4 | 5 | class DataType: 6 | null_values: Any = ... 7 | def __init__(self, null_values: Any = ...) -> None: ... 8 | def test(self, d: Any): ... 9 | def cast(self, d: Any) -> None: ... 10 | def csvify(self, d: Any): ... 11 | def jsonify(self, d: Any): ... 12 | 13 | DEFAULT_TRUE_VALUES: Any 14 | DEFAULT_FALSE_VALUES: Any 15 | 16 | class Boolean(DataType): 17 | true_values: Any = ... 18 | false_values: Any = ... 19 | def __init__( 20 | self, true_values: Any = ..., false_values: Any = ..., null_values: Any = ... 21 | ) -> None: ... 
22 | def cast(self, d: Any): ... 23 | def jsonify(self, d: Any): ... 24 | 25 | ZERO_DT: Any 26 | 27 | class Date(DataType): 28 | date_format: Any = ... 29 | parser: Any = ... 30 | def __init__(self, date_format: Optional[Any] = ..., **kwargs: Any) -> None: ... 31 | def cast(self, d: Any): ... 32 | def csvify(self, d: Any): ... 33 | def jsonify(self, d: Any): ... 34 | 35 | class DateTime(DataType): 36 | datetime_format: Any = ... 37 | timezone: Any = ... 38 | def __init__( 39 | self, datetime_format: Optional[Any] = ..., timezone: Optional[Any] = ..., **kwargs: Any 40 | ) -> None: ... 41 | def cast(self, d: Any): ... 42 | def csvify(self, d: Any): ... 43 | def jsonify(self, d: Any): ... 44 | 45 | DEFAULT_CURRENCY_SYMBOLS: Any 46 | POSITIVE: Any 47 | NEGATIVE: Any 48 | 49 | class Number(DataType): 50 | locale: Any = ... 51 | currency_symbols: Any = ... 52 | group_symbol: Any = ... 53 | decimal_symbol: Any = ... 54 | def __init__( 55 | self, 56 | locale: str = ..., 57 | group_symbol: Optional[Any] = ..., 58 | decimal_symbol: Optional[Any] = ..., 59 | currency_symbols: Any = ..., 60 | **kwargs: Any, 61 | ) -> None: ... 62 | def cast(self, d: Any): ... 63 | def jsonify(self, d: Any): ... 64 | 65 | class TimeDelta(DataType): 66 | def cast(self, d: Any): ... 67 | 68 | class Text(DataType): 69 | cast_nulls: Any = ... 70 | def __init__(self, cast_nulls: bool = ..., **kwargs: Any) -> None: ... 71 | def cast(self, d: Any): ... 72 | --------------------------------------------------------------------------------