├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── pull_request_template.md └── workflows │ ├── ci.yml │ ├── integration_tests.yml │ ├── stale.yml │ └── triage-labels.yml ├── .gitignore ├── .python-version ├── CHANGELOG.md ├── LICENSE.md ├── README.md ├── RELEASE.md ├── dbt_project.yml ├── etc └── sample_docs.png ├── integration_tests ├── .python-version ├── README.md ├── dbt_project.yml ├── macros │ ├── common │ │ └── prep_external.sql │ └── plugins │ │ ├── redshift │ │ └── prep_external.sql │ │ ├── snowflake │ │ └── prep_external.sql │ │ └── sqlserver │ │ ├── dbt_utils_tsql.sql │ │ └── prep_external.sql ├── models │ ├── common │ │ └── control.yml │ └── plugins │ │ ├── azuresql │ │ └── azuresql_external.yml │ │ ├── bigquery │ │ └── bigquery_external.yml │ │ ├── redshift │ │ └── redshift_external.yml │ │ ├── snowflake │ │ ├── people_alias.sql │ │ ├── people_expression.sql │ │ └── snowflake_external.yml │ │ ├── spark │ │ └── spark_external.yml │ │ └── synapse │ │ └── synapse_external.yml ├── package-lock.yml ├── packages.yml ├── profiles.yml ├── public_data │ ├── capitalize_parquet.ipynb │ ├── csv │ │ ├── section=a │ │ │ └── people_a.csv │ │ ├── section=b │ │ │ └── people_b.csv │ │ ├── section=c │ │ │ └── people_c.csv │ │ └── section=d │ │ │ └── people_d.csv │ ├── json │ │ ├── section=a │ │ │ └── people_a.json │ │ ├── section=b │ │ │ └── people_b.json │ │ ├── section=c │ │ │ └── people_c.json │ │ └── section=d │ │ │ └── people_d.json │ ├── parquet │ │ ├── section=a │ │ │ └── people_a.parquet │ │ ├── section=b │ │ │ └── people_b.parquet │ │ ├── section=c │ │ │ └── people_c.parquet │ │ └── section=d │ │ │ └── people_d.parquet │ └── parquet_capitalized │ │ ├── section=a │ │ └── people_a.parquet │ │ ├── section=b │ │ └── people_b.parquet │ │ ├── section=c │ │ └── people_c.parquet │ │ └── section=d │ │ └── people_d.parquet ├── seeds │ └── people.csv ├── test.env.sample └── vars.env.sample ├── macros ├── common │ ├── create_external_schema.sql │ ├── create_external_table.sql │ ├── get_external_build_plan.sql │ ├── helpers │ │ ├── dropif.sql │ │ └── transaction.sql │ ├── refresh_external_table.sql │ ├── stage_external_sources.sql │ └── update_external_table_columns.sql └── plugins │ ├── bigquery │ ├── create_external_schema.sql │ ├── create_external_table.sql │ ├── get_external_build_plan.sql │ └── update_external_table_columns.sql │ ├── fabric │ ├── create_external_schema.sql │ ├── create_external_table.sql │ ├── get_external_build_plan.sql │ └── helpers │ │ └── dropif.sql │ ├── redshift │ ├── create_external_table.sql │ ├── get_external_build_plan.sql │ ├── helpers │ │ ├── add_partitions.sql │ │ ├── dropif.sql │ │ ├── is_ext_tbl.sql │ │ ├── paths.sql │ │ ├── render_macro.sql │ │ └── transaction.sql │ └── refresh_external_table.sql │ ├── snowflake │ ├── create_external_schema.sql │ ├── create_external_table.sql │ ├── get_external_build_plan.sql │ ├── helpers │ │ └── is_csv.sql │ ├── refresh_external_table.sql │ └── snowpipe │ │ ├── create_empty_table.sql │ │ ├── create_snowpipe.sql │ │ ├── get_copy_sql.sql │ │ └── refresh_snowpipe.sql │ └── spark │ ├── create_external_table.sql │ ├── get_external_build_plan.sql │ ├── helpers │ ├── dropif.sql │ └── recover_partitions.sql │ └── refresh_external_table.sql ├── pyproject.toml ├── run_test.sh ├── sample_analysis └── external_sources_dry_run.sql ├── sample_sources ├── bigquery.yml ├── redshift.yml ├── snowflake.yml ├── spark.yml └── synapse.yml ├── supported_adapters.env ├── tox.ini └── uv.lock 
/.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @jeremyyeo 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report a bug or an issue you've found with this package 4 | title: '' 5 | labels: bug, triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Describe the bug 11 | 14 | 15 | ### Steps to reproduce 16 | 19 | 20 | ### Expected results 21 | 24 | 25 | ### Actual results 26 | 29 | 30 | ### Screenshots and log output 31 | 34 | 35 | ### System information 36 | **The contents of your `packages.yml` file:** 37 | 38 | **Which database are you using dbt with?** 39 | - [ ] redshift 40 | - [ ] snowflake 41 | - [ ] other (specify: ____________) 42 | 43 | 44 | **The output of `dbt --version`:** 45 | ``` 46 | 47 | ``` 48 | 49 | **The operating system you're using:** 50 | 51 | **The output of `python --version`:** 52 | 53 | ### Additional context 54 | 57 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this package 4 | title: '' 5 | labels: enhancement, triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Describe the feature 11 | A clear and concise description of what you want to happen. 12 | 13 | ### Describe alternatives you've considered 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | ### Additional context 17 | Is this feature database-specific? Which database(s) is/are relevant? Please include any other relevant context here. 18 | 19 | ### Who will this benefit? 20 | What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly. 
21 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Description & motivation 2 | 5 | 6 | ## Checklist 7 | - [ ] I have verified that these changes work locally 8 | - [ ] I have updated the README.md (if applicable) 9 | - [ ] I have added an integration test for my fix/feature (if applicable) 10 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # Run tests for dbt-external-tables against supported adapters 3 | 4 | # **why?** 5 | # To ensure that dbt-external-tables works as expected with all supported adapters 6 | 7 | # **when?** 8 | # On every PR, and every push to main and when manually triggered 9 | 10 | name: Package Integration Tests 11 | 12 | on: 13 | push: 14 | branches: 15 | - main 16 | pull_request_target: 17 | workflow_dispatch: 18 | 19 | jobs: 20 | run-tests: 21 | uses: dbt-labs/dbt-package-testing/.github/workflows/run_tox.yml@v1 22 | with: 23 | # redshift 24 | REDSHIFT_HOST: ${{ vars.REDSHIFT_HOST }} 25 | REDSHIFT_USER: ${{ vars.REDSHIFT_USER }} 26 | REDSHIFT_PORT: ${{ vars.REDSHIFT_PORT }} 27 | REDSHIFT_DATABASE: ${{ vars.REDSHIFT_DATABASE }} 28 | REDSHIFT_SCHEMA: "integration_tests_redshift_${{ github.run_number }}" 29 | # snowflake 30 | SNOWFLAKE_USER: ${{ vars.SNOWFLAKE_USER }} 31 | SNOWFLAKE_WAREHOUSE: ${{ vars.SNOWFLAKE_WAREHOUSE }} 32 | SNOWFLAKE_ROLE: ${{ vars.SNOWFLAKE_ROLE }} 33 | SNOWFLAKE_DATABASE: ${{ vars.SNOWFLAKE_DATABASE }} 34 | SNOWFLAKE_SCHEMA: "integration_tests_snowflake_${{ github.run_number }}" 35 | # bigquery 36 | BIGQUERY_PROJECT: ${{ vars.BIGQUERY_PROJECT }} 37 | BIGQUERY_SCHEMA: "integration_tests_bigquery_${{ github.run_number }}" 38 | # synapse 39 | # temporarily removed until we can get the cluster hooked up to the blob correctly 40 | # SYNAPSE_DRIVER: ${{ vars.SYNAPSE_DRIVER }} 41 | # SYNAPSE_HOST: ${{ vars.SYNAPSE_HOST }} 42 | # SYNAPSE_PORT: ${{ vars.SYNAPSE_PORT }} 43 | # SYNAPSE_DATABASE: ${{ vars.SYNAPSE_DATABASE }} 44 | # SYNAPSE_AUTHENTICATION: ${{ vars.SYNAPSE_AUTHENTICATION }} 45 | # SYNAPSE_TENANT_ID: ${{ vars.SYNAPSE_TENANT_ID }} 46 | # SYNAPSE_CLIENT_ID: ${{ vars.SYNAPSE_CLIENT_ID }} 47 | 48 | secrets: 49 | DBT_ENV_SECRET_REDSHIFT_PASS: ${{ secrets.DBT_ENV_SECRET_REDSHIFT_PASS }} 50 | SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} 51 | DBT_ENV_SECRET_SNOWFLAKE_PASS: ${{ secrets.DBT_ENV_SECRET_SNOWFLAKE_PASS }} 52 | BIGQUERY_KEYFILE_JSON: ${{ secrets.BIGQUERY_KEYFILE_JSON }} 53 | DBT_ENV_SECRET_SYNAPSE_CLIENT_SECRET: ${{ secrets.DBT_ENV_SECRET_SYNAPSE_CLIENT_SECRET }} 54 | -------------------------------------------------------------------------------- /.github/workflows/integration_tests.yml: -------------------------------------------------------------------------------- 1 | name: Integration Testing 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | fail-fast: true 15 | max-parallel: 3 16 | matrix: 17 | python-version: [ "3.11"] # "3.10", "3.12"] 18 | dbt-version: ["1.8.0"] # "1.6.0", , "1.8.0b1"] 19 | data-platform: ["redshift", "snowflake", "bigquery"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | 
with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dbt-${{ matrix.data-platform }}~=${{ matrix.dbt-version }} 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install "dbt-${{ matrix.data-platform }}~=${{ matrix.dbt-version }}" "dbt-core~=${{ matrix.dbt-version }}" 31 | - name: run integration_tests project on ${{ matrix.data-platform }} 32 | run: | 33 | cd integration_tests 34 | export DBT_PROFILES_DIR=. 35 | dbt deps --target ${{ matrix.data-platform }} 36 | dbt seed --full-refresh --target ${{ matrix.data-platform }} 37 | dbt run --target ${{ matrix.data-platform }} 38 | dbt run-operation prep_external --target ${{ matrix.data-platform }} 39 | dbt -d run-operation dbt_external_tables.stage_external_sources --vars 'ext_full_refresh: true' --target ${{ matrix.data-platform }} 40 | dbt run-operation dbt_external_tables.stage_external_sources --target ${{ matrix.data-platform }} 41 | dbt test --target ${{ matrix.data-platform }} 42 | env: 43 | 44 | env: 45 | # redshift 46 | REDSHIFT_HOST: ${{ secrets.REDSHIFT_HOST }} 47 | REDSHIFT_USER: ${{ secrets.REDSHIFT_USER }} 48 | DBT_ENV_SECRET_REDSHIFT_PASS: ${{ secrets.DBT_ENV_SECRET_REDSHIFT_PASS }} 49 | REDSHIFT_PORT: ${{ secrets.REDSHIFT_PORT }} 50 | REDSHIFT_DATABASE: ${{ secrets.REDSHIFT_DATABASE }} 51 | REDSHIFT_SPECTRUM_IAM_ROLE: ${{ secrets.REDSHIFT_SPECTRUM_IAM_ROLE }} 52 | REDSHIFT_SCHEMA: "dbt_external_tables_integration_tests_redshift" 53 | #snowflake 54 | SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} 55 | SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }} 56 | DBT_ENV_SECRET_SNOWFLAKE_PASS: ${{ secrets.DBT_ENV_SECRET_SNOWFLAKE_PASS }} 57 | SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }} 58 | SNOWFLAKE_ROLE: ${{ secrets.SNOWFLAKE_ROLE }} 59 | SNOWFLAKE_DATABASE: ${{ secrets.SNOWFLAKE_DATABASE }} 60 | SNOWFLAKE_SCHEMA: "dbt_external_tables_integration_tests_snowflake" 61 | # bigquery 62 | BIGQUERY_PROJECT: ${{ vars.BIGQUERY_PROJECT }} 63 | BIGQUERY_SCHEMA: "dbt_external_tables_integration_tests_bigquery" 64 | BIGQUERY_KEYFILE_JSON: ${{ secrets.BIGQUERY_KEYFILE_JSON }} -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # For issues that have been open for awhile without activity, label 3 | # them as stale with a warning that they will be closed out. If 4 | # anyone comments to keep the issue open, it will automatically 5 | # remove the stale label and keep it open. 6 | 7 | # Stale label rules: 8 | # awaiting_response, more_information_needed -> 90 days 9 | # good_first_issue, help_wanted -> 360 days (a year) 10 | # tech_debt -> 720 (2 years) 11 | # all else defaults -> 180 days (6 months) 12 | 13 | # **why?** 14 | # To keep the repo in a clean state from issues that aren't relevant anymore 15 | 16 | # **when?** 17 | # Once a day 18 | 19 | name: "Close stale issues and PRs" 20 | on: 21 | schedule: 22 | - cron: "30 1 * * *" 23 | 24 | permissions: 25 | issues: write 26 | pull-requests: write 27 | 28 | jobs: 29 | stale: 30 | uses: dbt-labs/actions/.github/workflows/stale-bot-matrix.yml@main 31 | -------------------------------------------------------------------------------- /.github/workflows/triage-labels.yml: -------------------------------------------------------------------------------- 1 | # **what?** 2 | # When the maintenance team triages, we sometimes need more information from the issue creator. 
In 3 | # those cases we remove the `triage` label and add the `awaiting_response` label. Once we 4 | # receive a response in the form of a comment, we want the `awaiting_response` label removed 5 | # in favor of the `triage` label so we are aware that the issue needs action. 6 | 7 | # **why?** 8 | # To help with our team's issue triage tracking 9 | 10 | # **when?** 11 | # This will run when a comment is added to an issue and that issue has the `awaiting_response` label. 12 | 13 | name: Update Triage Label 14 | 15 | on: issue_comment 16 | 17 | defaults: 18 | run: 19 | shell: bash 20 | 21 | permissions: 22 | issues: write 23 | 24 | jobs: 25 | triage_label: 26 | if: contains(github.event.issue.labels.*.name, 'awaiting_response') 27 | uses: dbt-labs/actions/.github/workflows/swap-labels.yml@main 28 | with: 29 | add_label: "triage" 30 | remove_label: "awaiting_response" 31 | secrets: inherit 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | **/target/ 3 | **/dbt_modules/ 4 | **/dbt_packages/ 5 | **/logs/ 6 | **/env/ 7 | **/venv/ 8 | **/test.env 9 | integration_tests/vars.env 10 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## dbt-external-tables v0.11.0 4 | 5 | ### Synapse & SQL Server 6 | * Reenable sqlserver and synapse support https://github.com/dbt-labs/dbt-external-tables/pull/332 7 | 8 | 9 | **Full Changelog**: https://github.com/dbt-labs/dbt-external-tables/compare/0.10.1...0.11.0 10 | 11 | ## dbt-external-tables v0.10.1 12 | 13 | * [FIX] OOPS! Revert https://github.com/dbt-labs/dbt-external-tables/pull/330 "stage_external_sources Comparing source_name of the node instead of the name of the node" by @dataders in https://github.com/dbt-labs/dbt-external-tables/pull/330 14 | * Update CI trigger to run off forks by @emmyoop in https://github.com/dbt-labs/dbt-external-tables/pull/329 15 | 16 | 17 | **Full Changelog**: https://github.com/dbt-labs/dbt-external-tables/compare/0.10.0...0.10.1 18 | 19 | ## BROKEN dbt-external-tables v0.10.0 20 | 21 | DO NOT USE THIS VERSION. USE `v0.10.1` or higher.
22 | 23 | ### Snowflake 24 | * Refactor create_external_table.sql in snowflake plugin by @kyleburke-meq in https://github.com/dbt-labs/dbt-external-tables/pull/318 25 | * stage_external_sources Comparing source_name of the node instead of the name of the node by @ward-resa in https://github.com/dbt-labs/dbt-external-tables/pull/312 26 | * added ignore_case for Snowflake by @cakkinep in https://github.com/dbt-labs/dbt-external-tables/pull/308 27 | 28 | ## New Contributors 29 | * @ward-resa made their first contribution in https://github.com/dbt-labs/dbt-external-tables/pull/312 30 | 31 | **Full Changelog**: https://github.com/dbt-labs/dbt-external-tables/compare/0.9.0...0.10.0 32 | 33 | ## dbt-external-tables v0.9.0 34 | 35 | ### Snowflake 36 | * Add metadata_file_last_modified for snowpiped tables by @Catisyf in https://github.com/dbt-labs/dbt-external-tables/pull/239 37 | * snowflake delta format by @danielefrigo in https://github.com/dbt-labs/dbt-external-tables/pull/240 38 | * Support aws_sns_topic property in Snowflake by @jtmcn in https://github.com/dbt-labs/dbt-external-tables/pull/243 39 | * alias column for snowflake external table by @cakkinep in https://github.com/dbt-labs/dbt-external-tables/pull/257 40 | * Snowflake: Add expression parameter to columns by @kyleburke-meq @jpear3 in https://github.com/dbt-labs/dbt-external-tables/pull/275 41 | 42 | ### BigQuery 43 | * Handle BigQuery non-string option 'max_staleness' by @marcbllv in https://github.com/dbt-labs/dbt-external-tables/pull/237 44 | * quote project name by @thomas-vl in https://github.com/dbt-labs/dbt-external-tables/pull/242 45 | * update external table columns by @thomas-vl in https://github.com/dbt-labs/dbt-external-tables/pull/252 46 | 47 | ### under the hood 48 | * Fix protobuf v5 issue in CI by @thomas-vl in https://github.com/dbt-labs/dbt-external-tables/pull/258 49 | * move to GitHub Actions by @dataders in https://github.com/dbt-labs/dbt-external-tables/pull/265 50 | * Rebase test by @dataders in https://github.com/dbt-labs/dbt-external-tables/pull/273 51 | * run workflow in context of base repo by @dataders in https://github.com/dbt-labs/dbt-external-tables/pull/278 52 | * actual test case for #257 by @dataders in https://github.com/dbt-labs/dbt-external-tables/pull/290 53 | 54 | ## New Contributors 55 | * @marcbllv made their first contribution in https://github.com/dbt-labs/dbt-external-tables/pull/237 56 | * @Catisyf made their first contribution in https://github.com/dbt-labs/dbt-external-tables/pull/239 57 | * @danielefrigo made their first contribution in https://github.com/dbt-labs/dbt-external-tables/pull/240 58 | * @jtmcn made their first contribution in https://github.com/dbt-labs/dbt-external-tables/pull/243 59 | * @cakkinep made their first contribution in https://github.com/dbt-labs/dbt-external-tables/pull/257 60 | * @kyleburke-meq made their first contribution in https://github.com/dbt-labs/dbt-external-tables/pull/275 61 | * @jpear3 made their first contribution in https://github.com/dbt-labs/dbt-external-tables/pull/275 62 | 63 | **Full Changelog**: https://github.com/dbt-labs/dbt-external-tables/compare/0.8.7...0.9.0 64 | 65 | ## dbt-external-tables v0.8.0 66 | 67 | This release supports any version (minor and patch) of v1, which means far less need for compatibility releases in the future. 
68 | 69 | ### Features 70 | - (Snowflake) Support for regex `pattern` in snowpipes ([#111](https://github.com/dbt-labs/dbt-external-tables/pull/111), [#122](https://github.com/dbt-labs/dbt-external-tables/pull/122)) 71 | - (Apache Spark) Real support for partitioned external tables. Note that external sources with `partitions` defined were implicitly skipped. Going forward, sources with partitions defined (excluding those with `using: delta`) will run `alter table ... recover partitions`. 72 | 73 | ### Under the hood 74 | - Use standard logging, thereby removing dependency on `dbt_utils` ([#119](https://github.com/dbt-labs/dbt-external-tables/pull/119)) 75 | - Remove `synapse__`-prefixed "passthrough" macros, now that `dbt-synapse` can use `sqlserver__`-prefixed macros instead ([#110](https://github.com/dbt-labs/dbt-external-tables/pull/110)) 76 | 77 | ### Contributors 78 | - [@JCZuurmond](https://github.com/JCZuurmond) ([#116](https://github.com/dbt-labs/dbt-external-tables/pull/116)) 79 | - [@stumelius](https://github.com/stumelius) ([#111](https://github.com/dbt-labs/dbt-external-tables/pull/111)) 80 | - [@swanderz](https://github.com/swanderz) ([#110](https://github.com/dbt-labs/dbt-external-tables/pull/110)) 81 | 82 | ## dbt-external-tables v0.7.3 83 | 84 | ### Fixes 85 | - Hard code printer width for backwards compatibility with older versions of dbt Core ([#120](https://github.com/dbt-labs/dbt-external-tables/pull/120)) 86 | 87 | ## dbt-external-tables v0.7.2 88 | 🚨 This is a compatibility release in preparation for `dbt-core` v1.0.0 (🎉). Projects using this version with `dbt-core` v1.0.x can expect to see a deprecation warning. This will be resolved in the next minor release. 89 | 90 | ### Fixes 91 | - (BigQuery) Fix `create external tables` with multiple partitions, by including missing comma ([#114](https://github.com/dbt-labs/dbt-external-tables/pull/114)) 92 | - (Snowflake) Fix `auto_refresh` when not specified `False` ([#117](https://github.com/dbt-labs/dbt-external-tables/pull/117)) 93 | 94 | ### Contributors 95 | - [@stumelius](https://github.com/stumelius) ([#114](https://github.com/dbt-labs/dbt-external-tables/pull/114)) 96 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # External sources in dbt 2 | 3 | dbt v0.15.0 [added support](https://github.com/dbt-labs/dbt/pull/1784) for an `external` property within `sources` that can include information about `location`, `partitions`, and other database-specific properties. 4 | 5 | This package provides: 6 | * Macros to create/replace external tables and refresh their partitions, using the metadata provided in your `.yml` file source definitions 7 | * Snowflake-specific macros to create, backfill, and refresh snowpipes, using the same metadata 8 | 9 | ## Supported databases 10 | 11 | * Redshift (Spectrum) 12 | * Snowflake 13 | * BigQuery 14 | * Spark 15 | * Synapse 16 | * Azure SQL 17 | 18 | ![sample docs](etc/sample_docs.png) 19 | 20 | ## Installation 21 | 22 | Follow the instructions at [hub.getdbt.com](https://hub.getdbt.com/dbt-labs/dbt_external_tables/latest/) on how to modify your `packages.yml` and run `dbt deps`. 23 | 24 | ## Syntax 25 | 26 | The `stage_external_sources` macro is the primary point of entry when using this package. It has two operational modes: standard and "full refresh." 27 | 28 | ```bash 29 | # iterate through all source nodes, create if missing, refresh metadata 30 | $ dbt run-operation stage_external_sources 31 | 32 | # iterate through all source nodes, create or replace (+ refresh if necessary) 33 | $ dbt run-operation stage_external_sources --vars "ext_full_refresh: true" 34 | ``` 35 | 36 | The `stage_external_sources` macro accepts a limited node selection syntax similar to 37 | [snapshotting source freshness](https://docs.getdbt.com/docs/running-a-dbt-project/command-line-interface/source/#specifying-sources-to-snapshot): 38 | 39 | ```bash 40 | # stage all Snowplow and Logs external sources: 41 | $ dbt run-operation stage_external_sources --args "select: snowplow logs" 42 | 43 | # stage a particular external source table: 44 | $ dbt run-operation stage_external_sources --args "select: snowplow.event" 45 | ``` 46 | 47 | ## Setup 48 | 49 | The macros assume that you: 50 | 1. Have already created your database's required scaffolding for external resources: 51 | - an external stage (Snowflake) 52 | - an external schema + S3 bucket (Redshift Spectrum) 53 | - an external data source and file format (Synapse) 54 | - an external data source and database-scoped credential (Azure SQL) 55 | - a Google Cloud Storage bucket (BigQuery) 56 | - an accessible set of files (Spark) 57 | 2. Have the appropriate permissions to create tables using that scaffolding 58 | 3. Have already created the database/project and/or schema/dataset in which dbt will create external tables (or snowpiped tables) 59 | 60 | ## Spec 61 | 62 | ```yml 63 | version: 2 64 | 65 | sources: 66 | - name: snowplow 67 | tables: 68 | - name: event 69 | description: > 70 | This source table is actually a set of files in external storage. 71 | The dbt-external-tables package provides handy macros for getting 72 | those files queryable, just in time for modeling. 73 | 74 | external: 75 | location: # required: S3 file path, GCS file path, Snowflake stage, Synapse data source 76 | 77 | ... # database-specific properties of external table 78 | 79 | partitions: # optional 80 | - name: collector_date 81 | data_type: date 82 | ... # database-specific properties 83 | 84 | # Specify ALL column names + datatypes.
85 | # Column order must match for CSVs, column names must match for other formats. 86 | # Some databases support schema inference. 87 | 88 | columns: 89 | - name: app_id 90 | data_type: varchar(255) 91 | description: "Application ID" 92 | - name: platform 93 | data_type: varchar(255) 94 | description: "Platform" 95 | ... 96 | ``` 97 | 98 | The `stage_external_sources` macro will use this YAML config to compile and 99 | execute the appropriate `create`, `refresh`, and/or `drop` commands: 100 | 101 | ``` 102 | 19:01:48 + 1 of 1 START external source spectrum.my_partitioned_tbl 103 | 19:01:48 + 1 of 1 (1) drop table if exists "db"."spectrum"."my_partitioned_tbl" 104 | 19:01:48 + 1 of 1 (1) DROP TABLE 105 | 19:01:48 + 1 of 1 (2) create external table "db"."spectrum"."my_partitioned_tbl"... 106 | 19:01:48 + 1 of 1 (2) CREATE EXTERNAL TABLE 107 | 19:01:48 + 1 of 1 (3) alter table "db"."spectrum"."my_partitioned_tbl"... 108 | 19:01:49 + 1 of 1 (3) ALTER EXTERNAL TABLE 109 | ``` 110 | 111 | ## Resources 112 | 113 | * [`sample_sources`](sample_sources): detailed example source specs, with annotations, for each database's implementation 114 | * [`sample_analysis`](sample_analysis): a "dry run" version of the compiled DDL/DML that 115 | `stage_external_sources` runs as an operation 116 | * [`tested specs`](integration_tests/models/plugins): source spec variations that are confirmed to work on each database, via integration tests 117 | 118 | If you encounter issues using this package or have questions, please check the [open issues](https://github.com/dbt-labs/dbt-external-tables/issues), as there's a chance it's a known limitation or work in progress. If not, you can: 119 | - open a new issue to report a bug or suggest an enhancement 120 | - post a technical question to [the Community Forum](https://discourse.getdbt.com/c/help/19) 121 | - post a conceptual question to the relevant database channel (#db-redshift, #db-snowflake, etc) in the [dbt Slack community](https://community.getdbt.com/) 122 | 123 | Additional contributions to this package are very welcome! Please create issues or open PRs against `main`. Check out [this post](https://discourse.getdbt.com/t/contributing-to-an-external-dbt-package/657) on the best workflow for contributing to a package. 124 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # dbt-external-tables releases 2 | 3 | ## When do we release? 4 | There are a few scenarios that might prompt a release: 5 | 6 | | Scenario | Release type | 7 | |--------------------------------------------|--------------| 8 | | Breaking changes to existing macros | minor | 9 | | New functionality | minor | 10 | | Fixes to existing macros | patch | 11 | 12 | ## Release process 13 | 14 | 1. Begin a new release by clicking [here](https://github.com/dbt-labs/dbt-external-tables/releases/new) 15 | 1. Click "Choose a tag", then paste your version number (with no "v" in the name), then click "Create new tag: x.y.z. on publish" 16 | - The “Release title” will be identical to the tag name 17 | 1. Click the "Generate release notes" button 18 | 1. Copy and paste the generated release notes into `CHANGELOG.md`, commit, and merge into the `main` branch 19 | 1.
Click the "Publish release" button 20 | - This will automatically create an "Assets" section containing: 21 | - Source code (zip) 22 | - Source code (tar.gz) 23 | -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'dbt_external_tables' 2 | version: '0.8.0' 3 | config-version: 2 4 | require-dbt-version: [">=1.0.0", "<2.0.0"] 5 | macro-paths: ["macros"] 6 | -------------------------------------------------------------------------------- /etc/sample_docs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-external-tables/591957cc3827761af1e622f153f99ab64e7884bf/etc/sample_docs.png -------------------------------------------------------------------------------- /integration_tests/.python-version: -------------------------------------------------------------------------------- 1 | dbt-external-tables 2 | -------------------------------------------------------------------------------- /integration_tests/README.md: -------------------------------------------------------------------------------- 1 | ## Integration tests 2 | 3 | The files in `public_data` are available in two public storage buckets: 4 | - `s3://dbt-external-tables-testing` 5 | - `gs://dbt-external-tables-testing/` 6 | 7 | These integration tests confirm that, when staged as external tables, using different databases / file formats / partitioning schemes, the final combined output is equivalent to `seeds/people.csv`. 8 | -------------------------------------------------------------------------------- /integration_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | name: 'dbt_external_tables_integration_tests' 3 | version: '1.0' 4 | 5 | profile: 'integration_tests' 6 | 7 | config-version: 2 8 | 9 | model-paths: ["models"] 10 | analysis-paths: ["analysis"] 11 | test-paths: ["tests"] 12 | seed-paths: ["seeds"] 13 | macro-paths: ["macros"] 14 | 15 | target-path: "target" 16 | clean-targets: 17 | - "target" 18 | - "dbt_packages" 19 | 20 | flags: 21 | send_anonymous_usage_stats: False 22 | use_colors: True 23 | 24 | dispatch: 25 | - macro_namespace: dbt_external_tables 26 | search_order: ['dbt_external_tables_integration_tests', 'dbt_external_tables'] 27 | 28 | seeds: 29 | +quote_columns: false 30 | 31 | models: 32 | dbt_external_tables_integration_tests: 33 | plugins: 34 | snowflake: 35 | +enabled: "{{ target.type == 'snowflake' }}" 36 | 37 | sources: 38 | dbt_external_tables_integration_tests: 39 | plugins: 40 | redshift: 41 | +enabled: "{{ target.type == 'redshift' }}" 42 | snowflake: 43 | +enabled: "{{ target.type == 'snowflake' }}" 44 | bigquery: 45 | +enabled: "{{ target.type == 'bigquery' }}" 46 | spark: 47 | +enabled: "{{ target.type == 'spark' }}" 48 | synapse: 49 | +enabled: "{{ target.type == 'synapse' }}" 50 | azuresql: 51 | +enabled: "{{ target.type == 'sqlserver' }}" 52 | 53 | tests: 54 | dbt_external_tables_integration_tests: 55 | plugins: 56 | redshift: 57 | +enabled: "{{ target.type == 'redshift' }}" 58 | snowflake: 59 | +enabled: "{{ target.type == 'snowflake' }}" 60 | bigquery: 61 | +enabled: "{{ target.type == 'bigquery' }}" 62 | spark: 63 | +enabled: "{{ target.type == 'spark' }}" 64 | synapse: 65 | +enabled: "{{ target.type == 'synapse' }}" 66 | azuresql: 67 | +enabled: "{{ target.type == 'sqlserver' }}" 68 | 
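The `dispatch` block at the top of the project file above configures how dbt resolves macros dispatched into the `dbt_external_tables` namespace: candidates are looked up in `dbt_external_tables_integration_tests` first and in the package second, so project-level implementations such as the `prep_external` macros in the files that follow win over same-named package macros. A consuming project can use the same pattern to override the package's adapter-specific macros. The sketch below is illustrative only; the project name `my_project` is an assumption and not part of this repository.

```yml
# dbt_project.yml of a hypothetical consuming project: resolve dispatched
# macros from this project first, then fall back to dbt_external_tables
dispatch:
  - macro_namespace: dbt_external_tables
    search_order: ['my_project', 'dbt_external_tables']
```

With that search order in place, an adapter-prefixed macro defined in `my_project` (for example, a custom `snowflake__prep_external` like the one defined later in this test suite) is found before any implementation of the same name further down the list.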
-------------------------------------------------------------------------------- /integration_tests/macros/common/prep_external.sql: -------------------------------------------------------------------------------- 1 | {% macro prep_external() %} 2 | {{ return(adapter.dispatch('prep_external', 'dbt_external_tables')()) }} 3 | {% endmacro %} 4 | 5 | {% macro default__prep_external() %} 6 | {% do log('No prep necessary, skipping', info = true) %} 7 | {# noop #} 8 | {% endmacro %} 9 | -------------------------------------------------------------------------------- /integration_tests/macros/plugins/redshift/prep_external.sql: -------------------------------------------------------------------------------- 1 | {% macro redshift__prep_external() %} 2 | 3 | {% set external_schema = target.schema ~ '_spectrum' %} 4 | 5 | {% set create_external_schema %} 6 | 7 | create external schema if not exists 8 | {{ external_schema }} 9 | from data catalog 10 | database '{{ external_schema }}' 11 | iam_role 'arn:aws:iam::859831564954:role/RedshiftSpectrumTesting' 12 | create external database if not exists; 13 | 14 | {% endset %} 15 | 16 | {% do log('Creating external schema ' ~ external_schema, info = true) %} 17 | {% do run_query(create_external_schema) %} 18 | 19 | {% endmacro %} 20 | -------------------------------------------------------------------------------- /integration_tests/macros/plugins/snowflake/prep_external.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake__prep_external() %} 2 | 3 | {% set external_stage = target.schema ~ '.dbt_external_tables_testing' %} 4 | {% set parquet_file_format = target.schema ~ '.dbt_external_tables_testing_parquet' %} 5 | 6 | {% set create_external_stage_and_file_format %} 7 | 8 | begin; 9 | create or replace stage 10 | {{ external_stage }} 11 | url = 's3://dbt-external-tables-testing'; 12 | 13 | create or replace file format {{ parquet_file_format }} type = parquet; 14 | commit; 15 | 16 | {% endset %} 17 | 18 | {% do log('Creating external stage ' ~ external_stage, info = true) %} 19 | {% do log('Creating parquet file format ' ~ parquet_file_format, info = true) %} 20 | {% do run_query(create_external_stage_and_file_format) %} 21 | 22 | {% endmacro %} 23 | -------------------------------------------------------------------------------- /integration_tests/macros/plugins/sqlserver/dbt_utils_tsql.sql: -------------------------------------------------------------------------------- 1 | {% macro test_tsql_equal_rowcount(model) %} 2 | 3 | {% set compare_model = kwargs.get('compare_model', kwargs.get('arg')) %} 4 | 5 | {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} 6 | {%- if not execute -%} 7 | {{ return('') }} 8 | {% endif %} 9 | 10 | with a as ( 11 | 12 | select count(*) as count_a from {{ model.include(database=False) }} 13 | 14 | ), 15 | b as ( 16 | 17 | select count(*) as count_b from {{ compare_model.include(database=False) }} 18 | 19 | ), 20 | final as ( 21 | 22 | select abs( 23 | (select count_a from a) - 24 | (select count_b from b) 25 | ) 26 | as diff_count 27 | 28 | ) 29 | 30 | select diff_count from final 31 | 32 | {% endmacro %} 33 | 34 | 35 | {% macro test_tsql_equality(model) %} 36 | 37 | 38 | {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. 
#} 39 | {%- if not execute -%} 40 | {{ return('') }} 41 | {% endif %} 42 | 43 | -- setup 44 | {%- do dbt_utils._is_relation(model, 'test_equality') -%} 45 | 46 | {#- 47 | If the compare_cols arg is provided, we can run this test without querying the 48 | information schema — this allows the model to be an ephemeral model 49 | -#} 50 | {%- if not kwargs.get('compare_columns', None) -%} 51 | {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%} 52 | {%- endif -%} 53 | 54 | {% set compare_model = kwargs.get('compare_model', kwargs.get('arg')) %} 55 | {% set compare_columns = kwargs.get('compare_columns', adapter.get_columns_in_relation(model) | map(attribute='quoted') ) %} 56 | {% set compare_cols_csv = compare_columns | join(', ') %} 57 | 58 | with a as ( 59 | 60 | select * from {{ model.include(database=False) }} 61 | 62 | ), 63 | 64 | b as ( 65 | 66 | select * from {{ compare_model.include(database=False) }} 67 | 68 | ), 69 | 70 | a_minus_b as ( 71 | 72 | select {{compare_cols_csv}} from a 73 | {{ dbt_utils.except() }} 74 | select {{compare_cols_csv}} from b 75 | 76 | ), 77 | 78 | b_minus_a as ( 79 | 80 | select {{compare_cols_csv}} from b 81 | {{ dbt_utils.except() }} 82 | select {{compare_cols_csv}} from a 83 | 84 | ), 85 | 86 | unioned as ( 87 | 88 | select * from a_minus_b 89 | union all 90 | select * from b_minus_a 91 | 92 | ), 93 | 94 | final as ( 95 | 96 | select (select count(*) from unioned) + 97 | (select abs( 98 | (select count(*) from a_minus_b) - 99 | (select count(*) from b_minus_a) 100 | )) 101 | as count 102 | 103 | ) 104 | 105 | select count from final 106 | 107 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/macros/plugins/sqlserver/prep_external.sql: -------------------------------------------------------------------------------- 1 | {% macro fabric__prep_external() %} 2 | 3 | {% set external_data_source = target.schema ~ '.dbt_external_tables_testing' %} 4 | 5 | {% if target.type == "synapse"%} 6 | 7 | {% set create_external_data_source %} 8 | IF NOT EXISTS ( SELECT * FROM sys.external_data_sources WHERE name = '{{external_data_source}}' ) 9 | 10 | CREATE EXTERNAL DATA SOURCE [{{external_data_source}}] WITH ( 11 | TYPE = HADOOP, 12 | LOCATION = 'wasbs://dbt-external-tables-testing@dbtsynapselake.blob.core.windows.net' 13 | ) 14 | {% endset %} 15 | 16 | {% set external_file_format = target.schema ~ '.dbt_external_ff_testing' %} 17 | 18 | {% set create_external_file_format %} 19 | IF NOT EXISTS ( SELECT * FROM sys.external_file_formats WHERE name = '{{external_file_format}}' ) 20 | 21 | CREATE EXTERNAL FILE FORMAT [{{external_file_format}}] 22 | WITH ( 23 | FORMAT_TYPE = DELIMITEDTEXT, 24 | FORMAT_OPTIONS ( 25 | FIELD_TERMINATOR = N',', 26 | FIRST_ROW = 2, 27 | USE_TYPE_DEFAULT = True 28 | ) 29 | ) 30 | {% endset %} 31 | 32 | {% elif target.type == "sqlserver" %} 33 | 34 | {% set cred_name = 'synapse_reader' %} 35 | 36 | {% set create_database_scoped_credential %} 37 | IF NOT EXISTS ( SELECT * FROM sys.database_scoped_credentials WHERE name = '{{ cred_name }}') 38 | CREATE DATABASE SCOPED CREDENTIAL [{{ cred_name }}] WITH 39 | IDENTITY = '{{ env_var("DBT_SYNAPSE_UID") }}', 40 | SECRET = '{{ env_var("DBT_SYNAPSE_PWD") }}' 41 | 42 | {% endset %} 43 | 44 | {% set create_external_data_source %} 45 | IF NOT EXISTS ( SELECT * FROM sys.external_data_sources WHERE name = '{{external_data_source}}' ) 46 | 47 | CREATE EXTERNAL DATA SOURCE [{{external_data_source}}] WITH ( 48 | TYPE = RDBMS, 49 | LOCATION = 
'{{ env_var("DBT_SYNAPSE_SERVER") }}', 50 | DATABASE_NAME = '{{ env_var("DBT_SYNAPSE_DB") }}', 51 | CREDENTIAL = [{{ cred_name }}] 52 | ) 53 | {% endset %} 54 | 55 | {%- endif %} 56 | 57 | 58 | {% if target.type == "sqlserver" -%} 59 | {% do log('Creating database scoped credential ' ~ cred_name, info = true) %} 60 | {% do run_query(create_database_scoped_credential) %} 61 | {%- endif %} 62 | 63 | {% do log('Creating external data source ' ~ external_data_source, info = true) %} 64 | {% do run_query(create_external_data_source) %} 65 | 66 | {% if target.type == "synapse" -%} 67 | {% do log('Creating external file format ' ~ external_file_format, info = true) %} 68 | {% do run_query(create_external_file_format) %} 69 | {%- endif %} 70 | 71 | {% endmacro %} 72 | -------------------------------------------------------------------------------- /integration_tests/models/common/control.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: not_external 5 | tables: 6 | - name: take_no_action 7 | columns: 8 | - name: id 9 | data_type: varchar(255) 10 | description: "primary key" 11 | -------------------------------------------------------------------------------- /integration_tests/models/plugins/azuresql/azuresql_external.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: azuresql_external 5 | schema: "{{ target.schema }}" 6 | loader: RDBMS cross database query 7 | tables: 8 | - name: people_csv_unpartitioned 9 | external: 10 | data_source: "{{ target.schema ~ '.dbt_external_tables_testing' }}" 11 | schema_name: 'dbt_external_tables_integration_tests_synapse' 12 | object_name: 'people_csv_unpartitioned' 13 | columns: &cols-of-the-people 14 | - name: id 15 | data_type: int 16 | - name: first_name 17 | data_type: varchar(64) 18 | - name: last_name 19 | data_type: varchar(64) 20 | - name: email 21 | data_type: varchar(64) 22 | tests: &equal-to-the-people 23 | - dbt_external_tables_integration_tests.tsql_equality: 24 | compare_model: ref('people') 25 | compare_columns: 26 | - id 27 | - first_name 28 | - last_name 29 | - email 30 | 31 | # TODO: JSON IS NOT SUPPORTED BY SYNAPSE ATM 32 | 33 | # - name: people_json_unpartitioned 34 | # external: &json-people 35 | # location: '@{{ target.schema }}.dbt_external_tables_testing/json' 36 | # file_format: '( type = json )' 37 | # columns: *cols-of-the-people 38 | # tests: *equal-to-the-people 39 | 40 | # - name: people_json_partitioned 41 | # external: 42 | # <<: *json-people 43 | # partitions: *parts-of-the-people 44 | # columns: *cols-of-the-people 45 | # tests: *equal-to-the-people 46 | 47 | # TODO: syntax when no columns specified 48 | # - name: people_csv_unpartitioned_no_columns 49 | # external: *csv-people 50 | # tests: &same-rowcount 51 | # - dbt_external_tables_integration_tests.tsql_equrowcount: 52 | # compare_model: ref('people') 53 | # 54 | # - name: people_csv_partitioned_no_columns 55 | # external: 56 | # <<: *csv-people 57 | # # partitions: *parts-of-the-people 58 | # tests: *same-rowcount 59 | 60 | # - name: people_json_unpartitioned_no_columns 61 | # external: *csv-people 62 | # tests: *same-rowcount 63 | 64 | # - name: people_json_partitioned_no_columns 65 | # external: 66 | # <<: *json-people 67 | # partitions: *parts-of-the-people 68 | # tests: *same-rowcount 69 | 70 | # - name: people_json_multipartitioned_no_columns 71 | # external: 72 | # <<: *json-people 73 | # partitions: 74 | # - 
name: file_type 75 | # data_type: varchar 76 | # expression: "split_part(metadata$filename, 'section=', 1)" 77 | # - name: section 78 | # data_type: varchar 79 | # expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)" 80 | # tests: *same-rowcount 81 | -------------------------------------------------------------------------------- /integration_tests/models/plugins/bigquery/bigquery_external.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: bigquery_external 5 | schema: "{{ target.schema }}" 6 | loader: Cloud Storage 7 | 8 | tables: 9 | 10 | - name: people_csv_unpartitioned 11 | external: 12 | location: 'gs://dbt-external-tables-testing/csv/*' 13 | options: 14 | format: csv 15 | skip_leading_rows: 1 16 | columns: &cols-of-the-people 17 | - name: id 18 | data_type: int64 19 | description: id_of_the_person 20 | - name: first_name 21 | data_type: string 22 | - name: last_name 23 | data_type: string 24 | - name: email 25 | data_type: string 26 | tests: &equal-to-the-people 27 | - dbt_utils.equality: 28 | compare_model: ref('people') 29 | compare_columns: 30 | - id 31 | - first_name 32 | - last_name 33 | - email 34 | 35 | - name: people_csv_partitioned 36 | external: 37 | location: 'gs://dbt-external-tables-testing/csv/*' 38 | options: 39 | format: csv 40 | skip_leading_rows: 1 41 | hive_partition_uri_prefix: 'gs://dbt-external-tables-testing/csv' 42 | partitions: &parts-of-the-people 43 | - name: section 44 | data_type: string 45 | columns: *cols-of-the-people 46 | tests: *equal-to-the-people 47 | 48 | - name: people_csv_schema_auto_detect 49 | external: 50 | location: 'gs://dbt-external-tables-testing/csv/*' 51 | options: 52 | format: csv 53 | skip_leading_rows: 1 54 | hive_partition_uri_prefix: 'gs://dbt-external-tables-testing/csv' 55 | tests: *equal-to-the-people 56 | 57 | - name: people_csv_override_uris 58 | external: 59 | location: this can be anything 60 | options: 61 | format: csv 62 | skip_leading_rows: 1 63 | uris: 64 | - 'gs://dbt-external-tables-testing/csv/section=a/people_a.csv' 65 | - 'gs://dbt-external-tables-testing/csv/section=b/people_b.csv' 66 | - 'gs://dbt-external-tables-testing/csv/section=c/people_c.csv' 67 | - 'gs://dbt-external-tables-testing/csv/section=d/people_d.csv' 68 | columns: *cols-of-the-people 69 | tests: *equal-to-the-people 70 | 71 | - name: people_csv_with_max_staleness 72 | external: 73 | location: 'gs://dbt-external-tables-testing/csv/*' 74 | options: 75 | format: csv 76 | skip_leading_rows: 1 77 | max_staleness: INTERVAL 1 HOUR 78 | columns: *cols-of-the-people 79 | tests: *equal-to-the-people 80 | 81 | # - name: people_json_unpartitioned 82 | # external: &json-people 83 | # location: 'gs://dbt-external-tables-testing/json/*' 84 | # options: 85 | # format: json 86 | # columns: *cols-of-the-people 87 | # tests: *equal-to-the-people 88 | # 89 | # - name: people_json_partitioned 90 | # external: 91 | # location: 'gs://dbt-external-tables-testing/json/*' 92 | # options: 93 | # format: json 94 | # hive_partition_uri_prefix: "'gs://dbt-external-tables-testing/json'" 95 | # partitions: *parts-of-the-people 96 | # columns: *cols-of-the-people 97 | # tests: *equal-to-the-people 98 | # 99 | # - name: people_json_schema_auto_detect 100 | # external: 101 | # location: 'gs://dbt-external-tables-testing/json/*' 102 | # options: 103 | # format: csv 104 | # skip_leading_rows: 1 105 | # hive_partition_uri_prefix: "'gs://dbt-external-tables-testing/json'" 106 | # tests: 
*equal-to-the-people 107 | -------------------------------------------------------------------------------- /integration_tests/models/plugins/redshift/redshift_external.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: redshift_external 5 | schema: "{{ target.schema }}_spectrum" 6 | loader: S3 7 | 8 | tables: 9 | 10 | - name: people_csv_unpartitioned 11 | external: &csv-people 12 | location: "s3://dbt-external-tables-testing/csv/" 13 | row_format: serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' 14 | table_properties: "('skip.header.line.count'='1')" 15 | columns: &cols-of-the-people 16 | - name: id 17 | data_type: int 18 | - name: first_name 19 | data_type: varchar(64) 20 | - name: last_name 21 | data_type: varchar(64) 22 | - name: email 23 | data_type: varchar(64) 24 | tests: &equal-to-the-people 25 | - dbt_utils.equality: 26 | compare_model: ref('people') 27 | compare_columns: 28 | - id 29 | - first_name 30 | - last_name 31 | - email 32 | 33 | - name: people_csv_partitioned 34 | external: 35 | <<: *csv-people 36 | partitions: &parts-of-the-people 37 | - name: section 38 | data_type: varchar 39 | vals: ['a','b','c','d'] 40 | path_macro: dbt_external_tables.key_value 41 | columns: *cols-of-the-people 42 | tests: *equal-to-the-people 43 | 44 | # ensure that all partitions are created 45 | - name: people_csv_multipartitioned 46 | external: 47 | <<: *csv-people 48 | location: "s3://dbt-external-tables-testing/" 49 | partitions: 50 | - name: file_format 51 | data_type: varchar 52 | vals: ['csv', 'json'] 53 | path_macro: dbt_external_tables.value_only 54 | - name: section 55 | data_type: varchar 56 | vals: ['a','b','c','d'] 57 | path_macro: dbt_external_tables.key_value 58 | - name: some_date 59 | data_type: date 60 | vals: 61 | macro: dbt.dates_in_range 62 | args: 63 | start_date_str: '2020-01-01' 64 | end_date_str: '2020-02-01' 65 | in_fmt: "%Y-%m-%d" 66 | out_fmt: "%Y-%m-%d" 67 | path_macro: dbt_external_tables.year_month_day 68 | - name: file_name 69 | data_type: varchar 70 | vals: ['people', 'not_people'] 71 | path_macro: dbt_external_tables.value_only 72 | columns: *cols-of-the-people 73 | 74 | - name: people_json_unpartitioned 75 | external: &json-people 76 | location: "s3://dbt-external-tables-testing/json/" 77 | row_format: "serde 'org.openx.data.jsonserde.JsonSerDe' 78 | with serdeproperties ( 79 | 'strip.outer.array'='false' 80 | )" 81 | columns: *cols-of-the-people 82 | tests: *equal-to-the-people 83 | 84 | - name: people_json_partitioned 85 | external: 86 | <<: *json-people 87 | partitions: *parts-of-the-people 88 | columns: *cols-of-the-people 89 | tests: *equal-to-the-people 90 | -------------------------------------------------------------------------------- /integration_tests/models/plugins/snowflake/people_alias.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | {{ dbt_utils.star(from=ref('people'), except=['email']) }}, 3 | email as email_alias 4 | FROM {{ ref('people') }} -------------------------------------------------------------------------------- /integration_tests/models/plugins/snowflake/people_expression.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | {{ dbt_utils.star(from=ref('people')) }}, 3 | split_part(email, '@', 2) as email_domain 4 | FROM {{ ref('people') }} -------------------------------------------------------------------------------- 
/integration_tests/models/plugins/snowflake/snowflake_external.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: snowflake_external 5 | schema: "{{ target.schema }}" 6 | loader: S3 7 | 8 | tables: 9 | - name: people_csv_unpartitioned 10 | external: &csv-people 11 | location: '@{{ target.schema }}.dbt_external_tables_testing/csv' 12 | file_format: '( type = csv skip_header = 1 )' 13 | columns: &cols-of-the-people 14 | - name: id 15 | data_type: int 16 | - name: first_name 17 | data_type: varchar(64) 18 | - name: last_name 19 | data_type: varchar(64) 20 | - name: email 21 | data_type: varchar(64) 22 | tests: &equal-to-the-people 23 | - dbt_utils.equality: 24 | compare_model: ref('people') 25 | compare_columns: 26 | - id 27 | - first_name 28 | - last_name 29 | - email 30 | 31 | - name: people_csv_partitioned 32 | external: 33 | <<: *csv-people 34 | auto_refresh: false # make sure this templates right 35 | partitions: &parts-of-the-people 36 | - name: section 37 | data_type: varchar 38 | expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)" 39 | columns: *cols-of-the-people 40 | tests: *equal-to-the-people 41 | 42 | - name: people_json_unpartitioned 43 | external: &json-people 44 | location: '@{{ target.schema }}.dbt_external_tables_testing/json' 45 | file_format: '( type = json )' 46 | columns: *cols-of-the-people 47 | tests: *equal-to-the-people 48 | 49 | - name: people_json_partitioned 50 | external: 51 | <<: *json-people 52 | partitions: *parts-of-the-people 53 | columns: *cols-of-the-people 54 | tests: *equal-to-the-people 55 | 56 | - name: people_json_snowpipe 57 | external: 58 | <<: *json-people 59 | snowpipe: 60 | auto_ingest: false 61 | copy_options: '' 62 | columns: *cols-of-the-people 63 | tests: *equal-to-the-people 64 | 65 | - name: people_json_snowpipe_pattern 66 | external: 67 | <<: *json-people 68 | pattern: '.*[.]json' 69 | snowpipe: 70 | auto_ingest: false 71 | copy_options: '' 72 | columns: *cols-of-the-people 73 | tests: *equal-to-the-people 74 | 75 | # just to test syntax 76 | - name: people_csv_unpartitioned_no_columns 77 | external: *csv-people 78 | tests: &same-rowcount 79 | - dbt_utils.equal_rowcount: 80 | compare_model: ref('people') 81 | 82 | - name: people_csv_partitioned_no_columns 83 | external: 84 | <<: *csv-people 85 | partitions: *parts-of-the-people 86 | tests: *same-rowcount 87 | 88 | - name: people_csv_with_keyword_colname 89 | external: *csv-people 90 | columns: 91 | - name: UNION 92 | quote: true 93 | data_type: varchar(64) 94 | tests: *same-rowcount 95 | 96 | - name: people_json_unpartitioned_no_columns 97 | external: *json-people 98 | tests: *same-rowcount 99 | 100 | - name: people_json_partitioned_no_columns 101 | external: 102 | <<: *json-people 103 | partitions: *parts-of-the-people 104 | tests: *same-rowcount 105 | 106 | - name: people_json_multipartitioned_no_columns 107 | external: 108 | <<: *json-people 109 | partitions: 110 | - name: file_type 111 | data_type: varchar 112 | expression: "split_part(metadata$filename, 'section=', 1)" 113 | - name: section 114 | data_type: varchar 115 | expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)" 116 | tests: *same-rowcount 117 | 118 | - name: people_parquet_column_list_unpartitioned 119 | external: &parquet-people 120 | location: '@{{ target.schema }}.dbt_external_tables_testing/parquet/' 121 | file_format: '{{ target.schema }}.dbt_external_tables_testing_parquet' 122 | columns: 
*cols-of-the-people 123 | tests: *equal-to-the-people 124 | 125 | - name: people_parquet_column_list_partitioned 126 | external: 127 | <<: *parquet-people 128 | partitions: *parts-of-the-people 129 | columns: *cols-of-the-people 130 | tests: *equal-to-the-people 131 | 132 | - name: people_parquet_infer_schema_unpartitioned 133 | external: 134 | <<: *parquet-people 135 | infer_schema: true 136 | tests: *equal-to-the-people 137 | 138 | - name: people_parquet_infer_schema_partitioned 139 | external: 140 | <<: *parquet-people 141 | partitions: *parts-of-the-people 142 | infer_schema: true 143 | tests: *equal-to-the-people 144 | 145 | - name: people_parquet_infer_schema_partitioned_and_column_desc 146 | external: 147 | <<: *parquet-people 148 | partitions: *parts-of-the-people 149 | infer_schema: true 150 | tests: *equal-to-the-people 151 | columns: 152 | - name: id 153 | description: "the unique ID for people" 154 | 155 | # test for column aliasing 156 | - name: people_csv_aliased 157 | external: *csv-people 158 | columns: 159 | - name: id 160 | data_type: int 161 | - name: first_name 162 | data_type: varchar(64) 163 | - name: last_name 164 | data_type: varchar(64) 165 | - name: email 166 | alias: email_alias 167 | data_type: varchar(64) 168 | tests: 169 | - dbt_utils.equality: 170 | compare_model: ref('people_alias') 171 | compare_columns: 172 | - id 173 | - first_name 174 | - last_name 175 | - email_alias 176 | 177 | # test for column expression 178 | - name: people_json_expression 179 | external: *json-people 180 | columns: 181 | - name: id 182 | data_type: int 183 | - name: first_name 184 | data_type: varchar(64) 185 | - name: last_name 186 | data_type: varchar(64) 187 | - name: email 188 | data_type: varchar(64) 189 | - name: email_domain 190 | data_type: varchar(64) 191 | alias: EMAIL_DOMAIN 192 | quote: true 193 | expression: split_part(value:email::VARCHAR, '@', 2) 194 | tests: 195 | - dbt_utils.equality: 196 | compare_model: ref('people_expression') 197 | compare_columns: 198 | - id 199 | - first_name 200 | - last_name 201 | - email 202 | - email_domain 203 | 204 | 205 | - name: people_parquet_infer_schema_ignore_case_unpartitioned 206 | external: 207 | <<: *parquet-people 208 | location: '@{{ target.schema }}.dbt_external_tables_testing/parquet_capitalized' 209 | infer_schema: true 210 | ignore_case: true 211 | tests: 212 | - dbt_utils.equality: 213 | compare_model: ref('people') 214 | compare_columns: 215 | - id 216 | - first_name 217 | - last_name 218 | - email 219 | 220 | - name: people_parquet_column_list_ignore_case_partitioned 221 | external: 222 | <<: *parquet-people 223 | location: '@{{ target.schema }}.dbt_external_tables_testing/parquet_capitalized' 224 | partitions: *parts-of-the-people 225 | ignore_case: true 226 | columns: *cols-of-the-people 227 | tests: 228 | - dbt_utils.equality: 229 | compare_model: ref('people') 230 | compare_columns: 231 | - id 232 | - first_name 233 | - last_name 234 | - email 235 | # temporarily disabled until we can properly address Iceberg tables 236 | # - name: people_iceberg_no_optional_configs 237 | # external: &iceberg-people 238 | # table_format: iceberg 239 | # external_volume: test_dbt_external_tables 240 | # catalog: test_dbt_external_tables_glue_catalog 241 | # catalog_table_name: people_csv 242 | # catalog_namespace: test_dbt_external_tables_db 243 | # tests: *equal-to-the-people 244 | 245 | # - name: people_iceberg_with_optional_configs 246 | # external: 247 | # <<: *iceberg-people 248 | # replace_invalid_characters: true 249 | # 
auto_refresh: true 250 | # comment: "testing adding comments" 251 | # tests: *equal-to-the-people -------------------------------------------------------------------------------- /integration_tests/models/plugins/spark/spark_external.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: spark_external 5 | schema: "{{ target.schema }}" 6 | loader: S3 7 | 8 | tables: 9 | 10 | - name: people_csv_unpartitioned_using 11 | external: &csv-people-using 12 | location: "s3://dbt-external-tables-testing/csv/" 13 | using: csv 14 | options: &csv-people-options 15 | sep: ',' 16 | header: 'true' 17 | columns: &cols-of-the-people 18 | - name: id 19 | data_type: int 20 | - name: first_name 21 | data_type: string 22 | - name: last_name 23 | data_type: string 24 | - name: email 25 | data_type: string 26 | tests: &equal-to-the-people 27 | - dbt_utils.equality: 28 | compare_model: ref('people') 29 | compare_columns: 30 | - id 31 | - first_name 32 | - last_name 33 | - email 34 | 35 | - name: people_csv_partitioned_using 36 | external: 37 | <<: *csv-people-using 38 | partitions: &parts-of-the-people 39 | - name: section 40 | data_type: string 41 | columns: *cols-of-the-people 42 | tests: *equal-to-the-people 43 | 44 | # ----- TODO: hive format 45 | 46 | # - name: people_csv_unpartitioned_hive_format 47 | # external: &csv-people-hive 48 | # location: "s3://dbt-external-tables-testing/csv/" 49 | # row_format: delimited fields terminated by ',' 50 | # file_format: textfile 51 | # tbl_properties: "('skip.header.line.count': 1)" 52 | # columns: *cols-of-the-people 53 | # 54 | # - name: people_csv_partitioned_hive_format 55 | # external: 56 | # <<: *csv-people-hive 57 | # partitions: *parts-of-the-people 58 | # columns: *cols-of-the-people 59 | 60 | # ----- TODO: json 61 | 62 | # - name: people_json_unpartitioned_using 63 | # external: &json-people-using 64 | # location: "s3://dbt-external-tables-testing/json/" 65 | # using: json 66 | # columns: *cols-of-the-people 67 | # tests: *equal-to-the-people 68 | # 69 | # - name: people_json_partitioned_using 70 | # external: 71 | # <<: *json-people-using 72 | # partitions: *parts-of-the-people 73 | # columns: *cols-of-the-people 74 | # tests: *equal-to-the-people 75 | -------------------------------------------------------------------------------- /integration_tests/models/plugins/synapse/synapse_external.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: synapse_external 5 | schema: "{{ target.schema }}" 6 | loader: ADLSblob 7 | 8 | tables: 9 | 10 | - name: people_csv_unpartitioned 11 | external: &csv-people 12 | location: '/csv' 13 | file_format: "{{ target.schema ~ '.dbt_external_ff_testing' }}" 14 | data_source: "{{ target.schema ~ '.dbt_external_tables_testing' }}" 15 | reject_type: VALUE 16 | reject_value: 0 17 | ansi_nulls: true 18 | quoted_identifier: true 19 | columns: &cols-of-the-people 20 | - name: id 21 | data_type: int 22 | - name: first_name 23 | data_type: varchar(64) 24 | - name: last_name 25 | data_type: varchar(64) 26 | - name: email 27 | data_type: varchar(64) 28 | tests: &equal-to-the-people 29 | - dbt_external_tables_integration_tests.tsql_equality: 30 | compare_model: ref('people') 31 | compare_columns: 32 | - id 33 | - first_name 34 | - last_name 35 | - email 36 | 37 | - name: people_csv_partitioned 38 | external: 39 | <<: *csv-people 40 | # TODO: SYNAPSE DOES NOT DO PARTITIONS 41 | # (BUT WE COULD 
MAKE A WORKAROUND !!!) 42 | # partitions: &parts-of-the-people 43 | # - name: section 44 | # data_type: varchar 45 | columns: *cols-of-the-people 46 | tests: *equal-to-the-people 47 | -------------------------------------------------------------------------------- /integration_tests/package-lock.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - local: ../ 3 | - package: dbt-labs/dbt_utils 4 | version: 1.3.0 5 | sha1_hash: d412a8f2761befedebc730e6d5197956aec4fc9b 6 | -------------------------------------------------------------------------------- /integration_tests/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - local: ../ 3 | - package: dbt-labs/dbt_utils 4 | version: 1.3.0 5 | -------------------------------------------------------------------------------- /integration_tests/profiles.yml: -------------------------------------------------------------------------------- 1 | 2 | # HEY! This file is used in the dbt-external-tables integrations tests with CircleCI. 3 | # You should __NEVER__ check credentials into version control. Thanks for reading :) 4 | 5 | integration_tests: 6 | target: postgres 7 | outputs: 8 | 9 | redshift: 10 | type: "redshift" 11 | host: "{{ env_var('REDSHIFT_HOST') }}" 12 | user: "{{ env_var('REDSHIFT_USER') }}" 13 | pass: "{{ env_var('DBT_ENV_SECRET_REDSHIFT_PASS') }}" 14 | dbname: "{{ env_var('REDSHIFT_DATABASE') }}" 15 | port: "{{ env_var('REDSHIFT_PORT') | as_number }}" 16 | schema: "{{ env_var('REDSHIFT_SCHEMA') }}" 17 | threads: 5 18 | 19 | snowflake: 20 | type: "snowflake" 21 | account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}" 22 | user: "{{ env_var('SNOWFLAKE_USER') }}" 23 | password: "{{ env_var('DBT_ENV_SECRET_SNOWFLAKE_PASS') }}" 24 | role: "{{ env_var('SNOWFLAKE_ROLE') }}" 25 | database: "{{ env_var('SNOWFLAKE_DATABASE') }}" 26 | warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}" 27 | schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}" 28 | threads: 10 29 | 30 | bigquery: 31 | type: "bigquery" 32 | method: "service-account-json" 33 | project: "{{ env_var('BIGQUERY_PROJECT') }}" 34 | dataset: "{{ env_var('BIGQUERY_SCHEMA') }}" 35 | threads: 10 36 | keyfile_json: 37 | "{{ env_var('BIGQUERY_KEYFILE_JSON') | as_native}}" 38 | job_retries: 3 39 | 40 | databricks: 41 | type: spark 42 | method: odbc 43 | port: 443 44 | driver: "{{ env_var('ODBC_DRIVER') }}" 45 | host: "{{ env_var('DATABRICKS_TEST_HOST') }}" 46 | endpoint: "{{ env_var('DATBRICKS_TEST_ENDPOINT') }}" 47 | token: "{{ env_var('DATABRICKS_TOKEN') }}" 48 | schema: dbt_external_tables_integration_tests_databricks 49 | 50 | synapse: 51 | type: synapse 52 | driver: "{{ env_var('SYNAPSE_DRIVER') }}" 53 | port: "{{ env_var('SYNAPSE_PORT') }}" 54 | host: "{{ env_var('SYNAPSE_HOST') }}" 55 | database: "{{ env_var('SYNAPSE_DATABASE') }}" 56 | authentication: "{{ env_var('SYNAPSE_AUTHENTICATION') }}" 57 | tenant_id: "{{ env_var('SYNAPSE_TENANT_ID') }}" 58 | client_id: "{{ env_var('SYNAPSE_CLIENT_ID') }}" 59 | client_secret: "{{ env_var('DBT_ENV_SECRET_SYNAPSE_CLIENT_SECRET') }}" 60 | schema: dbt_external_tables_integration_tests_synapse 61 | threads: 1 62 | 63 | azuresql: 64 | type: sqlserver 65 | driver: "ODBC Driver 17 for SQL Server" 66 | port: 1433 67 | host: "{{ env_var('AZURESQL_TEST_SERVER') }}" 68 | database: "{{ env_var('AZURESQL_TEST_DBNAME') }}" 69 | authentication: CLI 70 | schema: dbt_external_tables_integration_tests_azuresql 71 | threads: 1 72 | 
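One orientation note before the test data files: the synapse_external.yml spec above maps fairly directly onto PolyBase-style external table DDL, with the drop-if-exists and create steps handled by the package's plugin macros. The sketch below is a hedged illustration only; the schema name my_schema is an assumption, and DATA_SOURCE / FILE_FORMAT are shown as bare, database-scoped object names rather than the schema-qualified strings the spec passes through.

-- Hedged sketch: the general shape of the T-SQL behind people_csv_unpartitioned on Synapse.
-- Names are illustrative assumptions; the real statements are rendered by the plugin macros.
SET ANSI_NULLS ON;           -- spec sets ansi_nulls: true
SET QUOTED_IDENTIFIER ON;    -- spec sets quoted_identifier: true

IF OBJECT_ID('my_schema.people_csv_unpartitioned') IS NOT NULL
    DROP EXTERNAL TABLE my_schema.people_csv_unpartitioned;

CREATE EXTERNAL TABLE my_schema.people_csv_unpartitioned (
    id int,
    first_name varchar(64),
    last_name varchar(64),
    email varchar(64)
)
WITH (
    LOCATION = '/csv',
    DATA_SOURCE = dbt_external_tables_testing,   -- external data source object
    FILE_FORMAT = dbt_external_ff_testing,       -- external file format object
    REJECT_TYPE = VALUE,
    REJECT_VALUE = 0
);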
-------------------------------------------------------------------------------- /integration_tests/public_data/capitalize_parquet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import polars as pl\n", 10 | "import os" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 24, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/html": [ 21 | "
\n", 28 | "shape: (50, 4)
idfirst_namelast_nameemail
i64strstrstr
1"Jack""Hunter""jhunter0@pbs.org"
2"Kathryn""Walker""kwalker1@ezinearticles.com"
3"Gerald""Ryan""gryan2@com.com"
4"Bonnie""Spencer""bspencer3@ameblo.jp"
5"Harold""Taylor""htaylor4@people.com.cn"
46"Anthony""Garcia""agarcia19@flavors.me"
47"Doris""Lopez""dlopez1a@sphinn.com"
48"Susan""Nichols""snichols1b@freewebs.com"
49"Wanda""Ferguson""wferguson1c@yahoo.co.jp"
50"Andrea""Pierce""apierce1d@google.co.uk"
" 29 | ], 30 | "text/plain": [ 31 | "shape: (50, 4)\n", 32 | "┌─────┬────────────┬───────────┬────────────────────────────┐\n", 33 | "│ id ┆ first_name ┆ last_name ┆ email │\n", 34 | "│ --- ┆ --- ┆ --- ┆ --- │\n", 35 | "│ i64 ┆ str ┆ str ┆ str │\n", 36 | "╞═════╪════════════╪═══════════╪════════════════════════════╡\n", 37 | "│ 1 ┆ Jack ┆ Hunter ┆ jhunter0@pbs.org │\n", 38 | "│ 2 ┆ Kathryn ┆ Walker ┆ kwalker1@ezinearticles.com │\n", 39 | "│ 3 ┆ Gerald ┆ Ryan ┆ gryan2@com.com │\n", 40 | "│ 4 ┆ Bonnie ┆ Spencer ┆ bspencer3@ameblo.jp │\n", 41 | "│ 5 ┆ Harold ┆ Taylor ┆ htaylor4@people.com.cn │\n", 42 | "│ … ┆ … ┆ … ┆ … │\n", 43 | "│ 46 ┆ Anthony ┆ Garcia ┆ agarcia19@flavors.me │\n", 44 | "│ 47 ┆ Doris ┆ Lopez ┆ dlopez1a@sphinn.com │\n", 45 | "│ 48 ┆ Susan ┆ Nichols ┆ snichols1b@freewebs.com │\n", 46 | "│ 49 ┆ Wanda ┆ Ferguson ┆ wferguson1c@yahoo.co.jp │\n", 47 | "│ 50 ┆ Andrea ┆ Pierce ┆ apierce1d@google.co.uk │\n", 48 | "└─────┴────────────┴───────────┴────────────────────────────┘" 49 | ] 50 | }, 51 | "execution_count": 24, 52 | "metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "# Read the parquet file\n", 58 | "df = pl.read_parquet('integration_tests/public_data/parquet/section=a/people_a.parquet')\n", 59 | "df" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 25, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/html": [ 70 | "
\n", 77 | "shape: (50, 4)
IdFirst_NameLast_NameEmail
i64strstrstr
1"Jack""Hunter""jhunter0@pbs.org"
2"Kathryn""Walker""kwalker1@ezinearticles.com"
3"Gerald""Ryan""gryan2@com.com"
4"Bonnie""Spencer""bspencer3@ameblo.jp"
5"Harold""Taylor""htaylor4@people.com.cn"
46"Anthony""Garcia""agarcia19@flavors.me"
47"Doris""Lopez""dlopez1a@sphinn.com"
48"Susan""Nichols""snichols1b@freewebs.com"
49"Wanda""Ferguson""wferguson1c@yahoo.co.jp"
50"Andrea""Pierce""apierce1d@google.co.uk"
" 78 | ], 79 | "text/plain": [ 80 | "shape: (50, 4)\n", 81 | "┌─────┬────────────┬───────────┬────────────────────────────┐\n", 82 | "│ Id ┆ First_Name ┆ Last_Name ┆ Email │\n", 83 | "│ --- ┆ --- ┆ --- ┆ --- │\n", 84 | "│ i64 ┆ str ┆ str ┆ str │\n", 85 | "╞═════╪════════════╪═══════════╪════════════════════════════╡\n", 86 | "│ 1 ┆ Jack ┆ Hunter ┆ jhunter0@pbs.org │\n", 87 | "│ 2 ┆ Kathryn ┆ Walker ┆ kwalker1@ezinearticles.com │\n", 88 | "│ 3 ┆ Gerald ┆ Ryan ┆ gryan2@com.com │\n", 89 | "│ 4 ┆ Bonnie ┆ Spencer ┆ bspencer3@ameblo.jp │\n", 90 | "│ 5 ┆ Harold ┆ Taylor ┆ htaylor4@people.com.cn │\n", 91 | "│ … ┆ … ┆ … ┆ … │\n", 92 | "│ 46 ┆ Anthony ┆ Garcia ┆ agarcia19@flavors.me │\n", 93 | "│ 47 ┆ Doris ┆ Lopez ┆ dlopez1a@sphinn.com │\n", 94 | "│ 48 ┆ Susan ┆ Nichols ┆ snichols1b@freewebs.com │\n", 95 | "│ 49 ┆ Wanda ┆ Ferguson ┆ wferguson1c@yahoo.co.jp │\n", 96 | "│ 50 ┆ Andrea ┆ Pierce ┆ apierce1d@google.co.uk │\n", 97 | "└─────┴────────────┴───────────┴────────────────────────────┘" 98 | ] 99 | }, 100 | "execution_count": 25, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "new__col_dict = {'id':'Id', 'first_name':'First_Name', 'last_name':'Last_Name', 'email':'Email'}\n", 107 | "df_rename = df.rename(new_dict)\n", 108 | "df_rename" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 26, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "fpath = 'integration_tests/public_data/parquet{}/section={}/people_{}.parquet'" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 28, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/html": [ 128 | "
\n", 135 | "shape: (50, 4)
IdFirst_NameLast_NameEmail
i64strstrstr
1"Jack""Hunter""jhunter0@pbs.org"
2"Kathryn""Walker""kwalker1@ezinearticles.com"
3"Gerald""Ryan""gryan2@com.com"
4"Bonnie""Spencer""bspencer3@ameblo.jp"
5"Harold""Taylor""htaylor4@people.com.cn"
46"Anthony""Garcia""agarcia19@flavors.me"
47"Doris""Lopez""dlopez1a@sphinn.com"
48"Susan""Nichols""snichols1b@freewebs.com"
49"Wanda""Ferguson""wferguson1c@yahoo.co.jp"
50"Andrea""Pierce""apierce1d@google.co.uk"
" 136 | ], 137 | "text/plain": [ 138 | "shape: (50, 4)\n", 139 | "┌─────┬────────────┬───────────┬────────────────────────────┐\n", 140 | "│ Id ┆ First_Name ┆ Last_Name ┆ Email │\n", 141 | "│ --- ┆ --- ┆ --- ┆ --- │\n", 142 | "│ i64 ┆ str ┆ str ┆ str │\n", 143 | "╞═════╪════════════╪═══════════╪════════════════════════════╡\n", 144 | "│ 1 ┆ Jack ┆ Hunter ┆ jhunter0@pbs.org │\n", 145 | "│ 2 ┆ Kathryn ┆ Walker ┆ kwalker1@ezinearticles.com │\n", 146 | "│ 3 ┆ Gerald ┆ Ryan ┆ gryan2@com.com │\n", 147 | "│ 4 ┆ Bonnie ┆ Spencer ┆ bspencer3@ameblo.jp │\n", 148 | "│ 5 ┆ Harold ┆ Taylor ┆ htaylor4@people.com.cn │\n", 149 | "│ … ┆ … ┆ … ┆ … │\n", 150 | "│ 46 ┆ Anthony ┆ Garcia ┆ agarcia19@flavors.me │\n", 151 | "│ 47 ┆ Doris ┆ Lopez ┆ dlopez1a@sphinn.com │\n", 152 | "│ 48 ┆ Susan ┆ Nichols ┆ snichols1b@freewebs.com │\n", 153 | "│ 49 ┆ Wanda ┆ Ferguson ┆ wferguson1c@yahoo.co.jp │\n", 154 | "│ 50 ┆ Andrea ┆ Pierce ┆ apierce1d@google.co.uk │\n", 155 | "└─────┴────────────┴───────────┴────────────────────────────┘" 156 | ] 157 | }, 158 | "execution_count": 28, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "os.makedirs(os.path.dirname(fpath.format('_capitalized', 'a', 'a')), exist_ok=True)\n", 165 | "\n", 166 | "df_a = (pl.read_parquet(fpath.format('', 'a', 'a'))\n", 167 | " .rename(new__col_dict)\n", 168 | " # .write_parquet(fpath.format('_capitalized', 'a', 'a'))\n", 169 | ")\n", 170 | "df_a.write_parquet(fpath.format('_capitalized', 'a', 'a'))\n", 171 | "df_a" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 33, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "data": { 181 | "text/html": [ 182 | "
\n", 189 | "shape: (50, 4)
IdFirst_NameLast_NameEmail
i64strstrstr
51"Lawrence""Phillips""lphillips1e@jugem.jp"
52"Judy""Gilbert""jgilbert1f@multiply.com"
53"Eric""Williams""ewilliams1g@joomla.org"
54"Ralph""Romero""rromero1h@sogou.com"
55"Jean""Wilson""jwilson1i@ocn.ne.jp"
96"Adam""Greene""agreene2n@fastcompany.com"
97"Earl""Sanders""esanders2o@hc360.com"
98"Angela""Brooks""abrooks2p@mtv.com"
99"Harold""Foster""hfoster2q@privacy.gov.au"
100"Carl""Meyer""cmeyer2r@disqus.com"
" 190 | ], 191 | "text/plain": [ 192 | "shape: (50, 4)\n", 193 | "┌─────┬────────────┬───────────┬───────────────────────────┐\n", 194 | "│ Id ┆ First_Name ┆ Last_Name ┆ Email │\n", 195 | "│ --- ┆ --- ┆ --- ┆ --- │\n", 196 | "│ i64 ┆ str ┆ str ┆ str │\n", 197 | "╞═════╪════════════╪═══════════╪═══════════════════════════╡\n", 198 | "│ 51 ┆ Lawrence ┆ Phillips ┆ lphillips1e@jugem.jp │\n", 199 | "│ 52 ┆ Judy ┆ Gilbert ┆ jgilbert1f@multiply.com │\n", 200 | "│ 53 ┆ Eric ┆ Williams ┆ ewilliams1g@joomla.org │\n", 201 | "│ 54 ┆ Ralph ┆ Romero ┆ rromero1h@sogou.com │\n", 202 | "│ 55 ┆ Jean ┆ Wilson ┆ jwilson1i@ocn.ne.jp │\n", 203 | "│ … ┆ … ┆ … ┆ … │\n", 204 | "│ 96 ┆ Adam ┆ Greene ┆ agreene2n@fastcompany.com │\n", 205 | "│ 97 ┆ Earl ┆ Sanders ┆ esanders2o@hc360.com │\n", 206 | "│ 98 ┆ Angela ┆ Brooks ┆ abrooks2p@mtv.com │\n", 207 | "│ 99 ┆ Harold ┆ Foster ┆ hfoster2q@privacy.gov.au │\n", 208 | "│ 100 ┆ Carl ┆ Meyer ┆ cmeyer2r@disqus.com │\n", 209 | "└─────┴────────────┴───────────┴───────────────────────────┘" 210 | ] 211 | }, 212 | "execution_count": 33, 213 | "metadata": {}, 214 | "output_type": "execute_result" 215 | } 216 | ], 217 | "source": [ 218 | "os.makedirs(os.path.dirname(fpath.format('_capitalized', 'b', 'b')), exist_ok=True)\n", 219 | "\n", 220 | "df_b = (pl.read_parquet(fpath.format('', 'b', 'b'))\n", 221 | " .rename(new__col_dict)\n", 222 | ")\n", 223 | "df_b.write_parquet(fpath.format('_capitalized', 'b', 'b'))\n", 224 | "df_b" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 32, 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "data": { 234 | "text/html": [ 235 | "
\n", 242 | "shape: (50, 4)
IdFirst_NameLast_NameEmail
i64strstrstr
101"Michael""Perez""mperez0@chronoengine.com"
102"Shawn""Mccoy""smccoy1@reddit.com"
103"Kathleen""Payne""kpayne2@cargocollective.com"
104"Jimmy""Cooper""jcooper3@cargocollective.com"
105"Katherine""Rice""krice4@typepad.com"
146"Norma""Cruz""ncruz19@si.edu"
147"Marie""Peters""mpeters1a@mlb.com"
148"Lillian""Carr""lcarr1b@typepad.com"
149"Judy""Nichols""jnichols1c@t-online.de"
150"Billy""Long""blong1d@yahoo.com"
" 243 | ], 244 | "text/plain": [ 245 | "shape: (50, 4)\n", 246 | "┌─────┬────────────┬───────────┬──────────────────────────────┐\n", 247 | "│ Id ┆ First_Name ┆ Last_Name ┆ Email │\n", 248 | "│ --- ┆ --- ┆ --- ┆ --- │\n", 249 | "│ i64 ┆ str ┆ str ┆ str │\n", 250 | "╞═════╪════════════╪═══════════╪══════════════════════════════╡\n", 251 | "│ 101 ┆ Michael ┆ Perez ┆ mperez0@chronoengine.com │\n", 252 | "│ 102 ┆ Shawn ┆ Mccoy ┆ smccoy1@reddit.com │\n", 253 | "│ 103 ┆ Kathleen ┆ Payne ┆ kpayne2@cargocollective.com │\n", 254 | "│ 104 ┆ Jimmy ┆ Cooper ┆ jcooper3@cargocollective.com │\n", 255 | "│ 105 ┆ Katherine ┆ Rice ┆ krice4@typepad.com │\n", 256 | "│ … ┆ … ┆ … ┆ … │\n", 257 | "│ 146 ┆ Norma ┆ Cruz ┆ ncruz19@si.edu │\n", 258 | "│ 147 ┆ Marie ┆ Peters ┆ mpeters1a@mlb.com │\n", 259 | "│ 148 ┆ Lillian ┆ Carr ┆ lcarr1b@typepad.com │\n", 260 | "│ 149 ┆ Judy ┆ Nichols ┆ jnichols1c@t-online.de │\n", 261 | "│ 150 ┆ Billy ┆ Long ┆ blong1d@yahoo.com │\n", 262 | "└─────┴────────────┴───────────┴──────────────────────────────┘" 263 | ] 264 | }, 265 | "execution_count": 32, 266 | "metadata": {}, 267 | "output_type": "execute_result" 268 | } 269 | ], 270 | "source": [ 271 | "os.makedirs(os.path.dirname(fpath.format('_capitalized', 'c', 'c')), exist_ok=True)\n", 272 | "\n", 273 | "df_c = (pl.read_parquet(fpath.format('', 'c', 'c'))\n", 274 | " .rename(new__col_dict)\n", 275 | ")\n", 276 | "df_c.write_parquet(fpath.format('_capitalized', 'c', 'c'))\n", 277 | "df_c" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 31, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "data": { 287 | "text/html": [ 288 | "
\n", 295 | "shape: (50, 4)
IdFirst_NameLast_NameEmail
i64strstrstr
151"Howard""Reid""hreid1e@exblog.jp"
152"Laura""Ferguson""lferguson1f@tuttocitta.it"
153"Anne""Bailey""abailey1g@geocities.com"
154"Rose""Morgan""rmorgan1h@ehow.com"
155"Nicholas""Reyes""nreyes1i@google.ru"
196"Jacqueline""Anderson""janderson2n@cargocollective.co…
197"Shirley""Diaz""sdiaz2o@ucla.edu"
198"Nicole""Meyer""nmeyer2p@flickr.com"
199"Mary""Gray""mgray2q@constantcontact.com"
200"Jean""Mcdonald""jmcdonald2r@baidu.com"
" 296 | ], 297 | "text/plain": [ 298 | "shape: (50, 4)\n", 299 | "┌─────┬────────────┬───────────┬─────────────────────────────────┐\n", 300 | "│ Id ┆ First_Name ┆ Last_Name ┆ Email │\n", 301 | "│ --- ┆ --- ┆ --- ┆ --- │\n", 302 | "│ i64 ┆ str ┆ str ┆ str │\n", 303 | "╞═════╪════════════╪═══════════╪═════════════════════════════════╡\n", 304 | "│ 151 ┆ Howard ┆ Reid ┆ hreid1e@exblog.jp │\n", 305 | "│ 152 ┆ Laura ┆ Ferguson ┆ lferguson1f@tuttocitta.it │\n", 306 | "│ 153 ┆ Anne ┆ Bailey ┆ abailey1g@geocities.com │\n", 307 | "│ 154 ┆ Rose ┆ Morgan ┆ rmorgan1h@ehow.com │\n", 308 | "│ 155 ┆ Nicholas ┆ Reyes ┆ nreyes1i@google.ru │\n", 309 | "│ … ┆ … ┆ … ┆ … │\n", 310 | "│ 196 ┆ Jacqueline ┆ Anderson ┆ janderson2n@cargocollective.co… │\n", 311 | "│ 197 ┆ Shirley ┆ Diaz ┆ sdiaz2o@ucla.edu │\n", 312 | "│ 198 ┆ Nicole ┆ Meyer ┆ nmeyer2p@flickr.com │\n", 313 | "│ 199 ┆ Mary ┆ Gray ┆ mgray2q@constantcontact.com │\n", 314 | "│ 200 ┆ Jean ┆ Mcdonald ┆ jmcdonald2r@baidu.com │\n", 315 | "└─────┴────────────┴───────────┴─────────────────────────────────┘" 316 | ] 317 | }, 318 | "execution_count": 31, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "os.makedirs(os.path.dirname(fpath.format('_capitalized', 'd', 'd')), exist_ok=True)\n", 325 | "\n", 326 | "df_d = (pl.read_parquet(fpath.format('', 'd', 'd'))\n", 327 | " .rename(new__col_dict)\n", 328 | " \n", 329 | ")\n", 330 | "df_d.write_parquet(fpath.format('_capitalized', 'd', 'd'))\n", 331 | "df_d" 332 | ] 333 | } 334 | ], 335 | "metadata": { 336 | "kernelspec": { 337 | "display_name": "dbt", 338 | "language": "python", 339 | "name": "python3" 340 | }, 341 | "language_info": { 342 | "codemirror_mode": { 343 | "name": "ipython", 344 | "version": 3 345 | }, 346 | "file_extension": ".py", 347 | "mimetype": "text/x-python", 348 | "name": "python", 349 | "nbconvert_exporter": "python", 350 | "pygments_lexer": "ipython3", 351 | "version": "3.11.8" 352 | } 353 | }, 354 | "nbformat": 4, 355 | "nbformat_minor": 2 356 | } 357 | -------------------------------------------------------------------------------- /integration_tests/public_data/csv/section=a/people_a.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email 2 | 1,Jack,Hunter,jhunter0@pbs.org 3 | 2,Kathryn,Walker,kwalker1@ezinearticles.com 4 | 3,Gerald,Ryan,gryan2@com.com 5 | 4,Bonnie,Spencer,bspencer3@ameblo.jp 6 | 5,Harold,Taylor,htaylor4@people.com.cn 7 | 6,Jacqueline,Griffin,jgriffin5@t.co 8 | 7,Wanda,Arnold,warnold6@google.nl 9 | 8,Craig,Ortiz,cortiz7@sciencedaily.com 10 | 9,Gary,Day,gday8@nih.gov 11 | 10,Rose,Wright,rwright9@yahoo.co.jp 12 | 11,Raymond,Kelley,rkelleya@fc2.com 13 | 12,Gerald,Robinson,grobinsonb@disqus.com 14 | 13,Mildred,Martinez,mmartinezc@samsung.com 15 | 14,Dennis,Arnold,darnoldd@google.com 16 | 15,Judy,Gray,jgraye@opensource.org 17 | 16,Theresa,Garza,tgarzaf@epa.gov 18 | 17,Gerald,Robertson,grobertsong@csmonitor.com 19 | 18,Philip,Hernandez,phernandezh@adobe.com 20 | 19,Julia,Gonzalez,jgonzalezi@cam.ac.uk 21 | 20,Andrew,Davis,adavisj@patch.com 22 | 21,Kimberly,Harper,kharperk@foxnews.com 23 | 22,Mark,Martin,mmartinl@marketwatch.com 24 | 23,Cynthia,Ruiz,cruizm@google.fr 25 | 24,Samuel,Carroll,scarrolln@youtu.be 26 | 25,Jennifer,Larson,jlarsono@vinaora.com 27 | 26,Ashley,Perry,aperryp@rakuten.co.jp 28 | 27,Howard,Rodriguez,hrodriguezq@shutterfly.com 29 | 28,Amy,Brooks,abrooksr@theatlantic.com 30 | 29,Louise,Warren,lwarrens@adobe.com 31 | 
30,Tina,Watson,twatsont@myspace.com 32 | 31,Janice,Kelley,jkelleyu@creativecommons.org 33 | 32,Terry,Mccoy,tmccoyv@bravesites.com 34 | 33,Jeffrey,Morgan,jmorganw@surveymonkey.com 35 | 34,Louis,Harvey,lharveyx@sina.com.cn 36 | 35,Philip,Miller,pmillery@samsung.com 37 | 36,Willie,Marshall,wmarshallz@ow.ly 38 | 37,Patrick,Lopez,plopez10@redcross.org 39 | 38,Adam,Jenkins,ajenkins11@harvard.edu 40 | 39,Benjamin,Cruz,bcruz12@linkedin.com 41 | 40,Ruby,Hawkins,rhawkins13@gmpg.org 42 | 41,Carlos,Barnes,cbarnes14@a8.net 43 | 42,Ruby,Griffin,rgriffin15@bravesites.com 44 | 43,Sean,Mason,smason16@icq.com 45 | 44,Anthony,Payne,apayne17@utexas.edu 46 | 45,Steve,Cruz,scruz18@pcworld.com 47 | 46,Anthony,Garcia,agarcia19@flavors.me 48 | 47,Doris,Lopez,dlopez1a@sphinn.com 49 | 48,Susan,Nichols,snichols1b@freewebs.com 50 | 49,Wanda,Ferguson,wferguson1c@yahoo.co.jp 51 | 50,Andrea,Pierce,apierce1d@google.co.uk 52 | -------------------------------------------------------------------------------- /integration_tests/public_data/csv/section=b/people_b.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email 2 | 51,Lawrence,Phillips,lphillips1e@jugem.jp 3 | 52,Judy,Gilbert,jgilbert1f@multiply.com 4 | 53,Eric,Williams,ewilliams1g@joomla.org 5 | 54,Ralph,Romero,rromero1h@sogou.com 6 | 55,Jean,Wilson,jwilson1i@ocn.ne.jp 7 | 56,Lori,Reynolds,lreynolds1j@illinois.edu 8 | 57,Donald,Moreno,dmoreno1k@bbc.co.uk 9 | 58,Steven,Berry,sberry1l@eepurl.com 10 | 59,Theresa,Shaw,tshaw1m@people.com.cn 11 | 60,John,Stephens,jstephens1n@nationalgeographic.com 12 | 61,Richard,Jacobs,rjacobs1o@state.tx.us 13 | 62,Andrew,Lawson,alawson1p@over-blog.com 14 | 63,Peter,Morgan,pmorgan1q@rambler.ru 15 | 64,Nicole,Garrett,ngarrett1r@zimbio.com 16 | 65,Joshua,Kim,jkim1s@edublogs.org 17 | 66,Ralph,Roberts,rroberts1t@people.com.cn 18 | 67,George,Montgomery,gmontgomery1u@smugmug.com 19 | 68,Gerald,Alvarez,galvarez1v@flavors.me 20 | 69,Donald,Olson,dolson1w@whitehouse.gov 21 | 70,Carlos,Morgan,cmorgan1x@pbs.org 22 | 71,Aaron,Stanley,astanley1y@webnode.com 23 | 72,Virginia,Long,vlong1z@spiegel.de 24 | 73,Robert,Berry,rberry20@tripadvisor.com 25 | 74,Antonio,Brooks,abrooks21@unesco.org 26 | 75,Ruby,Garcia,rgarcia22@ovh.net 27 | 76,Jack,Hanson,jhanson23@blogtalkradio.com 28 | 77,Kathryn,Nelson,knelson24@walmart.com 29 | 78,Jason,Reed,jreed25@printfriendly.com 30 | 79,George,Coleman,gcoleman26@people.com.cn 31 | 80,Rose,King,rking27@ucoz.com 32 | 81,Johnny,Holmes,jholmes28@boston.com 33 | 82,Katherine,Gilbert,kgilbert29@altervista.org 34 | 83,Joshua,Thomas,jthomas2a@ustream.tv 35 | 84,Julie,Perry,jperry2b@opensource.org 36 | 85,Richard,Perry,rperry2c@oracle.com 37 | 86,Kenneth,Ruiz,kruiz2d@wikimedia.org 38 | 87,Jose,Morgan,jmorgan2e@webnode.com 39 | 88,Donald,Campbell,dcampbell2f@goo.ne.jp 40 | 89,Debra,Collins,dcollins2g@uol.com.br 41 | 90,Jesse,Johnson,jjohnson2h@stumbleupon.com 42 | 91,Elizabeth,Stone,estone2i@histats.com 43 | 92,Angela,Rogers,arogers2j@goodreads.com 44 | 93,Emily,Dixon,edixon2k@mlb.com 45 | 94,Albert,Scott,ascott2l@tinypic.com 46 | 95,Barbara,Peterson,bpeterson2m@ow.ly 47 | 96,Adam,Greene,agreene2n@fastcompany.com 48 | 97,Earl,Sanders,esanders2o@hc360.com 49 | 98,Angela,Brooks,abrooks2p@mtv.com 50 | 99,Harold,Foster,hfoster2q@privacy.gov.au 51 | 100,Carl,Meyer,cmeyer2r@disqus.com 52 | -------------------------------------------------------------------------------- /integration_tests/public_data/csv/section=c/people_c.csv: 
-------------------------------------------------------------------------------- 1 | id,first_name,last_name,email 2 | 101,Michael,Perez,mperez0@chronoengine.com 3 | 102,Shawn,Mccoy,smccoy1@reddit.com 4 | 103,Kathleen,Payne,kpayne2@cargocollective.com 5 | 104,Jimmy,Cooper,jcooper3@cargocollective.com 6 | 105,Katherine,Rice,krice4@typepad.com 7 | 106,Sarah,Ryan,sryan5@gnu.org 8 | 107,Martin,Mcdonald,mmcdonald6@opera.com 9 | 108,Frank,Robinson,frobinson7@wunderground.com 10 | 109,Jennifer,Franklin,jfranklin8@mail.ru 11 | 110,Henry,Welch,hwelch9@list-manage.com 12 | 111,Fred,Snyder,fsnydera@reddit.com 13 | 112,Amy,Dunn,adunnb@nba.com 14 | 113,Kathleen,Meyer,kmeyerc@cdc.gov 15 | 114,Steve,Ferguson,sfergusond@reverbnation.com 16 | 115,Teresa,Hill,thille@dion.ne.jp 17 | 116,Amanda,Harper,aharperf@mail.ru 18 | 117,Kimberly,Ray,krayg@xing.com 19 | 118,Johnny,Knight,jknighth@jalbum.net 20 | 119,Virginia,Freeman,vfreemani@tiny.cc 21 | 120,Anna,Austin,aaustinj@diigo.com 22 | 121,Willie,Hill,whillk@mail.ru 23 | 122,Sean,Harris,sharrisl@zdnet.com 24 | 123,Mildred,Adams,madamsm@usatoday.com 25 | 124,David,Graham,dgrahamn@zimbio.com 26 | 125,Victor,Hunter,vhuntero@ehow.com 27 | 126,Aaron,Ruiz,aruizp@weebly.com 28 | 127,Benjamin,Brooks,bbrooksq@jalbum.net 29 | 128,Lisa,Wilson,lwilsonr@japanpost.jp 30 | 129,Benjamin,King,bkings@comsenz.com 31 | 130,Christina,Williamson,cwilliamsont@boston.com 32 | 131,Jane,Gonzalez,jgonzalezu@networksolutions.com 33 | 132,Thomas,Owens,towensv@psu.edu 34 | 133,Katherine,Moore,kmoorew@naver.com 35 | 134,Jennifer,Stewart,jstewartx@yahoo.com 36 | 135,Sara,Tucker,stuckery@topsy.com 37 | 136,Harold,Ortiz,hortizz@vkontakte.ru 38 | 137,Shirley,James,sjames10@yelp.com 39 | 138,Dennis,Johnson,djohnson11@slate.com 40 | 139,Louise,Weaver,lweaver12@china.com.cn 41 | 140,Maria,Armstrong,marmstrong13@prweb.com 42 | 141,Gloria,Cruz,gcruz14@odnoklassniki.ru 43 | 142,Diana,Spencer,dspencer15@ifeng.com 44 | 143,Kelly,Nguyen,knguyen16@altervista.org 45 | 144,Jane,Rodriguez,jrodriguez17@biblegateway.com 46 | 145,Scott,Brown,sbrown18@geocities.jp 47 | 146,Norma,Cruz,ncruz19@si.edu 48 | 147,Marie,Peters,mpeters1a@mlb.com 49 | 148,Lillian,Carr,lcarr1b@typepad.com 50 | 149,Judy,Nichols,jnichols1c@t-online.de 51 | 150,Billy,Long,blong1d@yahoo.com 52 | -------------------------------------------------------------------------------- /integration_tests/public_data/csv/section=d/people_d.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email 2 | 151,Howard,Reid,hreid1e@exblog.jp 3 | 152,Laura,Ferguson,lferguson1f@tuttocitta.it 4 | 153,Anne,Bailey,abailey1g@geocities.com 5 | 154,Rose,Morgan,rmorgan1h@ehow.com 6 | 155,Nicholas,Reyes,nreyes1i@google.ru 7 | 156,Joshua,Kennedy,jkennedy1j@house.gov 8 | 157,Paul,Watkins,pwatkins1k@upenn.edu 9 | 158,Kathryn,Kelly,kkelly1l@businessweek.com 10 | 159,Adam,Armstrong,aarmstrong1m@techcrunch.com 11 | 160,Norma,Wallace,nwallace1n@phoca.cz 12 | 161,Timothy,Reyes,treyes1o@google.cn 13 | 162,Elizabeth,Patterson,epatterson1p@sun.com 14 | 163,Edward,Gomez,egomez1q@google.fr 15 | 164,David,Cox,dcox1r@friendfeed.com 16 | 165,Brenda,Wood,bwood1s@over-blog.com 17 | 166,Adam,Walker,awalker1t@blogs.com 18 | 167,Michael,Hart,mhart1u@wix.com 19 | 168,Jesse,Ellis,jellis1v@google.co.uk 20 | 169,Janet,Powell,jpowell1w@un.org 21 | 170,Helen,Ford,hford1x@creativecommons.org 22 | 171,Gerald,Carpenter,gcarpenter1y@about.me 23 | 172,Kathryn,Oliver,koliver1z@army.mil 24 | 173,Alan,Berry,aberry20@gov.uk 25 | 
174,Harry,Andrews,handrews21@ameblo.jp 26 | 175,Andrea,Hall,ahall22@hp.com 27 | 176,Barbara,Wells,bwells23@behance.net 28 | 177,Anne,Wells,awells24@apache.org 29 | 178,Harry,Harper,hharper25@rediff.com 30 | 179,Jack,Ray,jray26@wufoo.com 31 | 180,Phillip,Hamilton,phamilton27@joomla.org 32 | 181,Shirley,Hunter,shunter28@newsvine.com 33 | 182,Arthur,Daniels,adaniels29@reuters.com 34 | 183,Virginia,Rodriguez,vrodriguez2a@walmart.com 35 | 184,Christina,Ryan,cryan2b@hibu.com 36 | 185,Theresa,Mendoza,tmendoza2c@vinaora.com 37 | 186,Jason,Cole,jcole2d@ycombinator.com 38 | 187,Phillip,Bryant,pbryant2e@rediff.com 39 | 188,Adam,Torres,atorres2f@sun.com 40 | 189,Margaret,Johnston,mjohnston2g@ucsd.edu 41 | 190,Paul,Payne,ppayne2h@hhs.gov 42 | 191,Todd,Willis,twillis2i@businessweek.com 43 | 192,Willie,Oliver,woliver2j@noaa.gov 44 | 193,Frances,Robertson,frobertson2k@go.com 45 | 194,Gregory,Hawkins,ghawkins2l@joomla.org 46 | 195,Lisa,Perkins,lperkins2m@si.edu 47 | 196,Jacqueline,Anderson,janderson2n@cargocollective.com 48 | 197,Shirley,Diaz,sdiaz2o@ucla.edu 49 | 198,Nicole,Meyer,nmeyer2p@flickr.com 50 | 199,Mary,Gray,mgray2q@constantcontact.com 51 | 200,Jean,Mcdonald,jmcdonald2r@baidu.com 52 | -------------------------------------------------------------------------------- /integration_tests/public_data/json/section=a/people_a.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 1, 3 | "first_name": "Jack", 4 | "last_name": "Hunter", 5 | "email": "jhunter0@pbs.org" 6 | } 7 | { 8 | "id": 2, 9 | "first_name": "Kathryn", 10 | "last_name": "Walker", 11 | "email": "kwalker1@ezinearticles.com" 12 | } 13 | { 14 | "id": 3, 15 | "first_name": "Gerald", 16 | "last_name": "Ryan", 17 | "email": "gryan2@com.com" 18 | } 19 | { 20 | "id": 4, 21 | "first_name": "Bonnie", 22 | "last_name": "Spencer", 23 | "email": "bspencer3@ameblo.jp" 24 | } 25 | { 26 | "id": 5, 27 | "first_name": "Harold", 28 | "last_name": "Taylor", 29 | "email": "htaylor4@people.com.cn" 30 | } 31 | { 32 | "id": 6, 33 | "first_name": "Jacqueline", 34 | "last_name": "Griffin", 35 | "email": "jgriffin5@t.co" 36 | } 37 | { 38 | "id": 7, 39 | "first_name": "Wanda", 40 | "last_name": "Arnold", 41 | "email": "warnold6@google.nl" 42 | } 43 | { 44 | "id": 8, 45 | "first_name": "Craig", 46 | "last_name": "Ortiz", 47 | "email": "cortiz7@sciencedaily.com" 48 | } 49 | { 50 | "id": 9, 51 | "first_name": "Gary", 52 | "last_name": "Day", 53 | "email": "gday8@nih.gov" 54 | } 55 | { 56 | "id": 10, 57 | "first_name": "Rose", 58 | "last_name": "Wright", 59 | "email": "rwright9@yahoo.co.jp" 60 | } 61 | { 62 | "id": 11, 63 | "first_name": "Raymond", 64 | "last_name": "Kelley", 65 | "email": "rkelleya@fc2.com" 66 | } 67 | { 68 | "id": 12, 69 | "first_name": "Gerald", 70 | "last_name": "Robinson", 71 | "email": "grobinsonb@disqus.com" 72 | } 73 | { 74 | "id": 13, 75 | "first_name": "Mildred", 76 | "last_name": "Martinez", 77 | "email": "mmartinezc@samsung.com" 78 | } 79 | { 80 | "id": 14, 81 | "first_name": "Dennis", 82 | "last_name": "Arnold", 83 | "email": "darnoldd@google.com" 84 | } 85 | { 86 | "id": 15, 87 | "first_name": "Judy", 88 | "last_name": "Gray", 89 | "email": "jgraye@opensource.org" 90 | } 91 | { 92 | "id": 16, 93 | "first_name": "Theresa", 94 | "last_name": "Garza", 95 | "email": "tgarzaf@epa.gov" 96 | } 97 | { 98 | "id": 17, 99 | "first_name": "Gerald", 100 | "last_name": "Robertson", 101 | "email": "grobertsong@csmonitor.com" 102 | } 103 | { 104 | "id": 18, 105 | "first_name": "Philip", 106 | "last_name": 
"Hernandez", 107 | "email": "phernandezh@adobe.com" 108 | } 109 | { 110 | "id": 19, 111 | "first_name": "Julia", 112 | "last_name": "Gonzalez", 113 | "email": "jgonzalezi@cam.ac.uk" 114 | } 115 | { 116 | "id": 20, 117 | "first_name": "Andrew", 118 | "last_name": "Davis", 119 | "email": "adavisj@patch.com" 120 | } 121 | { 122 | "id": 21, 123 | "first_name": "Kimberly", 124 | "last_name": "Harper", 125 | "email": "kharperk@foxnews.com" 126 | } 127 | { 128 | "id": 22, 129 | "first_name": "Mark", 130 | "last_name": "Martin", 131 | "email": "mmartinl@marketwatch.com" 132 | } 133 | { 134 | "id": 23, 135 | "first_name": "Cynthia", 136 | "last_name": "Ruiz", 137 | "email": "cruizm@google.fr" 138 | } 139 | { 140 | "id": 24, 141 | "first_name": "Samuel", 142 | "last_name": "Carroll", 143 | "email": "scarrolln@youtu.be" 144 | } 145 | { 146 | "id": 25, 147 | "first_name": "Jennifer", 148 | "last_name": "Larson", 149 | "email": "jlarsono@vinaora.com" 150 | } 151 | { 152 | "id": 26, 153 | "first_name": "Ashley", 154 | "last_name": "Perry", 155 | "email": "aperryp@rakuten.co.jp" 156 | } 157 | { 158 | "id": 27, 159 | "first_name": "Howard", 160 | "last_name": "Rodriguez", 161 | "email": "hrodriguezq@shutterfly.com" 162 | } 163 | { 164 | "id": 28, 165 | "first_name": "Amy", 166 | "last_name": "Brooks", 167 | "email": "abrooksr@theatlantic.com" 168 | } 169 | { 170 | "id": 29, 171 | "first_name": "Louise", 172 | "last_name": "Warren", 173 | "email": "lwarrens@adobe.com" 174 | } 175 | { 176 | "id": 30, 177 | "first_name": "Tina", 178 | "last_name": "Watson", 179 | "email": "twatsont@myspace.com" 180 | } 181 | { 182 | "id": 31, 183 | "first_name": "Janice", 184 | "last_name": "Kelley", 185 | "email": "jkelleyu@creativecommons.org" 186 | } 187 | { 188 | "id": 32, 189 | "first_name": "Terry", 190 | "last_name": "Mccoy", 191 | "email": "tmccoyv@bravesites.com" 192 | } 193 | { 194 | "id": 33, 195 | "first_name": "Jeffrey", 196 | "last_name": "Morgan", 197 | "email": "jmorganw@surveymonkey.com" 198 | } 199 | { 200 | "id": 34, 201 | "first_name": "Louis", 202 | "last_name": "Harvey", 203 | "email": "lharveyx@sina.com.cn" 204 | } 205 | { 206 | "id": 35, 207 | "first_name": "Philip", 208 | "last_name": "Miller", 209 | "email": "pmillery@samsung.com" 210 | } 211 | { 212 | "id": 36, 213 | "first_name": "Willie", 214 | "last_name": "Marshall", 215 | "email": "wmarshallz@ow.ly" 216 | } 217 | { 218 | "id": 37, 219 | "first_name": "Patrick", 220 | "last_name": "Lopez", 221 | "email": "plopez10@redcross.org" 222 | } 223 | { 224 | "id": 38, 225 | "first_name": "Adam", 226 | "last_name": "Jenkins", 227 | "email": "ajenkins11@harvard.edu" 228 | } 229 | { 230 | "id": 39, 231 | "first_name": "Benjamin", 232 | "last_name": "Cruz", 233 | "email": "bcruz12@linkedin.com" 234 | } 235 | { 236 | "id": 40, 237 | "first_name": "Ruby", 238 | "last_name": "Hawkins", 239 | "email": "rhawkins13@gmpg.org" 240 | } 241 | { 242 | "id": 41, 243 | "first_name": "Carlos", 244 | "last_name": "Barnes", 245 | "email": "cbarnes14@a8.net" 246 | } 247 | { 248 | "id": 42, 249 | "first_name": "Ruby", 250 | "last_name": "Griffin", 251 | "email": "rgriffin15@bravesites.com" 252 | } 253 | { 254 | "id": 43, 255 | "first_name": "Sean", 256 | "last_name": "Mason", 257 | "email": "smason16@icq.com" 258 | } 259 | { 260 | "id": 44, 261 | "first_name": "Anthony", 262 | "last_name": "Payne", 263 | "email": "apayne17@utexas.edu" 264 | } 265 | { 266 | "id": 45, 267 | "first_name": "Steve", 268 | "last_name": "Cruz", 269 | "email": "scruz18@pcworld.com" 270 | } 271 | { 
272 | "id": 46, 273 | "first_name": "Anthony", 274 | "last_name": "Garcia", 275 | "email": "agarcia19@flavors.me" 276 | } 277 | { 278 | "id": 47, 279 | "first_name": "Doris", 280 | "last_name": "Lopez", 281 | "email": "dlopez1a@sphinn.com" 282 | } 283 | { 284 | "id": 48, 285 | "first_name": "Susan", 286 | "last_name": "Nichols", 287 | "email": "snichols1b@freewebs.com" 288 | } 289 | { 290 | "id": 49, 291 | "first_name": "Wanda", 292 | "last_name": "Ferguson", 293 | "email": "wferguson1c@yahoo.co.jp" 294 | } 295 | { 296 | "id": 50, 297 | "first_name": "Andrea", 298 | "last_name": "Pierce", 299 | "email": "apierce1d@google.co.uk" 300 | } -------------------------------------------------------------------------------- /integration_tests/public_data/json/section=b/people_b.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 51, 3 | "first_name": "Lawrence", 4 | "last_name": "Phillips", 5 | "email": "lphillips1e@jugem.jp" 6 | } 7 | { 8 | "id": 52, 9 | "first_name": "Judy", 10 | "last_name": "Gilbert", 11 | "email": "jgilbert1f@multiply.com" 12 | } 13 | { 14 | "id": 53, 15 | "first_name": "Eric", 16 | "last_name": "Williams", 17 | "email": "ewilliams1g@joomla.org" 18 | } 19 | { 20 | "id": 54, 21 | "first_name": "Ralph", 22 | "last_name": "Romero", 23 | "email": "rromero1h@sogou.com" 24 | } 25 | { 26 | "id": 55, 27 | "first_name": "Jean", 28 | "last_name": "Wilson", 29 | "email": "jwilson1i@ocn.ne.jp" 30 | } 31 | { 32 | "id": 56, 33 | "first_name": "Lori", 34 | "last_name": "Reynolds", 35 | "email": "lreynolds1j@illinois.edu" 36 | } 37 | { 38 | "id": 57, 39 | "first_name": "Donald", 40 | "last_name": "Moreno", 41 | "email": "dmoreno1k@bbc.co.uk" 42 | } 43 | { 44 | "id": 58, 45 | "first_name": "Steven", 46 | "last_name": "Berry", 47 | "email": "sberry1l@eepurl.com" 48 | } 49 | { 50 | "id": 59, 51 | "first_name": "Theresa", 52 | "last_name": "Shaw", 53 | "email": "tshaw1m@people.com.cn" 54 | } 55 | { 56 | "id": 60, 57 | "first_name": "John", 58 | "last_name": "Stephens", 59 | "email": "jstephens1n@nationalgeographic.com" 60 | } 61 | { 62 | "id": 61, 63 | "first_name": "Richard", 64 | "last_name": "Jacobs", 65 | "email": "rjacobs1o@state.tx.us" 66 | } 67 | { 68 | "id": 62, 69 | "first_name": "Andrew", 70 | "last_name": "Lawson", 71 | "email": "alawson1p@over-blog.com" 72 | } 73 | { 74 | "id": 63, 75 | "first_name": "Peter", 76 | "last_name": "Morgan", 77 | "email": "pmorgan1q@rambler.ru" 78 | } 79 | { 80 | "id": 64, 81 | "first_name": "Nicole", 82 | "last_name": "Garrett", 83 | "email": "ngarrett1r@zimbio.com" 84 | } 85 | { 86 | "id": 65, 87 | "first_name": "Joshua", 88 | "last_name": "Kim", 89 | "email": "jkim1s@edublogs.org" 90 | } 91 | { 92 | "id": 66, 93 | "first_name": "Ralph", 94 | "last_name": "Roberts", 95 | "email": "rroberts1t@people.com.cn" 96 | } 97 | { 98 | "id": 67, 99 | "first_name": "George", 100 | "last_name": "Montgomery", 101 | "email": "gmontgomery1u@smugmug.com" 102 | } 103 | { 104 | "id": 68, 105 | "first_name": "Gerald", 106 | "last_name": "Alvarez", 107 | "email": "galvarez1v@flavors.me" 108 | } 109 | { 110 | "id": 69, 111 | "first_name": "Donald", 112 | "last_name": "Olson", 113 | "email": "dolson1w@whitehouse.gov" 114 | } 115 | { 116 | "id": 70, 117 | "first_name": "Carlos", 118 | "last_name": "Morgan", 119 | "email": "cmorgan1x@pbs.org" 120 | } 121 | { 122 | "id": 71, 123 | "first_name": "Aaron", 124 | "last_name": "Stanley", 125 | "email": "astanley1y@webnode.com" 126 | } 127 | { 128 | "id": 72, 129 | "first_name": 
"Virginia", 130 | "last_name": "Long", 131 | "email": "vlong1z@spiegel.de" 132 | } 133 | { 134 | "id": 73, 135 | "first_name": "Robert", 136 | "last_name": "Berry", 137 | "email": "rberry20@tripadvisor.com" 138 | } 139 | { 140 | "id": 74, 141 | "first_name": "Antonio", 142 | "last_name": "Brooks", 143 | "email": "abrooks21@unesco.org" 144 | } 145 | { 146 | "id": 75, 147 | "first_name": "Ruby", 148 | "last_name": "Garcia", 149 | "email": "rgarcia22@ovh.net" 150 | } 151 | { 152 | "id": 76, 153 | "first_name": "Jack", 154 | "last_name": "Hanson", 155 | "email": "jhanson23@blogtalkradio.com" 156 | } 157 | { 158 | "id": 77, 159 | "first_name": "Kathryn", 160 | "last_name": "Nelson", 161 | "email": "knelson24@walmart.com" 162 | } 163 | { 164 | "id": 78, 165 | "first_name": "Jason", 166 | "last_name": "Reed", 167 | "email": "jreed25@printfriendly.com" 168 | } 169 | { 170 | "id": 79, 171 | "first_name": "George", 172 | "last_name": "Coleman", 173 | "email": "gcoleman26@people.com.cn" 174 | } 175 | { 176 | "id": 80, 177 | "first_name": "Rose", 178 | "last_name": "King", 179 | "email": "rking27@ucoz.com" 180 | } 181 | { 182 | "id": 81, 183 | "first_name": "Johnny", 184 | "last_name": "Holmes", 185 | "email": "jholmes28@boston.com" 186 | } 187 | { 188 | "id": 82, 189 | "first_name": "Katherine", 190 | "last_name": "Gilbert", 191 | "email": "kgilbert29@altervista.org" 192 | } 193 | { 194 | "id": 83, 195 | "first_name": "Joshua", 196 | "last_name": "Thomas", 197 | "email": "jthomas2a@ustream.tv" 198 | } 199 | { 200 | "id": 84, 201 | "first_name": "Julie", 202 | "last_name": "Perry", 203 | "email": "jperry2b@opensource.org" 204 | } 205 | { 206 | "id": 85, 207 | "first_name": "Richard", 208 | "last_name": "Perry", 209 | "email": "rperry2c@oracle.com" 210 | } 211 | { 212 | "id": 86, 213 | "first_name": "Kenneth", 214 | "last_name": "Ruiz", 215 | "email": "kruiz2d@wikimedia.org" 216 | } 217 | { 218 | "id": 87, 219 | "first_name": "Jose", 220 | "last_name": "Morgan", 221 | "email": "jmorgan2e@webnode.com" 222 | } 223 | { 224 | "id": 88, 225 | "first_name": "Donald", 226 | "last_name": "Campbell", 227 | "email": "dcampbell2f@goo.ne.jp" 228 | } 229 | { 230 | "id": 89, 231 | "first_name": "Debra", 232 | "last_name": "Collins", 233 | "email": "dcollins2g@uol.com.br" 234 | } 235 | { 236 | "id": 90, 237 | "first_name": "Jesse", 238 | "last_name": "Johnson", 239 | "email": "jjohnson2h@stumbleupon.com" 240 | } 241 | { 242 | "id": 91, 243 | "first_name": "Elizabeth", 244 | "last_name": "Stone", 245 | "email": "estone2i@histats.com" 246 | } 247 | { 248 | "id": 92, 249 | "first_name": "Angela", 250 | "last_name": "Rogers", 251 | "email": "arogers2j@goodreads.com" 252 | } 253 | { 254 | "id": 93, 255 | "first_name": "Emily", 256 | "last_name": "Dixon", 257 | "email": "edixon2k@mlb.com" 258 | } 259 | { 260 | "id": 94, 261 | "first_name": "Albert", 262 | "last_name": "Scott", 263 | "email": "ascott2l@tinypic.com" 264 | } 265 | { 266 | "id": 95, 267 | "first_name": "Barbara", 268 | "last_name": "Peterson", 269 | "email": "bpeterson2m@ow.ly" 270 | } 271 | { 272 | "id": 96, 273 | "first_name": "Adam", 274 | "last_name": "Greene", 275 | "email": "agreene2n@fastcompany.com" 276 | } 277 | { 278 | "id": 97, 279 | "first_name": "Earl", 280 | "last_name": "Sanders", 281 | "email": "esanders2o@hc360.com" 282 | } 283 | { 284 | "id": 98, 285 | "first_name": "Angela", 286 | "last_name": "Brooks", 287 | "email": "abrooks2p@mtv.com" 288 | } 289 | { 290 | "id": 99, 291 | "first_name": "Harold", 292 | "last_name": "Foster", 293 | "email": 
"hfoster2q@privacy.gov.au" 294 | } 295 | { 296 | "id": 100, 297 | "first_name": "Carl", 298 | "last_name": "Meyer", 299 | "email": "cmeyer2r@disqus.com" 300 | } -------------------------------------------------------------------------------- /integration_tests/public_data/json/section=c/people_c.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 101, 3 | "first_name": "Michael", 4 | "last_name": "Perez", 5 | "email": "mperez0@chronoengine.com" 6 | } 7 | { 8 | "id": 102, 9 | "first_name": "Shawn", 10 | "last_name": "Mccoy", 11 | "email": "smccoy1@reddit.com" 12 | } 13 | { 14 | "id": 103, 15 | "first_name": "Kathleen", 16 | "last_name": "Payne", 17 | "email": "kpayne2@cargocollective.com" 18 | } 19 | { 20 | "id": 104, 21 | "first_name": "Jimmy", 22 | "last_name": "Cooper", 23 | "email": "jcooper3@cargocollective.com" 24 | } 25 | { 26 | "id": 105, 27 | "first_name": "Katherine", 28 | "last_name": "Rice", 29 | "email": "krice4@typepad.com" 30 | } 31 | { 32 | "id": 106, 33 | "first_name": "Sarah", 34 | "last_name": "Ryan", 35 | "email": "sryan5@gnu.org" 36 | } 37 | { 38 | "id": 107, 39 | "first_name": "Martin", 40 | "last_name": "Mcdonald", 41 | "email": "mmcdonald6@opera.com" 42 | } 43 | { 44 | "id": 108, 45 | "first_name": "Frank", 46 | "last_name": "Robinson", 47 | "email": "frobinson7@wunderground.com" 48 | } 49 | { 50 | "id": 109, 51 | "first_name": "Jennifer", 52 | "last_name": "Franklin", 53 | "email": "jfranklin8@mail.ru" 54 | } 55 | { 56 | "id": 110, 57 | "first_name": "Henry", 58 | "last_name": "Welch", 59 | "email": "hwelch9@list-manage.com" 60 | } 61 | { 62 | "id": 111, 63 | "first_name": "Fred", 64 | "last_name": "Snyder", 65 | "email": "fsnydera@reddit.com" 66 | } 67 | { 68 | "id": 112, 69 | "first_name": "Amy", 70 | "last_name": "Dunn", 71 | "email": "adunnb@nba.com" 72 | } 73 | { 74 | "id": 113, 75 | "first_name": "Kathleen", 76 | "last_name": "Meyer", 77 | "email": "kmeyerc@cdc.gov" 78 | } 79 | { 80 | "id": 114, 81 | "first_name": "Steve", 82 | "last_name": "Ferguson", 83 | "email": "sfergusond@reverbnation.com" 84 | } 85 | { 86 | "id": 115, 87 | "first_name": "Teresa", 88 | "last_name": "Hill", 89 | "email": "thille@dion.ne.jp" 90 | } 91 | { 92 | "id": 116, 93 | "first_name": "Amanda", 94 | "last_name": "Harper", 95 | "email": "aharperf@mail.ru" 96 | } 97 | { 98 | "id": 117, 99 | "first_name": "Kimberly", 100 | "last_name": "Ray", 101 | "email": "krayg@xing.com" 102 | } 103 | { 104 | "id": 118, 105 | "first_name": "Johnny", 106 | "last_name": "Knight", 107 | "email": "jknighth@jalbum.net" 108 | } 109 | { 110 | "id": 119, 111 | "first_name": "Virginia", 112 | "last_name": "Freeman", 113 | "email": "vfreemani@tiny.cc" 114 | } 115 | { 116 | "id": 120, 117 | "first_name": "Anna", 118 | "last_name": "Austin", 119 | "email": "aaustinj@diigo.com" 120 | } 121 | { 122 | "id": 121, 123 | "first_name": "Willie", 124 | "last_name": "Hill", 125 | "email": "whillk@mail.ru" 126 | } 127 | { 128 | "id": 122, 129 | "first_name": "Sean", 130 | "last_name": "Harris", 131 | "email": "sharrisl@zdnet.com" 132 | } 133 | { 134 | "id": 123, 135 | "first_name": "Mildred", 136 | "last_name": "Adams", 137 | "email": "madamsm@usatoday.com" 138 | } 139 | { 140 | "id": 124, 141 | "first_name": "David", 142 | "last_name": "Graham", 143 | "email": "dgrahamn@zimbio.com" 144 | } 145 | { 146 | "id": 125, 147 | "first_name": "Victor", 148 | "last_name": "Hunter", 149 | "email": "vhuntero@ehow.com" 150 | } 151 | { 152 | "id": 126, 153 | "first_name": "Aaron", 154 
| "last_name": "Ruiz", 155 | "email": "aruizp@weebly.com" 156 | } 157 | { 158 | "id": 127, 159 | "first_name": "Benjamin", 160 | "last_name": "Brooks", 161 | "email": "bbrooksq@jalbum.net" 162 | } 163 | { 164 | "id": 128, 165 | "first_name": "Lisa", 166 | "last_name": "Wilson", 167 | "email": "lwilsonr@japanpost.jp" 168 | } 169 | { 170 | "id": 129, 171 | "first_name": "Benjamin", 172 | "last_name": "King", 173 | "email": "bkings@comsenz.com" 174 | } 175 | { 176 | "id": 130, 177 | "first_name": "Christina", 178 | "last_name": "Williamson", 179 | "email": "cwilliamsont@boston.com" 180 | } 181 | { 182 | "id": 131, 183 | "first_name": "Jane", 184 | "last_name": "Gonzalez", 185 | "email": "jgonzalezu@networksolutions.com" 186 | } 187 | { 188 | "id": 132, 189 | "first_name": "Thomas", 190 | "last_name": "Owens", 191 | "email": "towensv@psu.edu" 192 | } 193 | { 194 | "id": 133, 195 | "first_name": "Katherine", 196 | "last_name": "Moore", 197 | "email": "kmoorew@naver.com" 198 | } 199 | { 200 | "id": 134, 201 | "first_name": "Jennifer", 202 | "last_name": "Stewart", 203 | "email": "jstewartx@yahoo.com" 204 | } 205 | { 206 | "id": 135, 207 | "first_name": "Sara", 208 | "last_name": "Tucker", 209 | "email": "stuckery@topsy.com" 210 | } 211 | { 212 | "id": 136, 213 | "first_name": "Harold", 214 | "last_name": "Ortiz", 215 | "email": "hortizz@vkontakte.ru" 216 | } 217 | { 218 | "id": 137, 219 | "first_name": "Shirley", 220 | "last_name": "James", 221 | "email": "sjames10@yelp.com" 222 | } 223 | { 224 | "id": 138, 225 | "first_name": "Dennis", 226 | "last_name": "Johnson", 227 | "email": "djohnson11@slate.com" 228 | } 229 | { 230 | "id": 139, 231 | "first_name": "Louise", 232 | "last_name": "Weaver", 233 | "email": "lweaver12@china.com.cn" 234 | } 235 | { 236 | "id": 140, 237 | "first_name": "Maria", 238 | "last_name": "Armstrong", 239 | "email": "marmstrong13@prweb.com" 240 | } 241 | { 242 | "id": 141, 243 | "first_name": "Gloria", 244 | "last_name": "Cruz", 245 | "email": "gcruz14@odnoklassniki.ru" 246 | } 247 | { 248 | "id": 142, 249 | "first_name": "Diana", 250 | "last_name": "Spencer", 251 | "email": "dspencer15@ifeng.com" 252 | } 253 | { 254 | "id": 143, 255 | "first_name": "Kelly", 256 | "last_name": "Nguyen", 257 | "email": "knguyen16@altervista.org" 258 | } 259 | { 260 | "id": 144, 261 | "first_name": "Jane", 262 | "last_name": "Rodriguez", 263 | "email": "jrodriguez17@biblegateway.com" 264 | } 265 | { 266 | "id": 145, 267 | "first_name": "Scott", 268 | "last_name": "Brown", 269 | "email": "sbrown18@geocities.jp" 270 | } 271 | { 272 | "id": 146, 273 | "first_name": "Norma", 274 | "last_name": "Cruz", 275 | "email": "ncruz19@si.edu" 276 | } 277 | { 278 | "id": 147, 279 | "first_name": "Marie", 280 | "last_name": "Peters", 281 | "email": "mpeters1a@mlb.com" 282 | } 283 | { 284 | "id": 148, 285 | "first_name": "Lillian", 286 | "last_name": "Carr", 287 | "email": "lcarr1b@typepad.com" 288 | } 289 | { 290 | "id": 149, 291 | "first_name": "Judy", 292 | "last_name": "Nichols", 293 | "email": "jnichols1c@t-online.de" 294 | } 295 | { 296 | "id": 150, 297 | "first_name": "Billy", 298 | "last_name": "Long", 299 | "email": "blong1d@yahoo.com" 300 | } -------------------------------------------------------------------------------- /integration_tests/public_data/json/section=d/people_d.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 151, 3 | "first_name": "Howard", 4 | "last_name": "Reid", 5 | "email": "hreid1e@exblog.jp" 6 | } 7 | { 8 | "id": 152, 9 | 
"first_name": "Laura", 10 | "last_name": "Ferguson", 11 | "email": "lferguson1f@tuttocitta.it" 12 | } 13 | { 14 | "id": 153, 15 | "first_name": "Anne", 16 | "last_name": "Bailey", 17 | "email": "abailey1g@geocities.com" 18 | } 19 | { 20 | "id": 154, 21 | "first_name": "Rose", 22 | "last_name": "Morgan", 23 | "email": "rmorgan1h@ehow.com" 24 | } 25 | { 26 | "id": 155, 27 | "first_name": "Nicholas", 28 | "last_name": "Reyes", 29 | "email": "nreyes1i@google.ru" 30 | } 31 | { 32 | "id": 156, 33 | "first_name": "Joshua", 34 | "last_name": "Kennedy", 35 | "email": "jkennedy1j@house.gov" 36 | } 37 | { 38 | "id": 157, 39 | "first_name": "Paul", 40 | "last_name": "Watkins", 41 | "email": "pwatkins1k@upenn.edu" 42 | } 43 | { 44 | "id": 158, 45 | "first_name": "Kathryn", 46 | "last_name": "Kelly", 47 | "email": "kkelly1l@businessweek.com" 48 | } 49 | { 50 | "id": 159, 51 | "first_name": "Adam", 52 | "last_name": "Armstrong", 53 | "email": "aarmstrong1m@techcrunch.com" 54 | } 55 | { 56 | "id": 160, 57 | "first_name": "Norma", 58 | "last_name": "Wallace", 59 | "email": "nwallace1n@phoca.cz" 60 | } 61 | { 62 | "id": 161, 63 | "first_name": "Timothy", 64 | "last_name": "Reyes", 65 | "email": "treyes1o@google.cn" 66 | } 67 | { 68 | "id": 162, 69 | "first_name": "Elizabeth", 70 | "last_name": "Patterson", 71 | "email": "epatterson1p@sun.com" 72 | } 73 | { 74 | "id": 163, 75 | "first_name": "Edward", 76 | "last_name": "Gomez", 77 | "email": "egomez1q@google.fr" 78 | } 79 | { 80 | "id": 164, 81 | "first_name": "David", 82 | "last_name": "Cox", 83 | "email": "dcox1r@friendfeed.com" 84 | } 85 | { 86 | "id": 165, 87 | "first_name": "Brenda", 88 | "last_name": "Wood", 89 | "email": "bwood1s@over-blog.com" 90 | } 91 | { 92 | "id": 166, 93 | "first_name": "Adam", 94 | "last_name": "Walker", 95 | "email": "awalker1t@blogs.com" 96 | } 97 | { 98 | "id": 167, 99 | "first_name": "Michael", 100 | "last_name": "Hart", 101 | "email": "mhart1u@wix.com" 102 | } 103 | { 104 | "id": 168, 105 | "first_name": "Jesse", 106 | "last_name": "Ellis", 107 | "email": "jellis1v@google.co.uk" 108 | } 109 | { 110 | "id": 169, 111 | "first_name": "Janet", 112 | "last_name": "Powell", 113 | "email": "jpowell1w@un.org" 114 | } 115 | { 116 | "id": 170, 117 | "first_name": "Helen", 118 | "last_name": "Ford", 119 | "email": "hford1x@creativecommons.org" 120 | } 121 | { 122 | "id": 171, 123 | "first_name": "Gerald", 124 | "last_name": "Carpenter", 125 | "email": "gcarpenter1y@about.me" 126 | } 127 | { 128 | "id": 172, 129 | "first_name": "Kathryn", 130 | "last_name": "Oliver", 131 | "email": "koliver1z@army.mil" 132 | } 133 | { 134 | "id": 173, 135 | "first_name": "Alan", 136 | "last_name": "Berry", 137 | "email": "aberry20@gov.uk" 138 | } 139 | { 140 | "id": 174, 141 | "first_name": "Harry", 142 | "last_name": "Andrews", 143 | "email": "handrews21@ameblo.jp" 144 | } 145 | { 146 | "id": 175, 147 | "first_name": "Andrea", 148 | "last_name": "Hall", 149 | "email": "ahall22@hp.com" 150 | } 151 | { 152 | "id": 176, 153 | "first_name": "Barbara", 154 | "last_name": "Wells", 155 | "email": "bwells23@behance.net" 156 | } 157 | { 158 | "id": 177, 159 | "first_name": "Anne", 160 | "last_name": "Wells", 161 | "email": "awells24@apache.org" 162 | } 163 | { 164 | "id": 178, 165 | "first_name": "Harry", 166 | "last_name": "Harper", 167 | "email": "hharper25@rediff.com" 168 | } 169 | { 170 | "id": 179, 171 | "first_name": "Jack", 172 | "last_name": "Ray", 173 | "email": "jray26@wufoo.com" 174 | } 175 | { 176 | "id": 180, 177 | "first_name": "Phillip", 178 | 
"last_name": "Hamilton", 179 | "email": "phamilton27@joomla.org" 180 | } 181 | { 182 | "id": 181, 183 | "first_name": "Shirley", 184 | "last_name": "Hunter", 185 | "email": "shunter28@newsvine.com" 186 | } 187 | { 188 | "id": 182, 189 | "first_name": "Arthur", 190 | "last_name": "Daniels", 191 | "email": "adaniels29@reuters.com" 192 | } 193 | { 194 | "id": 183, 195 | "first_name": "Virginia", 196 | "last_name": "Rodriguez", 197 | "email": "vrodriguez2a@walmart.com" 198 | } 199 | { 200 | "id": 184, 201 | "first_name": "Christina", 202 | "last_name": "Ryan", 203 | "email": "cryan2b@hibu.com" 204 | } 205 | { 206 | "id": 185, 207 | "first_name": "Theresa", 208 | "last_name": "Mendoza", 209 | "email": "tmendoza2c@vinaora.com" 210 | } 211 | { 212 | "id": 186, 213 | "first_name": "Jason", 214 | "last_name": "Cole", 215 | "email": "jcole2d@ycombinator.com" 216 | } 217 | { 218 | "id": 187, 219 | "first_name": "Phillip", 220 | "last_name": "Bryant", 221 | "email": "pbryant2e@rediff.com" 222 | } 223 | { 224 | "id": 188, 225 | "first_name": "Adam", 226 | "last_name": "Torres", 227 | "email": "atorres2f@sun.com" 228 | } 229 | { 230 | "id": 189, 231 | "first_name": "Margaret", 232 | "last_name": "Johnston", 233 | "email": "mjohnston2g@ucsd.edu" 234 | } 235 | { 236 | "id": 190, 237 | "first_name": "Paul", 238 | "last_name": "Payne", 239 | "email": "ppayne2h@hhs.gov" 240 | } 241 | { 242 | "id": 191, 243 | "first_name": "Todd", 244 | "last_name": "Willis", 245 | "email": "twillis2i@businessweek.com" 246 | } 247 | { 248 | "id": 192, 249 | "first_name": "Willie", 250 | "last_name": "Oliver", 251 | "email": "woliver2j@noaa.gov" 252 | } 253 | { 254 | "id": 193, 255 | "first_name": "Frances", 256 | "last_name": "Robertson", 257 | "email": "frobertson2k@go.com" 258 | } 259 | { 260 | "id": 194, 261 | "first_name": "Gregory", 262 | "last_name": "Hawkins", 263 | "email": "ghawkins2l@joomla.org" 264 | } 265 | { 266 | "id": 195, 267 | "first_name": "Lisa", 268 | "last_name": "Perkins", 269 | "email": "lperkins2m@si.edu" 270 | } 271 | { 272 | "id": 196, 273 | "first_name": "Jacqueline", 274 | "last_name": "Anderson", 275 | "email": "janderson2n@cargocollective.com" 276 | } 277 | { 278 | "id": 197, 279 | "first_name": "Shirley", 280 | "last_name": "Diaz", 281 | "email": "sdiaz2o@ucla.edu" 282 | } 283 | { 284 | "id": 198, 285 | "first_name": "Nicole", 286 | "last_name": "Meyer", 287 | "email": "nmeyer2p@flickr.com" 288 | } 289 | { 290 | "id": 199, 291 | "first_name": "Mary", 292 | "last_name": "Gray", 293 | "email": "mgray2q@constantcontact.com" 294 | } 295 | { 296 | "id": 200, 297 | "first_name": "Jean", 298 | "last_name": "Mcdonald", 299 | "email": "jmcdonald2r@baidu.com" 300 | } -------------------------------------------------------------------------------- /integration_tests/public_data/parquet/section=a/people_a.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-external-tables/591957cc3827761af1e622f153f99ab64e7884bf/integration_tests/public_data/parquet/section=a/people_a.parquet -------------------------------------------------------------------------------- /integration_tests/public_data/parquet/section=b/people_b.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-external-tables/591957cc3827761af1e622f153f99ab64e7884bf/integration_tests/public_data/parquet/section=b/people_b.parquet 
-------------------------------------------------------------------------------- /integration_tests/public_data/parquet/section=c/people_c.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-external-tables/591957cc3827761af1e622f153f99ab64e7884bf/integration_tests/public_data/parquet/section=c/people_c.parquet -------------------------------------------------------------------------------- /integration_tests/public_data/parquet/section=d/people_d.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-external-tables/591957cc3827761af1e622f153f99ab64e7884bf/integration_tests/public_data/parquet/section=d/people_d.parquet -------------------------------------------------------------------------------- /integration_tests/public_data/parquet_capitalized/section=a/people_a.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-external-tables/591957cc3827761af1e622f153f99ab64e7884bf/integration_tests/public_data/parquet_capitalized/section=a/people_a.parquet -------------------------------------------------------------------------------- /integration_tests/public_data/parquet_capitalized/section=b/people_b.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-external-tables/591957cc3827761af1e622f153f99ab64e7884bf/integration_tests/public_data/parquet_capitalized/section=b/people_b.parquet -------------------------------------------------------------------------------- /integration_tests/public_data/parquet_capitalized/section=c/people_c.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-external-tables/591957cc3827761af1e622f153f99ab64e7884bf/integration_tests/public_data/parquet_capitalized/section=c/people_c.parquet -------------------------------------------------------------------------------- /integration_tests/public_data/parquet_capitalized/section=d/people_d.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-external-tables/591957cc3827761af1e622f153f99ab64e7884bf/integration_tests/public_data/parquet_capitalized/section=d/people_d.parquet -------------------------------------------------------------------------------- /integration_tests/seeds/people.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email 2 | 1,Jack,Hunter,jhunter0@pbs.org 3 | 2,Kathryn,Walker,kwalker1@ezinearticles.com 4 | 3,Gerald,Ryan,gryan2@com.com 5 | 4,Bonnie,Spencer,bspencer3@ameblo.jp 6 | 5,Harold,Taylor,htaylor4@people.com.cn 7 | 6,Jacqueline,Griffin,jgriffin5@t.co 8 | 7,Wanda,Arnold,warnold6@google.nl 9 | 8,Craig,Ortiz,cortiz7@sciencedaily.com 10 | 9,Gary,Day,gday8@nih.gov 11 | 10,Rose,Wright,rwright9@yahoo.co.jp 12 | 11,Raymond,Kelley,rkelleya@fc2.com 13 | 12,Gerald,Robinson,grobinsonb@disqus.com 14 | 13,Mildred,Martinez,mmartinezc@samsung.com 15 | 14,Dennis,Arnold,darnoldd@google.com 16 | 15,Judy,Gray,jgraye@opensource.org 17 | 16,Theresa,Garza,tgarzaf@epa.gov 18 | 17,Gerald,Robertson,grobertsong@csmonitor.com 19 | 18,Philip,Hernandez,phernandezh@adobe.com 20 | 19,Julia,Gonzalez,jgonzalezi@cam.ac.uk 21 | 20,Andrew,Davis,adavisj@patch.com 22 | 
21,Kimberly,Harper,kharperk@foxnews.com 23 | 22,Mark,Martin,mmartinl@marketwatch.com 24 | 23,Cynthia,Ruiz,cruizm@google.fr 25 | 24,Samuel,Carroll,scarrolln@youtu.be 26 | 25,Jennifer,Larson,jlarsono@vinaora.com 27 | 26,Ashley,Perry,aperryp@rakuten.co.jp 28 | 27,Howard,Rodriguez,hrodriguezq@shutterfly.com 29 | 28,Amy,Brooks,abrooksr@theatlantic.com 30 | 29,Louise,Warren,lwarrens@adobe.com 31 | 30,Tina,Watson,twatsont@myspace.com 32 | 31,Janice,Kelley,jkelleyu@creativecommons.org 33 | 32,Terry,Mccoy,tmccoyv@bravesites.com 34 | 33,Jeffrey,Morgan,jmorganw@surveymonkey.com 35 | 34,Louis,Harvey,lharveyx@sina.com.cn 36 | 35,Philip,Miller,pmillery@samsung.com 37 | 36,Willie,Marshall,wmarshallz@ow.ly 38 | 37,Patrick,Lopez,plopez10@redcross.org 39 | 38,Adam,Jenkins,ajenkins11@harvard.edu 40 | 39,Benjamin,Cruz,bcruz12@linkedin.com 41 | 40,Ruby,Hawkins,rhawkins13@gmpg.org 42 | 41,Carlos,Barnes,cbarnes14@a8.net 43 | 42,Ruby,Griffin,rgriffin15@bravesites.com 44 | 43,Sean,Mason,smason16@icq.com 45 | 44,Anthony,Payne,apayne17@utexas.edu 46 | 45,Steve,Cruz,scruz18@pcworld.com 47 | 46,Anthony,Garcia,agarcia19@flavors.me 48 | 47,Doris,Lopez,dlopez1a@sphinn.com 49 | 48,Susan,Nichols,snichols1b@freewebs.com 50 | 49,Wanda,Ferguson,wferguson1c@yahoo.co.jp 51 | 50,Andrea,Pierce,apierce1d@google.co.uk 52 | 51,Lawrence,Phillips,lphillips1e@jugem.jp 53 | 52,Judy,Gilbert,jgilbert1f@multiply.com 54 | 53,Eric,Williams,ewilliams1g@joomla.org 55 | 54,Ralph,Romero,rromero1h@sogou.com 56 | 55,Jean,Wilson,jwilson1i@ocn.ne.jp 57 | 56,Lori,Reynolds,lreynolds1j@illinois.edu 58 | 57,Donald,Moreno,dmoreno1k@bbc.co.uk 59 | 58,Steven,Berry,sberry1l@eepurl.com 60 | 59,Theresa,Shaw,tshaw1m@people.com.cn 61 | 60,John,Stephens,jstephens1n@nationalgeographic.com 62 | 61,Richard,Jacobs,rjacobs1o@state.tx.us 63 | 62,Andrew,Lawson,alawson1p@over-blog.com 64 | 63,Peter,Morgan,pmorgan1q@rambler.ru 65 | 64,Nicole,Garrett,ngarrett1r@zimbio.com 66 | 65,Joshua,Kim,jkim1s@edublogs.org 67 | 66,Ralph,Roberts,rroberts1t@people.com.cn 68 | 67,George,Montgomery,gmontgomery1u@smugmug.com 69 | 68,Gerald,Alvarez,galvarez1v@flavors.me 70 | 69,Donald,Olson,dolson1w@whitehouse.gov 71 | 70,Carlos,Morgan,cmorgan1x@pbs.org 72 | 71,Aaron,Stanley,astanley1y@webnode.com 73 | 72,Virginia,Long,vlong1z@spiegel.de 74 | 73,Robert,Berry,rberry20@tripadvisor.com 75 | 74,Antonio,Brooks,abrooks21@unesco.org 76 | 75,Ruby,Garcia,rgarcia22@ovh.net 77 | 76,Jack,Hanson,jhanson23@blogtalkradio.com 78 | 77,Kathryn,Nelson,knelson24@walmart.com 79 | 78,Jason,Reed,jreed25@printfriendly.com 80 | 79,George,Coleman,gcoleman26@people.com.cn 81 | 80,Rose,King,rking27@ucoz.com 82 | 81,Johnny,Holmes,jholmes28@boston.com 83 | 82,Katherine,Gilbert,kgilbert29@altervista.org 84 | 83,Joshua,Thomas,jthomas2a@ustream.tv 85 | 84,Julie,Perry,jperry2b@opensource.org 86 | 85,Richard,Perry,rperry2c@oracle.com 87 | 86,Kenneth,Ruiz,kruiz2d@wikimedia.org 88 | 87,Jose,Morgan,jmorgan2e@webnode.com 89 | 88,Donald,Campbell,dcampbell2f@goo.ne.jp 90 | 89,Debra,Collins,dcollins2g@uol.com.br 91 | 90,Jesse,Johnson,jjohnson2h@stumbleupon.com 92 | 91,Elizabeth,Stone,estone2i@histats.com 93 | 92,Angela,Rogers,arogers2j@goodreads.com 94 | 93,Emily,Dixon,edixon2k@mlb.com 95 | 94,Albert,Scott,ascott2l@tinypic.com 96 | 95,Barbara,Peterson,bpeterson2m@ow.ly 97 | 96,Adam,Greene,agreene2n@fastcompany.com 98 | 97,Earl,Sanders,esanders2o@hc360.com 99 | 98,Angela,Brooks,abrooks2p@mtv.com 100 | 99,Harold,Foster,hfoster2q@privacy.gov.au 101 | 100,Carl,Meyer,cmeyer2r@disqus.com 102 | 101,Michael,Perez,mperez0@chronoengine.com 
103 | 102,Shawn,Mccoy,smccoy1@reddit.com 104 | 103,Kathleen,Payne,kpayne2@cargocollective.com 105 | 104,Jimmy,Cooper,jcooper3@cargocollective.com 106 | 105,Katherine,Rice,krice4@typepad.com 107 | 106,Sarah,Ryan,sryan5@gnu.org 108 | 107,Martin,Mcdonald,mmcdonald6@opera.com 109 | 108,Frank,Robinson,frobinson7@wunderground.com 110 | 109,Jennifer,Franklin,jfranklin8@mail.ru 111 | 110,Henry,Welch,hwelch9@list-manage.com 112 | 111,Fred,Snyder,fsnydera@reddit.com 113 | 112,Amy,Dunn,adunnb@nba.com 114 | 113,Kathleen,Meyer,kmeyerc@cdc.gov 115 | 114,Steve,Ferguson,sfergusond@reverbnation.com 116 | 115,Teresa,Hill,thille@dion.ne.jp 117 | 116,Amanda,Harper,aharperf@mail.ru 118 | 117,Kimberly,Ray,krayg@xing.com 119 | 118,Johnny,Knight,jknighth@jalbum.net 120 | 119,Virginia,Freeman,vfreemani@tiny.cc 121 | 120,Anna,Austin,aaustinj@diigo.com 122 | 121,Willie,Hill,whillk@mail.ru 123 | 122,Sean,Harris,sharrisl@zdnet.com 124 | 123,Mildred,Adams,madamsm@usatoday.com 125 | 124,David,Graham,dgrahamn@zimbio.com 126 | 125,Victor,Hunter,vhuntero@ehow.com 127 | 126,Aaron,Ruiz,aruizp@weebly.com 128 | 127,Benjamin,Brooks,bbrooksq@jalbum.net 129 | 128,Lisa,Wilson,lwilsonr@japanpost.jp 130 | 129,Benjamin,King,bkings@comsenz.com 131 | 130,Christina,Williamson,cwilliamsont@boston.com 132 | 131,Jane,Gonzalez,jgonzalezu@networksolutions.com 133 | 132,Thomas,Owens,towensv@psu.edu 134 | 133,Katherine,Moore,kmoorew@naver.com 135 | 134,Jennifer,Stewart,jstewartx@yahoo.com 136 | 135,Sara,Tucker,stuckery@topsy.com 137 | 136,Harold,Ortiz,hortizz@vkontakte.ru 138 | 137,Shirley,James,sjames10@yelp.com 139 | 138,Dennis,Johnson,djohnson11@slate.com 140 | 139,Louise,Weaver,lweaver12@china.com.cn 141 | 140,Maria,Armstrong,marmstrong13@prweb.com 142 | 141,Gloria,Cruz,gcruz14@odnoklassniki.ru 143 | 142,Diana,Spencer,dspencer15@ifeng.com 144 | 143,Kelly,Nguyen,knguyen16@altervista.org 145 | 144,Jane,Rodriguez,jrodriguez17@biblegateway.com 146 | 145,Scott,Brown,sbrown18@geocities.jp 147 | 146,Norma,Cruz,ncruz19@si.edu 148 | 147,Marie,Peters,mpeters1a@mlb.com 149 | 148,Lillian,Carr,lcarr1b@typepad.com 150 | 149,Judy,Nichols,jnichols1c@t-online.de 151 | 150,Billy,Long,blong1d@yahoo.com 152 | 151,Howard,Reid,hreid1e@exblog.jp 153 | 152,Laura,Ferguson,lferguson1f@tuttocitta.it 154 | 153,Anne,Bailey,abailey1g@geocities.com 155 | 154,Rose,Morgan,rmorgan1h@ehow.com 156 | 155,Nicholas,Reyes,nreyes1i@google.ru 157 | 156,Joshua,Kennedy,jkennedy1j@house.gov 158 | 157,Paul,Watkins,pwatkins1k@upenn.edu 159 | 158,Kathryn,Kelly,kkelly1l@businessweek.com 160 | 159,Adam,Armstrong,aarmstrong1m@techcrunch.com 161 | 160,Norma,Wallace,nwallace1n@phoca.cz 162 | 161,Timothy,Reyes,treyes1o@google.cn 163 | 162,Elizabeth,Patterson,epatterson1p@sun.com 164 | 163,Edward,Gomez,egomez1q@google.fr 165 | 164,David,Cox,dcox1r@friendfeed.com 166 | 165,Brenda,Wood,bwood1s@over-blog.com 167 | 166,Adam,Walker,awalker1t@blogs.com 168 | 167,Michael,Hart,mhart1u@wix.com 169 | 168,Jesse,Ellis,jellis1v@google.co.uk 170 | 169,Janet,Powell,jpowell1w@un.org 171 | 170,Helen,Ford,hford1x@creativecommons.org 172 | 171,Gerald,Carpenter,gcarpenter1y@about.me 173 | 172,Kathryn,Oliver,koliver1z@army.mil 174 | 173,Alan,Berry,aberry20@gov.uk 175 | 174,Harry,Andrews,handrews21@ameblo.jp 176 | 175,Andrea,Hall,ahall22@hp.com 177 | 176,Barbara,Wells,bwells23@behance.net 178 | 177,Anne,Wells,awells24@apache.org 179 | 178,Harry,Harper,hharper25@rediff.com 180 | 179,Jack,Ray,jray26@wufoo.com 181 | 180,Phillip,Hamilton,phamilton27@joomla.org 182 | 181,Shirley,Hunter,shunter28@newsvine.com 183 | 
182,Arthur,Daniels,adaniels29@reuters.com 184 | 183,Virginia,Rodriguez,vrodriguez2a@walmart.com 185 | 184,Christina,Ryan,cryan2b@hibu.com 186 | 185,Theresa,Mendoza,tmendoza2c@vinaora.com 187 | 186,Jason,Cole,jcole2d@ycombinator.com 188 | 187,Phillip,Bryant,pbryant2e@rediff.com 189 | 188,Adam,Torres,atorres2f@sun.com 190 | 189,Margaret,Johnston,mjohnston2g@ucsd.edu 191 | 190,Paul,Payne,ppayne2h@hhs.gov 192 | 191,Todd,Willis,twillis2i@businessweek.com 193 | 192,Willie,Oliver,woliver2j@noaa.gov 194 | 193,Frances,Robertson,frobertson2k@go.com 195 | 194,Gregory,Hawkins,ghawkins2l@joomla.org 196 | 195,Lisa,Perkins,lperkins2m@si.edu 197 | 196,Jacqueline,Anderson,janderson2n@cargocollective.com 198 | 197,Shirley,Diaz,sdiaz2o@ucla.edu 199 | 198,Nicole,Meyer,nmeyer2p@flickr.com 200 | 199,Mary,Gray,mgray2q@constantcontact.com 201 | 200,Jean,Mcdonald,jmcdonald2r@baidu.com 202 | -------------------------------------------------------------------------------- /integration_tests/test.env.sample: -------------------------------------------------------------------------------- 1 | # gh secret set -f integration_tests/test.env -e ci_testing 2 | 3 | # redshift 4 | DBT_ENV_SECRET_REDSHIFT_PASS= 5 | # local testing only 6 | # REDSHIFT_SCHEMA= 7 | 8 | # snowflake 9 | SNOWFLAKE_ACCOUNT= 10 | DBT_ENV_SECRET_SNOWFLAKE_PASS= 11 | # local testing only 12 | # SNOWFLAKE_SCHEMA= 13 | 14 | # bigquery 15 | BIGQUERY_PROJECT= 16 | BIGQUERY_SCHEMA= 17 | BIGQUERY_KEYFILE_JSON= 18 | # local testing only 19 | # BIGQUERY_SCHEMA= 20 | 21 | # synapse 22 | DBT_ENV_SECRET_SYNAPSE_CLIENT_SECRET= 23 | # local testing only 24 | # SYNAPSE_SCHEMA= 25 | 26 | 27 | # NOT CURRENTLY USED 28 | # databricks 29 | DATABRICKS_TEST_HOST= 30 | DATBRICKS_TEST_ENDPOINT= 31 | DATABRICKS_TOKEN= 32 | 33 | 34 | # old 35 | AZURESQL_TEST_SERVER= 36 | AZURESQL_TEST_DBNAME= 37 | AZURESQL_TEST_USER= 38 | AZURESQL_TEST_PASS= 39 | -------------------------------------------------------------------------------- /integration_tests/vars.env.sample: -------------------------------------------------------------------------------- 1 | # gh variable set -f integration_tests/vars.env 2 | 3 | # redshift 4 | # NOTE: REDSHIFT_SPECTRUM_IAM_ROLE is currently hard-coded 5 | REDSHIFT_HOST= 6 | REDSHIFT_USER= 7 | REDSHIFT_DATABASE= 8 | REDSHIFT_PORT= 9 | 10 | # snowflake 11 | SNOWFLAKE_USER= 12 | SNOWFLAKE_ROLE= 13 | SNOWFLAKE_DATABASE= 14 | SNOWFLAKE_WAREHOUSE= 15 | 16 | # bigquery 17 | BIGQUERY_PROJECT= 18 | 19 | # synapse 20 | SYNAPSE_DRIVER= 21 | SYNAPSE_HOST= 22 | SYNAPSE_PORT= 23 | SYNAPSE_DATABASE= 24 | SYNAPSE_AUTHENTICATION= 25 | SYNAPSE_TENANT_ID= 26 | SYNAPSE_CLIENT_ID= -------------------------------------------------------------------------------- /macros/common/create_external_schema.sql: -------------------------------------------------------------------------------- 1 | {%- macro create_external_schema(source_node) -%} 2 | {{ adapter.dispatch('create_external_schema', 'dbt_external_tables')(source_node) }} 3 | {%- endmacro -%} 4 | 5 | {%- macro default__create_external_schema(source_node) -%} 6 | {%- set fqn -%} 7 | {%- if source_node.database -%} 8 | {{ source_node.database }}.{{ source_node.schema }} 9 | {%- else -%} 10 | {{ source_node.schema }} 11 | {%- endif -%} 12 | {%- endset -%} 13 | 14 | {%- set ddl -%} 15 | create schema if not exists {{ fqn }} 16 | {%- endset -%} 17 | 18 | {{ return(ddl) }} 19 | {%- endmacro -%} 20 | -------------------------------------------------------------------------------- /macros/common/create_external_table.sql: 
-------------------------------------------------------------------------------- 1 | {% macro create_external_table(source_node) %} 2 | {{ adapter.dispatch('create_external_table', 'dbt_external_tables')(source_node) }} 3 | {% endmacro %} 4 | 5 | {% macro default__create_external_table(source_node) %} 6 | {{ exceptions.raise_compiler_error("External table creation is not implemented for the default adapter") }} 7 | {% endmacro %} 8 | -------------------------------------------------------------------------------- /macros/common/get_external_build_plan.sql: -------------------------------------------------------------------------------- 1 | {% macro get_external_build_plan(source_node) %} 2 | {{ return(adapter.dispatch('get_external_build_plan', 'dbt_external_tables')(source_node)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_external_build_plan(source_node) %} 6 | {{ exceptions.raise_compiler_error("Staging external sources is not implemented for the default adapter") }} 7 | {% endmacro %} 8 | -------------------------------------------------------------------------------- /macros/common/helpers/dropif.sql: -------------------------------------------------------------------------------- 1 | {% macro dropif(node) %} 2 | {{ adapter.dispatch('dropif', 'dbt_external_tables')(node) }} 3 | {% endmacro %} 4 | 5 | {% macro default__dropif() %} 6 | {{ exceptions.raise_compiler_error( 7 | "Dropping external tables is not implemented for the default adapter" 8 | ) }} 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /macros/common/helpers/transaction.sql: -------------------------------------------------------------------------------- 1 | {% macro exit_transaction() %} 2 | {{ return(adapter.dispatch('exit_transaction', 'dbt_external_tables')()) }} 3 | {% endmacro %} 4 | 5 | {% macro default__exit_transaction() %} 6 | {{ return('') }} 7 | {% endmacro %} 8 | -------------------------------------------------------------------------------- /macros/common/refresh_external_table.sql: -------------------------------------------------------------------------------- 1 | {% macro refresh_external_table(source_node) %} 2 | {{ return(adapter.dispatch('refresh_external_table', 'dbt_external_tables')(source_node)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__refresh_external_table(source_node) %} 6 | {% do return([]) %} 7 | {% endmacro %} 8 | -------------------------------------------------------------------------------- /macros/common/stage_external_sources.sql: -------------------------------------------------------------------------------- 1 | {% macro stage_external_sources(select=none) %} 2 | 3 | {% set sources_to_stage = [] %} 4 | 5 | {% set source_nodes = graph.sources.values() if graph.sources else [] %} 6 | 7 | {% for node in source_nodes %} 8 | {% if node.external %} 9 | 10 | {% if select %} 11 | 12 | {% for src in select.split(' ') %} 13 | 14 | {% if '.' 
in src %} 15 | {% set src_s = src.split('.') %} 16 | {% if src_s[0] == node.source_name and src_s[1] == node.name %} 17 | {% do sources_to_stage.append(node) %} 18 | {% endif %} 19 | {% else %} 20 | {% if src == node.source_name %} 21 | {% do sources_to_stage.append(node) %} 22 | {% endif %} 23 | {% endif %} 24 | 25 | {% endfor %} 26 | 27 | {% else %} 28 | 29 | {% do sources_to_stage.append(node) %} 30 | 31 | {% endif %} 32 | {% endif %} 33 | 34 | {% endfor %} 35 | 36 | {% if sources_to_stage|length == 0 %} 37 | {% do log('No external sources selected', info = true) %} 38 | {% endif %} 39 | 40 | {% for node in sources_to_stage %} 41 | 42 | {% set loop_label = loop.index ~ ' of ' ~ loop.length %} 43 | 44 | {% do log(loop_label ~ ' START external source ' ~ node.schema ~ '.' ~ node.identifier, info = true) -%} 45 | 46 | {% set run_queue = dbt_external_tables.get_external_build_plan(node) %} 47 | 48 | {% do log(loop_label ~ ' SKIP', info = true) if run_queue == [] %} 49 | {% set width = flags.PRINTER_WIDTH %} 50 | 51 | {% for q in run_queue %} 52 | 53 | {% set q_msg = q|replace('\n','')|replace('begin;','')|trim %} 54 | {% set q_log = q_msg[:width] ~ '... ' if q_msg|length > width else q_msg %} 55 | 56 | {% do log(loop_label ~ ' (' ~ loop.index ~ ') ' ~ q_log, info = true) %} 57 | {% set exit_txn = dbt_external_tables.exit_transaction() %} 58 | 59 | {% call statement('runner', fetch_result = True, auto_begin = False) %} 60 | {{ exit_txn }} {{ q }} 61 | {% endcall %} 62 | 63 | {% set runner = load_result('runner') %} 64 | {% set log_msg = runner['response'] if 'response' in runner.keys() else runner['status'] %} 65 | {% do log(loop_label ~ ' (' ~ loop.index ~ ') ' ~ log_msg, info = true) %} 66 | 67 | {% endfor %} 68 | 69 | {% set update_columns = dbt_external_tables.update_external_table_columns(node) %} 70 | {{ update_columns }} 71 | 72 | {% endfor %} 73 | 74 | {% endmacro %} 75 | -------------------------------------------------------------------------------- /macros/common/update_external_table_columns.sql: -------------------------------------------------------------------------------- 1 | {% macro update_external_table_columns(source_node) %} 2 | {{ return(adapter.dispatch('update_external_table_columns', 'dbt_external_tables')(source_node)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__update_external_table_columns(source_node) %} 6 | 7 | {% endmacro %} 8 | -------------------------------------------------------------------------------- /macros/plugins/bigquery/create_external_schema.sql: -------------------------------------------------------------------------------- 1 | {%- macro bigquery__create_external_schema(source_node) -%} 2 | {%- set fqn -%} 3 | {%- if source_node.database -%} 4 | `{{ source_node.database }}`.{{ source_node.schema }} 5 | {%- else -%} 6 | {{ source_node.schema }} 7 | {%- endif -%} 8 | {%- endset -%} 9 | 10 | {% set schema_exists_query %} 11 | select * from `{{ source_node.database }}`.INFORMATION_SCHEMA.SCHEMATA where schema_name = '{{ source_node.schema }}' limit 1 12 | {% endset %} 13 | {% if execute %} 14 | {% set schema_exists = run_query(schema_exists_query)|length > 0 %} 15 | {% else %} 16 | {% set schema_exists = false %} 17 | {% endif %} 18 | 19 | {%- if not schema_exists -%} 20 | {%- set ddl -%} 21 | create schema if not exists {{ fqn }} 22 | {%- endset -%} 23 | {{ return(ddl) }} 24 | {%- else -%} 25 | {{ return('') }} 26 | {% endif %} 27 | {%- endmacro -%} 28 | -------------------------------------------------------------------------------- 
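Usage sketch: the stage_external_sources macro above is the package's entry point and is run as a dbt operation (dbt run-operation stage_external_sources, optionally with --vars "ext_full_refresh: true" to force full rebuilds via the ext_full_refresh var used by the build-plan macros, or with --args "select: my_source my_source.my_table" to stage a subset, matching the space- and dot-separated parsing above). The YAML below is a minimal, illustrative source definition of the shape these macros read from node.external; the source name, bucket path, and file format are assumptions, not copied from this repo's sample_sources or integration tests.

version: 2
sources:
  - name: people_source                         # illustrative name
    schema: external_tables
    tables:
      - name: people_csv
        external:
          location: "s3://my-bucket/people/csv"  # hypothetical path
          file_format: csv                       # adapter-specific; see the plugin macros below
        columns:
          - name: id
            data_type: int
          - name: first_name
            data_type: varchar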
/macros/plugins/bigquery/create_external_table.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__create_external_table(source_node) %} 2 | {%- set columns = source_node.columns.values() -%} 3 | {%- set external = source_node.external -%} 4 | {%- set partitions = external.partitions -%} 5 | {%- set options = external.options -%} 6 | {%- set non_string_options = ['max_staleness'] %} 7 | 8 | {% if options is mapping and options.get('connection_name', none) %} 9 | {% set connection_name = options.pop('connection_name') %} 10 | {% endif %} 11 | 12 | {%- set uris = [] -%} 13 | {%- if options is mapping and options.get('uris', none) -%} 14 | {%- set uris = external.options.get('uris') -%} 15 | {%- else -%} 16 | {%- set uris = [external.location] -%} 17 | {%- endif -%} 18 | 19 | create or replace external table {{source(source_node.source_name, source_node.name)}} 20 | {%- if columns -%}( 21 | {% for column in columns %} 22 | {%- set column_quoted = adapter.quote(column.name) if column.quote else column.name %} 23 | {{column_quoted}} {{column.data_type}} {{- ',' if not loop.last -}} 24 | {%- endfor -%} 25 | ) 26 | {% endif %} 27 | {% if options and options.get('hive_partition_uri_prefix', none) %} 28 | with partition columns {%- if partitions %} ( 29 | {%- for partition in partitions %} 30 | {{partition.name}} {{partition.data_type}}{{',' if not loop.last}} 31 | {%- endfor -%} 32 | ) {% endif -%} 33 | {% endif %} 34 | {% if connection_name %} 35 | with connection `{{ connection_name }}` 36 | {% endif %} 37 | options ( 38 | uris = [{%- for uri in uris -%} '{{uri}}' {{- "," if not loop.last}} {%- endfor -%}] 39 | {%- if options is mapping -%} 40 | {%- for key, value in options.items() if key != 'uris' %} 41 | {%- if value is string and key not in non_string_options -%} 42 | , {{key}} = '{{value}}' 43 | {%- else -%} 44 | , {{key}} = {{value}} 45 | {%- endif -%} 46 | {%- endfor -%} 47 | {%- endif -%} 48 | ) 49 | {% endmacro %} 50 | -------------------------------------------------------------------------------- /macros/plugins/bigquery/get_external_build_plan.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__get_external_build_plan(source_node) %} 2 | 3 | {% set build_plan = [] %} 4 | 5 | {% set old_relation = adapter.get_relation( 6 | database = source_node.database, 7 | schema = source_node.schema, 8 | identifier = source_node.identifier 9 | ) %} 10 | 11 | {% set create_or_replace = (old_relation is none or var('ext_full_refresh', false)) %} 12 | 13 | {% if create_or_replace %} 14 | {% if not dbt_external_tables.create_external_schema(source_node)|length %} 15 | {% set build_plan = build_plan + [ 16 | dbt_external_tables.create_external_table(source_node) 17 | ] %} 18 | {% else %} 19 | {% set build_plan = build_plan + [ 20 | dbt_external_tables.create_external_schema(source_node), 21 | dbt_external_tables.create_external_table(source_node) 22 | ] %} 23 | {% endif %} 24 | {% else %} 25 | {% set build_plan = build_plan + dbt_external_tables.refresh_external_table(source_node) %} 26 | {% endif %} 27 | 28 | {% do return(build_plan) %} 29 | 30 | {% endmacro %} 31 | -------------------------------------------------------------------------------- /macros/plugins/bigquery/update_external_table_columns.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery__update_external_table_columns(source_node) %} 2 | {%- set columns = 
source_node.columns -%} 3 | {%- set relation = source(source_node.source_name, source_node.name) -%} 4 | {%- do adapter.update_columns(relation, columns) -%} 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /macros/plugins/fabric/create_external_schema.sql: -------------------------------------------------------------------------------- 1 | {% macro fabric__create_external_schema(source_node) %} 2 | {# https://learn.microsoft.com/en-us/sql/t-sql/statements/create-schema-transact-sql?view=sql-server-ver16 #} 3 | 4 | {% set ddl %} 5 | IF NOT EXISTS (SELECT * FROM sys.schemas WHERE name = '{{ source_node.schema }}') 6 | BEGIN 7 | EXEC('CREATE SCHEMA [{{ source_node.schema }}]') 8 | END 9 | {% endset %} 10 | 11 | {{return(ddl)}} 12 | 13 | {% endmacro %} 14 | -------------------------------------------------------------------------------- /macros/plugins/fabric/create_external_table.sql: -------------------------------------------------------------------------------- 1 | {% macro fabric__create_external_table(source_node) %} 2 | 3 | {%- set columns = source_node.columns.values() -%} 4 | {%- set external = source_node.external -%} 5 | 6 | {% if external.ansi_nulls is true -%} SET ANSI_NULLS ON; {%- endif %} 7 | {% if external.quoted_identifier is true -%} SET QUOTED_IDENTIFIER ON; {%- endif %} 8 | 9 | create external table {{source(source_node.source_name, source_node.name)}} ( 10 | {% for column in columns %} 11 | {# TODO set nullity based on schema tests?? #} 12 | {%- set nullity = 'NOT NULL' if 'not_null' in columns.tests else 'NULL'-%} 13 | {{adapter.quote(column.name)}} {{column.data_type}} {{nullity}} 14 | {{- ',' if not loop.last -}} 15 | {% endfor %} 16 | ) 17 | WITH ( 18 | {# remove keys that are None (i.e. 
not defined for a given source) #} 19 | {%- for key, value in external.items() if value is not none and key not in ['ansi_nulls', 'quoted_identifier'] -%} 20 | {{key}} = 21 | {%- if key in ["location", "schema_name", "object_name"] -%} 22 | '{{value}}' 23 | {% elif key in ["data_source","file_format"] -%} 24 | [{{value}}] 25 | {% else -%} 26 | {{value}} 27 | {%- endif -%} 28 | {{- ',' if not loop.last -}} 29 | {%- endfor -%} 30 | ) 31 | {% endmacro %} 32 | -------------------------------------------------------------------------------- /macros/plugins/fabric/get_external_build_plan.sql: -------------------------------------------------------------------------------- 1 | {% macro fabric__get_external_build_plan(source_node) %} 2 | 3 | {% set build_plan = [] %} 4 | 5 | {% set old_relation = adapter.get_relation( 6 | database = source_node.database, 7 | schema = source_node.schema, 8 | identifier = source_node.identifier 9 | ) %} 10 | 11 | {% set create_or_replace = (old_relation is none or var('ext_full_refresh', false)) %} 12 | 13 | {% if create_or_replace %} 14 | {% set build_plan = build_plan + [ 15 | dbt_external_tables.create_external_schema(source_node), 16 | dbt_external_tables.dropif(source_node), 17 | dbt_external_tables.create_external_table(source_node) 18 | ] %} 19 | {% else %} 20 | {% set build_plan = build_plan + dbt_external_tables.refresh_external_table(source_node) %} 21 | {% endif %} 22 | {% do return(build_plan) %} 23 | 24 | {% endmacro %} 25 | -------------------------------------------------------------------------------- /macros/plugins/fabric/helpers/dropif.sql: -------------------------------------------------------------------------------- 1 | {% macro fabric__dropif(node) %} 2 | 3 | {% set ddl %} 4 | if object_id ('{{source(node.source_name, node.name)}}') is not null 5 | begin 6 | drop external table {{source(node.source_name, node.name)}} 7 | end 8 | {% endset %} 9 | 10 | {{return(ddl)}} 11 | 12 | {% endmacro %} 13 | -------------------------------------------------------------------------------- /macros/plugins/redshift/create_external_table.sql: -------------------------------------------------------------------------------- 1 | {% macro redshift__create_external_table(source_node) %} 2 | 3 | {%- set columns = source_node.columns.values() -%} 4 | {%- set external = source_node.external -%} 5 | {%- set partitions = external.partitions -%} 6 | 7 | {# https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_EXTERNAL_TABLE.html #} 8 | {# This assumes you have already created an external schema #} 9 | 10 | create external table {{source(source_node.source_name, source_node.name)}} ( 11 | {% for column in columns %} 12 | {{adapter.quote(column.name)}} {{column.data_type}} 13 | {{- ',' if not loop.last -}} 14 | {% endfor %} 15 | ) 16 | {% if partitions -%} partitioned by ( 17 | {%- for partition in partitions -%} 18 | {{adapter.quote(partition.name)}} {{partition.data_type}}{{', ' if not loop.last}} 19 | {%- endfor -%} 20 | ) {%- endif %} 21 | {% if external.row_format -%} row format {{external.row_format}} {%- endif %} 22 | {% if external.file_format -%} stored as {{external.file_format}} {%- endif %} 23 | {% if external.location -%} location '{{external.location}}' {%- endif %} 24 | {% if external.table_properties -%} table properties {{external.table_properties}} {%- endif %} 25 | 26 | {% endmacro %} 27 | -------------------------------------------------------------------------------- /macros/plugins/redshift/get_external_build_plan.sql: 
-------------------------------------------------------------------------------- 1 | {% macro redshift__get_external_build_plan(source_node) %} 2 | 3 | {% set build_plan = [] %} 4 | 5 | {% set create_or_replace = (var('ext_full_refresh', false) or not dbt_external_tables.redshift_is_ext_tbl(source_node)) %} 6 | 7 | {% if create_or_replace %} 8 | 9 | {% set build_plan = [ 10 | dbt_external_tables.dropif(source_node), 11 | dbt_external_tables.create_external_table(source_node) 12 | ] + dbt_external_tables.refresh_external_table(source_node) 13 | %} 14 | 15 | {% else %} 16 | 17 | {% set build_plan = dbt_external_tables.refresh_external_table(source_node) %} 18 | 19 | {% endif %} 20 | 21 | {% do return(build_plan) %} 22 | 23 | {% endmacro %} 24 | -------------------------------------------------------------------------------- /macros/plugins/redshift/helpers/add_partitions.sql: -------------------------------------------------------------------------------- 1 | 2 | {# 3 | Generates a series of alter statements to add a batch of partitions to a table. 4 | Ideally it would require a single alter statement to add all partitions, but 5 | Amazon imposes a limit of 100 partitions per alter statement. Therefore we need 6 | to generate multiple alters when the number of partitions to add exceeds 100. 7 | 8 | Arguments: 9 | - source_node: The external source node to generate the partitions for. Its 10 | external.location value is used as the base location of the external table; paths 11 | in the 'partitions' argument are specified relative to this location. 12 | - partitions (list): A list of partitions to be added to the external table. 13 | Each partition is represented by a dictionary with the keys: 14 | - partition_by (list): A set of columns that the partition is affected by 15 | Each column is represented by a dictionary with the keys: 16 | - name: Name of the column 17 | - value: Value of the column 18 | - path (string): The path to be added as a partition for the particular 19 | combination of columns defined in the 'partition_by' 20 | #} 21 | {% macro redshift_alter_table_add_partitions(source_node, partitions) %} 22 | 23 | {{ log("Generating ADD PARTITION statement for partition set between " 24 | ~ partitions[0]['path'] ~ " and " ~ (partitions|last)['path']) }} 25 | 26 | {% set ddl = [] %} 27 | 28 | {% if partitions|length > 0 %} 29 | 30 | {% set alter_table_add %} 31 | alter table {{source(source_node.source_name, source_node.name)}} add if not exists 32 | {% endset %} 33 | 34 | {%- set alters -%} 35 | 36 | {{ alter_table_add }} 37 | 38 | {%- for partition in partitions -%} 39 | 40 | {%- if loop.index0 != 0 and loop.index0 % 100 == 0 -%} 41 | 42 | ; {{ alter_table_add }} 43 | 44 | {%- endif -%} 45 | 46 | partition ({%- for part in partition.partition_by -%}{{ part.name }}='{{ part.value }}'{{', ' if not loop.last}}{%- endfor -%}) 47 | location '{{ source_node.external.location }}/{{ partition.path }}/' 48 | 49 | {% endfor -%} 50 | 51 | {%- endset -%} 52 | 53 | {% set ddl = ddl + alters.split(';') %} 54 | 55 | {% else %} 56 | 57 | {{ log("No partitions to be added") }} 58 | 59 | {% endif %} 60 | 61 | {% do return(ddl) %} 62 | 63 | {% endmacro %} 64 | -------------------------------------------------------------------------------- /macros/plugins/redshift/helpers/dropif.sql: -------------------------------------------------------------------------------- 1 | {% macro redshift__dropif(node) %} 2 | 3 | {% set ddl %} 4 | drop table if exists {{source(node.source_name, node.name)}} cascade 5 | {%
endset %} 6 | 7 | {{return(ddl)}} 8 | 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /macros/plugins/redshift/helpers/is_ext_tbl.sql: -------------------------------------------------------------------------------- 1 | {% macro redshift_is_ext_tbl(node) %} 2 | 3 | {% set existing_relation = load_relation(node) %} 4 | 5 | {# external tables don't appear in information_schema.tables, 6 | so dbt doesn't cache them #} 7 | {% if existing_relation is none %} 8 | 9 | {% set find_ext_tbl %} 10 | 11 | select count(*) from svv_external_tables 12 | where schemaname = '{{node.schema}}' 13 | and tablename = '{{node.identifier}}' 14 | 15 | {% endset %} 16 | 17 | {% if execute %} 18 | {% set result = run_query(find_ext_tbl)[0][0] %} 19 | {% else %} 20 | {% set result = 0 %} 21 | {% endif %} 22 | 23 | {% set is_ext_tbl = (result > 0) %} 24 | {% do return(is_ext_tbl) %} 25 | 26 | {% else %} 27 | 28 | {% do return(false) %} 29 | 30 | {% endif %} 31 | 32 | {% endmacro %} 33 | -------------------------------------------------------------------------------- /macros/plugins/redshift/helpers/paths.sql: -------------------------------------------------------------------------------- 1 | {% macro year_month_day(name, value) %} 2 | {% set path = value.replace('-','/') %} 3 | {{return(path)}} 4 | {% endmacro %} 5 | 6 | {% macro key_value(name, value) %} 7 | {% set path = name ~ '=' ~ value %} 8 | {{return(path)}} 9 | {% endmacro %} 10 | 11 | {% macro value_only(name, value) %} 12 | {% set path = value %} 13 | {{return(path)}} 14 | {% endmacro %} 15 | -------------------------------------------------------------------------------- /macros/plugins/redshift/helpers/render_macro.sql: -------------------------------------------------------------------------------- 1 | {% macro render_from_context(name) -%} 2 | {% set original_name = name %} 3 | {% if '.' 
in name %} 4 | {% set package_name, name = name.split(".", 1) %} 5 | {% else %} 6 | {% set package_name = none %} 7 | {% endif %} 8 | 9 | {% if package_name is none %} 10 | {% set package_context = context %} 11 | {% elif package_name in context %} 12 | {% set package_context = context[package_name] %} 13 | {% else %} 14 | {% set error_msg %} 15 | Could not find package '{{package_name}}', called by macro '{{original_name}}' 16 | {% endset %} 17 | {{ exceptions.raise_compiler_error(error_msg | trim) }} 18 | {% endif %} 19 | 20 | {{ return(package_context[name](*varargs, **kwargs)) }} 21 | 22 | {%- endmacro %} 23 | -------------------------------------------------------------------------------- /macros/plugins/redshift/helpers/transaction.sql: -------------------------------------------------------------------------------- 1 | {% macro redshift__exit_transaction() %} 2 | {{ return('begin; commit;') }} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /macros/plugins/redshift/refresh_external_table.sql: -------------------------------------------------------------------------------- 1 | {% macro redshift__refresh_external_table(source_node) %} 2 | 3 | {%- set partitions = source_node.external.get('partitions',[]) -%} 4 | 5 | {%- if partitions -%} 6 | 7 | {%- set part_len = partitions|length -%} 8 | 9 | {%- set get_partitions_sql -%} 10 | 11 | select * from 12 | 13 | {%- for partition in partitions %} ( 14 | 15 | {%- set part_num = loop.index -%} 16 | 17 | {%- if partition.vals.macro -%} 18 | {%- set vals = dbt_external_tables.render_from_context(partition.vals.macro, **partition.vals.args) -%} 19 | {%- elif partition.vals is string -%} 20 | {%- set vals = [partition.vals] -%} 21 | {%- else -%} 22 | {%- set vals = partition.vals -%} 23 | {%- endif -%} 24 | 25 | {%- for val in vals %} 26 | 27 | select 28 | '"{{ partition.name }}"' as name_{{ part_num }}, 29 | '"{{ val }}"' as val_{{ part_num }}, 30 | '"{{ dbt_external_tables.render_from_context(partition.path_macro, partition.name, val) }}"' as path_{{ part_num }} 31 | 32 | {{ 'union all' if not loop.last else ') ' }} 33 | 34 | {%- endfor -%} 35 | 36 | {{ 'cross join' if not loop.last }} 37 | 38 | {%- endfor -%} 39 | 40 | {%- endset -%} 41 | 42 | {%- set finals = [] -%} 43 | 44 | {%- if execute -%} 45 | {%- set results = run_query(get_partitions_sql) -%} 46 | {%- for row in results -%} 47 | 48 | {%- set partition_parts = [] -%} 49 | {%- set path_parts = [] -%} 50 | 51 | {%- for i in range(0, part_len) -%} 52 | {%- do partition_parts.append({ 53 | 'name': row[i * 3][1:-1], 54 | 'value': row[i * 3 + 1][1:-1] 55 | }) -%} 56 | {%- do path_parts.append(row[i * 3 + 2][1:-1]) -%} 57 | {%- endfor -%} 58 | 59 | {%- set construct = { 60 | 'partition_by': partition_parts, 61 | 'path': path_parts | join('/') 62 | } -%} 63 | 64 | {% do finals.append(construct) %} 65 | {%- endfor -%} 66 | {%- endif -%} 67 | 68 | {%- set ddl = dbt_external_tables.redshift_alter_table_add_partitions(source_node, finals) -%} 69 | {{ return(ddl) }} 70 | 71 | {% else %} 72 | 73 | {% do return([]) %} 74 | 75 | {% endif %} 76 | 77 | {% endmacro %} 78 | -------------------------------------------------------------------------------- /macros/plugins/snowflake/create_external_schema.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake__create_external_schema(source_node) %} 2 | 3 | {% set schema_exists_query %} 4 | show terse schemas like '{{ source_node.schema }}' in 
database {{ source_node.database }} limit 1; 5 | {% endset %} 6 | {% if execute %} 7 | {% set schema_exists = run_query(schema_exists_query)|length > 0 %} 8 | {% else %} 9 | {% set schema_exists = false %} 10 | {% endif %} 11 | 12 | {% if schema_exists %} 13 | {% set ddl %} 14 | select 'Schema {{ source_node.schema }} exists' from dual; 15 | {% endset %} 16 | {% else %} 17 | {% set fqn %} 18 | {% if source_node.database %} 19 | {{ source_node.database }}.{{ source_node.schema }} 20 | {% else %} 21 | {{ source_node.schema }} 22 | {% endif %} 23 | {% endset %} 24 | 25 | {% set ddl %} 26 | create schema if not exists {{ fqn }}; 27 | {% endset %} 28 | {% endif %} 29 | 30 | {% do return(ddl) %} 31 | 32 | {% endmacro %} 33 | -------------------------------------------------------------------------------- /macros/plugins/snowflake/create_external_table.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake__create_external_table(source_node) %} 2 | 3 | {%- set columns = source_node.columns.values() -%} 4 | {%- set external = source_node.external -%} 5 | {%- set partitions = external.partitions -%} 6 | {%- set infer_schema = external.infer_schema -%} 7 | {%- set ignore_case = external.ignore_case or false -%} 8 | 9 | {% if infer_schema %} 10 | {% set query_infer_schema %} 11 | select * from table( infer_schema( location=>'{{external.location}}', file_format=>'{{external.file_format}}', ignore_case=> {{ ignore_case }}) ) 12 | {% endset %} 13 | {% if execute %} 14 | {% set columns_infer = run_query(query_infer_schema) %} 15 | {% endif %} 16 | {% endif %} 17 | 18 | {%- set is_csv = dbt_external_tables.is_csv(external.file_format) -%} 19 | 20 | {# https://docs.snowflake.net/manuals/sql-reference/sql/create-external-table.html #} 21 | {# This assumes you have already created an external stage #} 22 | 23 | {% set ddl %} 24 | create or replace external table {{source(source_node.source_name, source_node.name)}} 25 | {%- if columns or partitions or infer_schema -%} 26 | ( 27 | {%- if partitions -%}{%- for partition in partitions %} 28 | {{partition.name}} {{partition.data_type}} as {{partition.expression}}{{- ',' if not loop.last or columns|length > 0 or infer_schema -}} 29 | {%- endfor -%}{%- endif -%} 30 | {%- if not infer_schema -%} 31 | {%- for column in columns %} 32 | {%- set column_quoted = adapter.quote(column.name) if column.quote else column.name %} 33 | {%- set column_alias -%} 34 | {%- if 'alias' in column and column.quote -%} 35 | {{adapter.quote(column.alias)}} 36 | {%- elif 'alias' in column -%} 37 | {{column.alias}} 38 | {%- else -%} 39 | {{column_quoted}} 40 | {%- endif -%} 41 | {%- endset %} 42 | {%- set col_expression -%} 43 | {%- if column.expression -%} 44 | {{column.expression}} 45 | {%- else -%} 46 | {%- if ignore_case -%} 47 | {%- set col_id = 'value:c' ~ loop.index if is_csv else 'GET_IGNORE_CASE($1, ' ~ "'"~ column_quoted ~"'"~ ')' -%} 48 | {%- else -%} 49 | {%- set col_id = 'value:c' ~ loop.index if is_csv else 'value:' ~ column_quoted -%} 50 | {%- endif -%} 51 | (case when is_null_value({{col_id}}) or lower({{col_id}}) = 'null' then null else {{col_id}} end) 52 | {%- endif -%} 53 | {%- endset %} 54 | {{column_alias}} {{column.data_type}} as ({{col_expression}}::{{column.data_type}}) 55 | {{- ',' if not loop.last -}} 56 | {% endfor %} 57 | {% else %} 58 | {%- for column in columns_infer %} 59 | {%- set col_expression -%} 60 | {%- if ignore_case -%} 61 | {%- set col_id = 'GET_IGNORE_CASE($1, ' ~ "'"~ column[0] ~"'"~ ')' -%} 62 
| {%- else -%} 63 | {%- set col_id = 'value:' ~ column[0] -%} 64 | {%- endif -%} 65 | (case when is_null_value({{col_id}}) or lower({{col_id}}) = 'null' then null else {{col_id}} end) 66 | {%- endset %} 67 | {{column[0]}} {{column[1]}} as ({{col_expression}}::{{column[1]}}) 68 | {{- ',' if not loop.last -}} 69 | {% endfor %} 70 | {%- endif -%} 71 | ) 72 | {%- endif -%} 73 | {% if partitions %} partition by ({{partitions|map(attribute='name')|join(', ')}}) {% endif %} 74 | location = {{external.location}} {# stage #} 75 | {% if external.auto_refresh in (true, false) -%} 76 | auto_refresh = {{external.auto_refresh}} 77 | {%- endif %} 78 | {% if external.refresh_on_create in (true, false) -%} 79 | refresh_on_create = {{external.refresh_on_create}} 80 | {%- endif %} 81 | {% if external.aws_sns_topic -%} 82 | aws_sns_topic = '{{external.aws_sns_topic}}' 83 | {%- endif %} 84 | {% if external.table_format | lower == "delta" %} 85 | refresh_on_create = false 86 | {% endif %} 87 | {% if external.pattern -%} pattern = '{{external.pattern}}' {%- endif %} 88 | {% if external.integration -%} integration = '{{external.integration}}' {%- endif %} 89 | file_format = {{external.file_format}} 90 | {% if external.table_format -%} table_format = '{{external.table_format}}' {%- endif %} 91 | {% endset %} 92 | {# {{ log('ddl: ' ~ ddl, info=True) }} #} 93 | {{ ddl }}; 94 | {% endmacro %} 95 | -------------------------------------------------------------------------------- /macros/plugins/snowflake/get_external_build_plan.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake__get_external_build_plan(source_node) %} 2 | 3 | {% set build_plan = [] %} 4 | 5 | {% set old_relation = adapter.get_relation( 6 | database = source_node.database, 7 | schema = source_node.schema, 8 | identifier = source_node.identifier 9 | ) %} 10 | 11 | {% set create_or_replace = (old_relation is none or var('ext_full_refresh', false)) %} 12 | 13 | {% if source_node.external.get('snowpipe', none) is not none %} 14 | 15 | {% if create_or_replace %} 16 | {% set build_plan = build_plan + [ 17 | dbt_external_tables.create_external_schema(source_node), 18 | dbt_external_tables.snowflake_create_empty_table(source_node), 19 | dbt_external_tables.snowflake_get_copy_sql(source_node, explicit_transaction=true), 20 | dbt_external_tables.snowflake_create_snowpipe(source_node) 21 | ] %} 22 | {% else %} 23 | {% set build_plan = build_plan + dbt_external_tables.snowflake_refresh_snowpipe(source_node) %} 24 | {% endif %} 25 | 26 | {% else %} 27 | 28 | {% if create_or_replace %} 29 | {% set build_plan = build_plan + [ 30 | dbt_external_tables.create_external_schema(source_node), 31 | dbt_external_tables.create_external_table(source_node) 32 | ] %} 33 | {% else %} 34 | {% set build_plan = build_plan + dbt_external_tables.refresh_external_table(source_node) %} 35 | {% endif %} 36 | 37 | {% endif %} 38 | 39 | {% do return(build_plan) %} 40 | 41 | {% endmacro %} 42 | -------------------------------------------------------------------------------- /macros/plugins/snowflake/helpers/is_csv.sql: -------------------------------------------------------------------------------- 1 | {% macro is_csv(file_format) %} 2 | 3 | {# From https://docs.snowflake.net/manuals/sql-reference/sql/create-external-table.html: 4 | 5 | Important: The external table does not inherit the file format, if any, in the 6 | stage definition.
You must explicitly specify any file format options for the 7 | external table using the FILE_FORMAT parameter. 8 | 9 | Note: FORMAT_NAME and TYPE are mutually exclusive; to avoid unintended behavior, 10 | you should only specify one or the other when creating an external table. 11 | 12 | #} 13 | 14 | {% set ff_ltrimmed = file_format|lower|replace(' ','') %} 15 | 16 | {% if 'type=' in ff_ltrimmed %} 17 | 18 | {% if 'type=csv' in ff_ltrimmed %} 19 | 20 | {{return(true)}} 21 | 22 | {% else %} 23 | 24 | {{return(false)}} 25 | 26 | {% endif %} 27 | 28 | {% else %} 29 | 30 | {% set ff_standardized = ff_ltrimmed 31 | | replace('(','') | replace(')','') 32 | | replace('format_name=','') %} 33 | {% set fqn = ff_standardized.split('.') %} 34 | 35 | {% if fqn | length == 3 %} 36 | {% set ff_database, ff_schema, ff_identifier = fqn[0], fqn[1], fqn[2] %} 37 | {% elif fqn | length == 2 %} 38 | {% set ff_database, ff_schema, ff_identifier = target.database, fqn[0], fqn[1] %} 39 | {% else %} 40 | {% set ff_database, ff_schema, ff_identifier = target.database, target.schema, fqn[0] %} 41 | {% endif %} 42 | 43 | {% call statement('get_file_format', fetch_result = True) %} 44 | show file formats in {{ff_database}}.{{ff_schema}} 45 | {% endcall %} 46 | 47 | {% set ffs = load_result('get_file_format').table %} 48 | 49 | {% for ff in ffs %} 50 | 51 | {% if ff['name']|lower == ff_identifier and ff['type']|lower == 'csv' %} 52 | 53 | {{return(true)}} 54 | 55 | {% endif %} 56 | 57 | {% endfor %} 58 | 59 | {{return(false)}} 60 | 61 | {% endif %} 62 | 63 | {% endmacro %} 64 | -------------------------------------------------------------------------------- /macros/plugins/snowflake/refresh_external_table.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake__refresh_external_table(source_node) %} 2 | 3 | {% set external = source_node.external %} 4 | {% set snowpipe = source_node.external.get('snowpipe', none) %} 5 | 6 | {% set auto_refresh = external.get('auto_refresh', false) %} 7 | {% set partitions = external.get('partitions', none) %} 8 | {% set delta_format = (external.table_format | lower == "delta") %} 9 | 10 | {% set manual_refresh = not auto_refresh %} 11 | 12 | {% if manual_refresh %} 13 | 14 | {% set ddl %} 15 | begin; 16 | alter external table {{source(source_node.source_name, source_node.name)}} refresh; 17 | commit; 18 | {% endset %} 19 | 20 | {% do return([ddl]) %} 21 | 22 | {% else %} 23 | 24 | {% do return([]) %} 25 | 26 | {% endif %} 27 | 28 | {% endmacro %} 29 | -------------------------------------------------------------------------------- /macros/plugins/snowflake/snowpipe/create_empty_table.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake_create_empty_table(source_node) %} 2 | 3 | {%- set columns = source_node.columns.values() %} 4 | 5 | create or replace table {{source(source_node.source_name, source_node.name)}} ( 6 | {% if columns|length == 0 %} 7 | value variant, 8 | {% else -%} 9 | {%- for column in columns -%} 10 | {{column.name}} {{column.data_type}}, 11 | {% endfor -%} 12 | {% endif %} 13 | metadata_filename varchar, 14 | metadata_file_row_number bigint, 15 | metadata_file_last_modified timestamp, 16 | _dbt_copied_at timestamp 17 | ); 18 | 19 | {% endmacro %} 20 | -------------------------------------------------------------------------------- /macros/plugins/snowflake/snowpipe/create_snowpipe.sql: 
-------------------------------------------------------------------------------- 1 | {% macro snowflake_create_snowpipe(source_node) %} 2 | 3 | {%- set external = source_node.external -%} 4 | {%- set snowpipe = external.snowpipe -%} 5 | 6 | {# https://docs.snowflake.com/en/sql-reference/sql/create-pipe.html #} 7 | create or replace pipe {{source(source_node.source_name, source_node.name)}} 8 | {% if snowpipe.auto_ingest -%} auto_ingest = {{snowpipe.auto_ingest}} {%- endif %} 9 | {% if snowpipe.aws_sns_topic -%} aws_sns_topic = '{{snowpipe.aws_sns_topic}}' {%- endif %} 10 | {% if snowpipe.integration -%} integration = '{{snowpipe.integration}}' {%- endif %} 11 | {% if snowpipe.error_integration -%} error_integration = '{{snowpipe.error_integration}}' {%- endif %} 12 | as {{ dbt_external_tables.snowflake_get_copy_sql(source_node) }} 13 | 14 | {% endmacro %} 15 | -------------------------------------------------------------------------------- /macros/plugins/snowflake/snowpipe/get_copy_sql.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake_get_copy_sql(source_node, explicit_transaction=false) %} 2 | {# This assumes you have already created an external stage #} 3 | 4 | {%- set columns = source_node.columns.values() -%} 5 | {%- set external = source_node.external -%} 6 | {%- set is_csv = dbt_external_tables.is_csv(external.file_format) %} 7 | {%- set copy_options = external.snowpipe.get('copy_options', none) -%} 8 | 9 | {%- if explicit_transaction -%} begin; {%- endif %} 10 | 11 | copy into {{source(source_node.source_name, source_node.name)}} 12 | from ( 13 | select 14 | {% if columns|length == 0 %} 15 | $1::variant as value, 16 | {% else -%} 17 | {%- for column in columns -%} 18 | {%- set col_expression -%} 19 | {%- if is_csv -%}nullif(${{loop.index}},''){# special case: get columns by ordinal position #} 20 | {%- else -%}nullif($1:{{column.name}},''){# standard behavior: get columns by name #} 21 | {%- endif -%} 22 | {%- endset -%} 23 | {{col_expression}}::{{column.data_type}} as {{column.name}}, 24 | {% endfor -%} 25 | {% endif %} 26 | metadata$filename::varchar as metadata_filename, 27 | metadata$file_row_number::bigint as metadata_file_row_number, 28 | metadata$file_last_modified::timestamp as metadata_file_last_modified, 29 | metadata$start_scan_time::timestamp as _dbt_copied_at 30 | from {{external.location}} {# stage #} 31 | ) 32 | file_format = {{external.file_format}} 33 | {% if external.pattern -%} pattern = '{{external.pattern}}' {%- endif %} 34 | {% if copy_options %} {{copy_options}} {% endif %}; 35 | 36 | {% if explicit_transaction -%} commit; {%- endif -%} 37 | 38 | {% endmacro %} 39 | -------------------------------------------------------------------------------- /macros/plugins/snowflake/snowpipe/refresh_snowpipe.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake_refresh_snowpipe(source_node) %} 2 | 3 | {% set snowpipe = source_node.external.snowpipe %} 4 | {% set auto_ingest = snowpipe.get('auto_ingest', false) if snowpipe is mapping %} 5 | 6 | {% if auto_ingest is true %} 7 | 8 | {% do return([]) %} 9 | 10 | {% else %} 11 | 12 | {% set ddl %} 13 | alter pipe {{source(source_node.source_name, source_node.name)}} refresh 14 | {% endset %} 15 | 16 | {{ return([ddl]) }} 17 | 18 | {% endif %} 19 | 20 | {% endmacro %} 21 | -------------------------------------------------------------------------------- /macros/plugins/spark/create_external_table.sql: 
-------------------------------------------------------------------------------- 1 | {% macro spark__create_external_table(source_node) %} 2 | 3 | {%- set columns = source_node.columns.values() -%} 4 | {%- set external = source_node.external -%} 5 | {%- set partitions = external.partitions -%} 6 | {%- set options = external.options -%} 7 | 8 | {%- set columns_and_partitions = columns | list -%} 9 | {%- if partitions -%} 10 | {%- for i in partitions -%} 11 | {%- if i.name not in columns_and_partitions | list | map(attribute='name') -%} 12 | {%- do columns_and_partitions.append(i) -%} 13 | {%- endif -%} 14 | {%- endfor -%} 15 | {%- endif -%} 16 | 17 | {# https://spark.apache.org/docs/latest/sql-data-sources-hive-tables.html #} 18 | create table {{source(source_node.source_name, source_node.name)}} 19 | {%- if columns | length > 0 %} ( 20 | {% for column in columns_and_partitions %} 21 | {{column.name}} {{column.data_type}} 22 | {{- ',' if not loop.last -}} 23 | {% endfor %} 24 | ) {% endif -%} 25 | {% if external.using %} using {{external.using}} {%- endif %} 26 | {% if options -%} options ( 27 | {%- for key, value in options.items() -%} 28 | '{{ key }}' = '{{value}}' {{- ', \n' if not loop.last -}} 29 | {%- endfor -%} 30 | ) {%- endif %} 31 | {% if partitions -%} partitioned by ( 32 | {%- for partition in partitions -%} 33 | {{partition.name}}{{', ' if not loop.last}} 34 | {%- endfor -%} 35 | ) {%- endif %} 36 | {% if external.row_format -%} row format {{external.row_format}} {%- endif %} 37 | {% if external.file_format -%} stored as {{external.file_format}} {%- endif %} 38 | {% if external.location -%} location '{{external.location}}' {%- endif %} 39 | {% if external.table_properties -%} tblproperties {{ external.table_properties }} {%- endif -%} 40 | 41 | {% endmacro %} 42 | -------------------------------------------------------------------------------- /macros/plugins/spark/get_external_build_plan.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__get_external_build_plan(source_node) %} 2 | 3 | {% set build_plan = [] %} 4 | 5 | {% set old_relation = adapter.get_relation( 6 | database = none, 7 | schema = source_node.schema, 8 | identifier = source_node.identifier 9 | ) %} 10 | 11 | {% set create_or_replace = (old_relation is none or var('ext_full_refresh', false)) %} 12 | 13 | {% if create_or_replace %} 14 | {% set build_plan = build_plan + [ 15 | dbt_external_tables.create_external_schema(source_node), 16 | dbt_external_tables.dropif(source_node), 17 | dbt_external_tables.create_external_table(source_node) 18 | ] %} 19 | {% else %} 20 | {% set build_plan = build_plan + dbt_external_tables.refresh_external_table(source_node) %} 21 | {% endif %} 22 | 23 | {% set recover_partitions = dbt_external_tables.recover_partitions(source_node) %} 24 | {% if recover_partitions %} 25 | {% set build_plan = build_plan + [ 26 | recover_partitions 27 | ] %} 28 | {% endif %} 29 | 30 | {% do return(build_plan) %} 31 | 32 | {% endmacro %} 33 | -------------------------------------------------------------------------------- /macros/plugins/spark/helpers/dropif.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__dropif(node) %} 2 | 3 | {% set ddl %} 4 | drop table if exists {{source(node.source_name, node.name)}} 5 | {% endset %} 6 | 7 | {{return(ddl)}} 8 | 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- 
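As a rough illustration of what the Spark macros above consume, here is a minimal source sketch (the source name, bucket path, and columns are invented for this example and are not taken from the package):

version: 2
sources:
  - name: logs                              # illustrative source name
    tables:
      - name: events_raw                    # illustrative table name
        external:
          location: 's3://my-bucket/events/'   # hypothetical path
          using: parquet
          partitions:
            - name: event_date
              data_type: date
        columns:
          - name: event_id
            data_type: string

On the first run (or with --vars 'ext_full_refresh: true'), spark__get_external_build_plan creates the schema if needed, then drops and recreates the table via spark__dropif and spark__create_external_table; on later runs it only issues a refresh. Because this sketch declares partitions with a non-delta `using`, the build plan also appends the RECOVER PARTITIONS statement produced by the recover_partitions macro that follows.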
/macros/plugins/spark/helpers/recover_partitions.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__recover_partitions(source_node) %} 2 | {# https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-alter-table.html #} 3 | 4 | {%- if source_node.external.partitions and source_node.external.using and source_node.external.using|lower != 'delta' -%} 5 | {% set ddl %} 6 | ALTER TABLE {{ source(source_node.source_name, source_node.name) }} RECOVER PARTITIONS 7 | {% endset %} 8 | {%- else -%} 9 | {% set ddl = none %} 10 | {%- endif -%} 11 | 12 | {{return(ddl)}} 13 | 14 | {% endmacro %} 15 | 16 | {% macro recover_partitions(source_node) %} 17 | {{ return(adapter.dispatch('recover_partitions', 'dbt_external_tables')(source_node)) }} 18 | {% endmacro %} 19 | 20 | {% macro default__recover_partitions(source_node) %} 21 | /*{# 22 | We're dispatching this macro so that users can override it if required on other adapters 23 | but this will work for spark/databricks. 24 | #}*/ 25 | 26 | {{ exceptions.raise_not_implemented('recover_partitions macro not implemented for adapter ' + adapter.type()) }} 27 | {% endmacro %} 28 | -------------------------------------------------------------------------------- /macros/plugins/spark/refresh_external_table.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__refresh_external_table(source_node) %} 2 | 3 | {% set refresh %} 4 | refresh table {{source(source_node.source_name, source_node.name)}} 5 | {% endset %} 6 | 7 | {% do return([refresh]) %} 8 | 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "dbt-external-tables" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = ">=3.11" 7 | dependencies = [ 8 | "dbt-bigquery>=1.9.0", 9 | "dbt-core>=1.9.1", 10 | "dbt-redshift>=1.9.0", 11 | "dbt-snowflake>=1.9.0", 12 | "dbt-synapse>=1.8.2", 13 | "tox>=4.23.2", 14 | ] 15 | -------------------------------------------------------------------------------- /run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Setting up virtual environment for dbt-$1" 4 | VENV="venv/bin/activate" 5 | 6 | if [[ ! -f $VENV ]]; then 7 | python3.8 -m venv venv 8 | . $VENV 9 | pip install --upgrade pip setuptools 10 | if [ $1 == 'databricks' ] 11 | then 12 | echo "Installing dbt-spark" 13 | pip install dbt-spark[ODBC] --upgrade --pre 14 | elif [ $1 == 'azuresql' ] 15 | then 16 | echo "Installing dbt-sqlserver" 17 | pip install dbt-sqlserver --upgrade --pre 18 | else 19 | echo "Installing dbt-$1" 20 | pip install dbt-$1 --upgrade --pre 21 | # remove the protobuf installation when all the dbt-provider packages are updated with dbt core 1.7.9 22 | pip install protobuf==4.25.3 23 | fi 24 | fi 25 | 26 | . $VENV 27 | echo "Changing working directory: integration_tests" 28 | cd integration_tests 29 | 30 | if [[ !
-e ~/.dbt/profiles.yml ]]; then 31 | echo "Copying sample profile" 32 | mkdir -p ~/.dbt 33 | cp ci/sample.profiles.yml ~/.dbt/profiles.yml 34 | fi 35 | 36 | echo "Starting integration tests" 37 | set -eo pipefail 38 | dbt deps --target $1 39 | dbt seed --full-refresh --target $1 40 | dbt run-operation prep_external --target $1 41 | dbt run-operation dbt_external_tables.stage_external_sources --vars 'ext_full_refresh: true' --target $1 42 | dbt run-operation dbt_external_tables.stage_external_sources --target $1 43 | dbt test --target $1 44 | -------------------------------------------------------------------------------- /sample_analysis/external_sources_dry_run.sql: -------------------------------------------------------------------------------- 1 | {%- for node in graph.nodes.values() -%} 2 | 3 | {%- if node.resource_type == 'source' and node.external.location != none -%} 4 | 5 | {{ 'Staging external source ' ~ node.schema ~ '.' ~ node.identifier }} 6 | 7 | {%- set run_queue = dbt_external_tables.get_external_build_plan(node) -%} 8 | 9 | {%- for q in run_queue %} 10 | {{ q }} 11 | 12 | ---------- 13 | 14 | {% endfor -%} 15 | 16 | {%- endif %} 17 | 18 | {%- endfor -%} 19 | -------------------------------------------------------------------------------- /sample_sources/bigquery.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: snowplow 5 | database: analytics 6 | loader: gcloud storage 7 | 8 | tables: 9 | - name: event 10 | description: "External table of Snowplow events, stored as CSV files in Cloud Storage" 11 | external: 12 | location: 'gs://bucket/path/*' 13 | options: 14 | format: csv 15 | skip_leading_rows: 1 16 | 17 | # if you want a partitioned table, file paths MUST be Hive-style: 18 | # 'gs://bucket/path/collector_hour=2020-01-01/' 19 | # 'gs://bucket/path/collector_hour=2020-01-02/' (etc) 20 | hive_partition_uri_prefix: 'gs://bucket/path/' 21 | partitions: 22 | - name: collector_date 23 | data_type: date 24 | 25 | columns: 26 | - name: app_id 27 | data_type: varchar(255) 28 | description: "Application ID" 29 | - name: domain_sessionidx 30 | data_type: int 31 | description: "A visit / session index" 32 | - name: etl_tstamp 33 | data_type: timestamp 34 | description: "Timestamp event began ETL" 35 | - name: contexts 36 | data_type: variant 37 | description: "Contexts attached to event by Tracker" 38 | 39 | # alternatively, BigQuery can infer your schema (columns + partitions) 40 | - name: event_inferred 41 | external: 42 | location: 'gs://bucket/path/*' 43 | options: 44 | format: csv 45 | skip_leading_rows: 1 46 | hive_partition_uri_prefix: 'gs://bucket/path/' 47 | 48 | # optionally, BigQuery can pull data from multiple GCS paths, instead of just one 49 | - name: event_multiple_paths 50 | external: 51 | location: this is still a required property, but it will be ignored 52 | options: 53 | format: csv 54 | skip_leading_rows: 1 55 | 56 | # list all file paths with relevant source data 57 | uris: 58 | - 'gs://bucket_a/path/*' 59 | - 'gs://bucket_b/path/*' 60 | - 'gs://bucket_c/more/specific/path/file.csv' 61 | -------------------------------------------------------------------------------- /sample_sources/redshift.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: snowplow 5 | database: analytics 6 | schema: snowplow_external 7 | loader: S3 8 | loaded_at_field: collector_tstamp 9 | 10 | tables: 11 | - name: event 12 | external: 
13 | location: "s3://bucket/path" # Amazon S3 path prefix 14 | row_format: > # Hive specification 15 | serde 'org.openx.data.jsonserde.JsonSerDe' 16 | with serdeproperties ( 17 | 'strip.outer.array'='false' 18 | ) 19 | partitions: 20 | - name: appId 21 | data_type: varchar(255) 22 | vals: # list of values 23 | - dev 24 | - prod 25 | path_macro: dbt_external_tables.key_value 26 | # Macro to convert partition value to file path specification. 27 | # This "helper" macro is defined in the package, but you can use 28 | # any custom macro that takes keyword arguments 'name' + 'value' 29 | # and returns the path as a string 30 | # If multiple partitions, order matters for compiling S3 path 31 | - name: collector_date 32 | data_type: date 33 | vals: # macro w/ keyword args to generate list of values 34 | macro: dbt.dates_in_range 35 | args: 36 | start_date_str: '2019-08-01' 37 | end_date_str: '{{modules.datetime.date.today().strftime("%Y-%m-%d")}}' 38 | in_fmt: "%Y-%m-%d" 39 | out_fmt: "%Y-%m-%d" 40 | path_macro: dbt_external_tables.year_month_day 41 | 42 | # specify ALL columns to extract, unnest, or otherwise parse from the source files. 43 | # all Redshift external tables natively include `$path` and `$size` pseudocolumns, 44 | # so there is no need to specify those here. 45 | columns: 46 | - name: app_id 47 | data_type: varchar(255) 48 | description: "Application ID" 49 | - name: domain_sessionidx 50 | data_type: int 51 | description: "A visit / session index" 52 | 53 | # Spectrum timestamp columns *must* be in the format `yyyy-MM-dd HH:mm:ss.SSSSSS` 54 | # (e.g. '2017-05-01 11:30:59.000000'). Otherwise, load as varchar and 55 | # parse/cast in a staging model. 56 | - name: etl_tstamp 57 | data_type: varchar(32) 58 | description: "Timestamp event began ETL" 59 | 60 | # Spectrum columns with nested values require Hive-style specifications. 61 | # I usually give up, make them big varchars, and parse in a staging model. 62 | - name: contexts 63 | data_type: varchar(65000) 64 | description: "Contexts attached to event by Tracker" 65 | -------------------------------------------------------------------------------- /sample_sources/snowflake.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: snowplow 5 | database: analytics 6 | schema: snowplow_external 7 | loader: S3 8 | loaded_at_field: collector_hour 9 | 10 | tables: 11 | - name: event_ext_tbl 12 | description: "External table of Snowplow events stored as JSON files" 13 | external: 14 | location: "@raw.snowplow.snowplow" # reference an existing external stage 15 | file_format: "( type = json )" # fully specified here, or reference an existing file format 16 | auto_refresh: true # requires configuring an event notification from Amazon S3 or Azure 17 | refresh_on_create: false # default is true, useful when refresh after table creation needs to be skipped 18 | partitions: 19 | - name: collector_hour 20 | data_type: timestamp 21 | expression: to_timestamp(substr(metadata$filename, 8, 13), 'YYYY/MM/DD/HH24') 22 | 23 | # all Snowflake external tables natively include a `metadata$filename` pseudocolumn 24 | # and a `value` column (JSON blob-ified version of file contents), so there is no need to specify 25 | # them here. 
you may optionally specify columns to unnest or parse from the file: 26 | columns: 27 | - name: app_id 28 | data_type: varchar(255) 29 | description: "Application ID" 30 | - name: domain_sessionidx 31 | data_type: int 32 | description: "A visit / session index" 33 | - name: etl_tstamp 34 | data_type: timestamp 35 | description: "Timestamp event began ETL" 36 | - name: etl timestamp 37 | # Use double-quoted identifiers for name and identifier 38 | quote: true 39 | # Specifying alias lets us rename etl timestamp to "etl_timestamp" 40 | alias: etl_timestamp 41 | data_type: timestamp 42 | description: "Timestamp event began ETL with a double quoted identifier" 43 | - name: etl_date 44 | data_type: date 45 | description: "Date event began ETL" 46 | # Expressions can manipulate the variant value prior to casting to data_type. 47 | expression: TRY_TO_DATE(VALUE:etl_tstamp::VARCHAR, 'YYYYMMDD') 48 | - name: contexts 49 | data_type: variant 50 | description: "Contexts attached to event by Tracker" 51 | 52 | 53 | - name: event_snowpipe 54 | description: "Table of Snowplow events, stored as JSON files, loaded in near-real time via Snowpipe" 55 | loader: S3 + snowpipe # this is just for your reference 56 | external: 57 | location: "@raw.snowplow.snowplow" 58 | file_format: "{{ target.schema }}.my_json_file_format" 59 | pattern: ".*[.]json" # Optional object key pattern 60 | 61 | # Instead of an external table, create an empty table, backfill it, and pipe new data 62 | snowpipe: 63 | auto_ingest: true # requires either `aws_sns_topic` or `integration` 64 | aws_sns_topic: # Amazon S3 65 | integration: # Google Cloud or Azure 66 | copy_options: "on_error = continue, enforce_length = false" # e.g. 67 | 68 | # dbt will include four metadata columns in addition to any `columns` 69 | # specified for a snowpiped table: 70 | # `metadata_filename`: the file from which this row was loaded 71 | # `metadata_file_row_number`: the numbered row this was in that file 72 | # `metadata_file_last_modified`: that file's last-modified timestamp, and `_dbt_copied_at`: the current_timestamp when this row was loaded (backfilled or piped) 73 | # 74 | # if you do not specify *any* columns for a snowpiped table, dbt will also 75 | # include `value`, the JSON blob of all file contents.
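 # For example (an illustrative entry, not one of the samples above): a snowpiped table
 # declared with no `columns` at all would be created with just that `value` column plus
 # the metadata columns listed above:
 #
 #    - name: event_snowpipe_value_only
 #      external:
 #        location: "@raw.snowplow.snowplow"
 #        file_format: "( type = json )"
 #        snowpipe:
 #          auto_ingest: false  # no SNS topic or integration; the pipe is refreshed by stage_external_sources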
76 | 77 | - name: delta_tbl 78 | description: "External table using Delta files" 79 | external: 80 | location: "@stage" # reference an existing external stage 81 | file_format: "( type = parquet )" # fully specified here, or reference an existing file format 82 | table_format: delta # specify the table format 83 | auto_refresh: false # requires configuring an event notification from Amazon S3 or Azure 84 | 85 | 86 | - name: parquet_with_inferred_schema 87 | description: "External table using Parquet and inferring the schema" 88 | external: 89 | location: "@stage" # reference an existing external stage 90 | file_format: "my_file_format" # we need a named file format for infer to work 91 | infer_schema: true # parameter to tell Snowflake we want to infer the table schema 92 | partitions: 93 | - name: section # we can define partitions on top of the schema columns 94 | data_type: varchar(64) 95 | expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)" 96 | columns: # columns can still be listed for documentation/testing purposes 97 | - name: id 98 | description: this is an id 99 | - name: name 100 | description: and this is a name 101 | 102 | - name: aws_sns_refresh_tbl 103 | description: "External table using AWS SNS for auto-refresh" 104 | external: 105 | location: "@stage" # reference an existing external stage 106 | file_format: "( type = csv )" 107 | # auto_refresh is assumed, setting to false is not supported 108 | aws_sns_topic: "arn:aws:sns:us-east-1:123456789012:my_topic" # SNS topic ARN 109 | -------------------------------------------------------------------------------- /sample_sources/spark.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: snowplow 5 | tables: 6 | - name: event 7 | description: "Snowplow events stored as CSV files in HDFS" 8 | external: 9 | location: 'hdfs://.../event.csv' # hdfs://, s3://, azure://, dbfs://, ... 10 | using: csv # file type: csv, json, parquet, delta, ... 11 | options: # as needed 12 | sep: '|' 13 | header: 'true' 14 | timestampFormat: 'yyyy-MM-dd HH:mm' 15 | partitions: 16 | - name: year 17 | data_type: int 18 | - name: month 19 | data_type: int 20 | - name: day 21 | data_type: int 22 | 23 | columns: 24 | - name: app_id 25 | data_type: string 26 | description: "Application ID" 27 | - name: domain_sessionidx 28 | data_type: int 29 | description: "A visit / session index" 30 | - name: etl_tstamp 31 | data_type: timestamp 32 | description: "Timestamp event began ETL" 33 | 34 | # depending on the complexity of nested columns, it may be preferable to 35 | # register them as strings here and parse in a model: 36 | # `from_json(contexts, 'schema string, data array<...>')` 37 | - name: contexts 38 | data_type: string 39 | description: "Contexts attached to event by Tracker" 40 | -------------------------------------------------------------------------------- /sample_sources/synapse.yml: -------------------------------------------------------------------------------- 1 | # Creates query given below 2 | 3 | version: 2 4 | 5 | sources: 6 | - name: marketo 7 | schema: source_marketo 8 | loader: ADLSblob 9 | tables: 10 | - name: lead_activities 11 | description: | 12 | from raw DW.
13 | external: 14 | # Delimited Files in Blob/Lake 15 | # External Data Source name (created prior) 16 | data_source: SynapseContainer # made with TYPE= 'HADOOP' 17 | location: /marketing/Marketo/LeadActivities/ # path on above data source 18 | # External File Format name (created prior) 19 | file_format: CommaDelimited 20 | reject_type: VALUE 21 | reject_value: 0 22 | ansi_nulls: true 23 | quoted_identifier: true 24 | 25 | # Cross database query (i.e. RDBMS) Azure SQL ONLY 26 | data_source: AEDW # made with TYPE= 'RDBMS' 27 | schema_name: Business 28 | object_name: LeadActivities 29 | 30 | columns: 31 | - name: id 32 | description: unique Activity ID 33 | data_type: int 34 | - name: leadId 35 | description: Lead ID 36 | data_type: int 37 | - name: activityDate 38 | description: date of activity 39 | data_type: varchar(255) 40 | - name: activityTypeId 41 | description: unique identifier for type of activity 42 | data_type: int 43 | - name: campaignId 44 | description: Campaign under which activity took place 45 | data_type: int 46 | - name: primaryAttributeValueId 47 | description: the main attribute for given activity type 48 | data_type: int 49 | - name: primaryAttributeValue 50 | description: what value was taken 51 | data_type: varchar(255) 52 | 53 | # SET ANSI_NULLS ON; 54 | # SET QUOTED_IDENTIFIER ON; 55 | 56 | # CREATE EXTERNAL TABLE [source].[lead_activities] 57 | # ( 58 | # [id] [int] NOT NULL, 59 | # [leadId] [int] NOT NULL, 60 | # [activityDate] [varchar](255) NOT NULL, 61 | # [activityTypeId] [int] NOT NULL, 62 | # [campaignId] [int] NOT NULL, 63 | # [primaryAttributeValueId] [int] NOT NULL, 64 | # [primaryAttributeValue] [varchar](255) NOT NULL 65 | # ) 66 | # WITH (DATA_SOURCE = [SynapseContainer], LOCATION = N'/marketing/Marketo/LeadActivities/LeadActivities.csv', FILE_FORMAT = [CommaDelimited], REJECT_TYPE = VALUE, REJECT_VALUE = 0 ); 67 | -------------------------------------------------------------------------------- /supported_adapters.env: -------------------------------------------------------------------------------- 1 | SUPPORTED_ADAPTERS=snowflake,redshift,bigquery 2 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | skipsdist = True 3 | envlist = lint_all, testenv 4 | 5 | [testenv] 6 | passenv = 7 | # redshift 8 | REDSHIFT_HOST 9 | REDSHIFT_USER 10 | DBT_ENV_SECRET_REDSHIFT_PASS 11 | REDSHIFT_DATABASE 12 | REDSHIFT_SCHEMA 13 | REDSHIFT_PORT 14 | REDSHIFT_SPECTRUM_IAM_ROLE 15 | # snowflake 16 | SNOWFLAKE_ACCOUNT 17 | SNOWFLAKE_USER 18 | DBT_ENV_SECRET_SNOWFLAKE_PASS 19 | SNOWFLAKE_ROLE 20 | SNOWFLAKE_DATABASE 21 | SNOWFLAKE_WAREHOUSE 22 | SNOWFLAKE_SCHEMA 23 | # bigquery 24 | BIGQUERY_KEYFILE_JSON 25 | BIGQUERY_PROJECT 26 | BIGQUERY_SCHEMA 27 | # synapse 28 | SYNAPSE_DRIVER 29 | SYNAPSE_HOST 30 | SYNAPSE_PORT 31 | SYNAPSE_DATABASE 32 | SYNAPSE_AUTHENTICATION 33 | SYNAPSE_TENANT_ID 34 | SYNAPSE_CLIENT_ID 35 | DBT_ENV_SECRET_SYNAPSE_CLIENT_SECRET 36 | 37 | # run dbt commands directly, assumes dbt is already installed in environment 38 | [testenv:dbt_integration_redshift] 39 | changedir = integration_tests 40 | allowlist_externals = 41 | dbt 42 | skip_install = true 43 | commands = 44 | dbt deps --target redshift 45 | dbt seed --full-refresh --target redshift 46 | dbt run --target redshift 47 | dbt run-operation prep_external --target redshift 48 | dbt run-operation dbt_external_tables.stage_external_sources --vars 'ext_full_refresh: 
true' --target redshift 49 | dbt run-operation dbt_external_tables.stage_external_sources --target redshift 50 | dbt test --target redshift 51 | 52 | # run dbt commands directly, assumes dbt is already installed in environment 53 | [testenv:dbt_integration_snowflake] 54 | changedir = integration_tests 55 | allowlist_externals = 56 | dbt 57 | skip_install = true 58 | commands = 59 | dbt deps --target snowflake 60 | dbt seed --full-refresh --target snowflake 61 | dbt run --target snowflake 62 | dbt run-operation prep_external --target snowflake 63 | dbt run-operation dbt_external_tables.stage_external_sources --vars 'ext_full_refresh: true' --target snowflake 64 | dbt run-operation dbt_external_tables.stage_external_sources --target snowflake 65 | dbt test --target snowflake 66 | 67 | # run dbt commands directly, assumes dbt is already installed in environment 68 | [testenv:dbt_integration_bigquery] 69 | changedir = integration_tests 70 | allowlist_externals = 71 | dbt 72 | skip_install = true 73 | commands = 74 | dbt deps --target bigquery 75 | dbt seed --full-refresh --target bigquery 76 | dbt run --target bigquery 77 | dbt run-operation prep_external --target bigquery 78 | dbt run-operation dbt_external_tables.stage_external_sources --vars 'ext_full_refresh: true' --target bigquery 79 | dbt run-operation dbt_external_tables.stage_external_sources --target bigquery 80 | dbt test --target bigquery 81 | 82 | # run dbt commands directly, assumes dbt is already installed in environment 83 | # temporarily removed from CI testing until we can get the cluster hooked up to the blob correctly 84 | [testenv:dbt_integration_synapse] 85 | changedir = integration_tests 86 | allowlist_externals = 87 | dbt 88 | skip_install = true 89 | commands = 90 | dbt deps --target synapse 91 | dbt seed --full-refresh --target synapse 92 | dbt run --target synapse 93 | dbt run-operation prep_external --target synapse 94 | dbt run-operation dbt_external_tables.stage_external_sources --vars 'ext_full_refresh: true' --target synapse 95 | dbt run-operation dbt_external_tables.stage_external_sources --target synapse 96 | dbt test --target synapse --------------------------------------------------------------------------------
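The tox environments and run_test.sh above assume the integration_tests project can resolve dbt_external_tables through `dbt deps`. A project consuming the published package would declare it in packages.yml along these lines (the version range here is illustrative; pin whatever release you actually need):

packages:
  - package: dbt-labs/dbt_external_tables
    version: [">=0.9.0", "<1.0.0"]

After `dbt deps`, the `dbt run-operation dbt_external_tables.stage_external_sources` commands shown in the test targets become available in that project.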