├── .circleci
    └── config.yml
├── .github
    ├── CODEOWNERS
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   └── feature_request.md
    ├── pull_request_template.md
    └── workflows
    │   ├── ci.yml
    │   ├── stale.yml
    │   └── triage-labels.yml
├── .gitignore
├── .vscode
    └── settings.json
├── CHANGELOG.md
├── LICENSE
├── README.md
├── RELEASE.md
├── dbt_project.yml
├── integration_tests
    ├── .env
    │   └── postgres.env
    ├── analyses
    │   ├── compare_column_values_smoke_test.sql
    │   └── compare_relation_columns_smoke_test.sql
    ├── dbt_project.yml
    ├── macros
    │   └── unit_tests
    │   │   └── struct_generation_macros.sql
    ├── models
    │   ├── data_tests
    │   │   ├── compare_all_columns_concat_pk_with_summary.sql
    │   │   ├── compare_all_columns_concat_pk_without_summary.sql
    │   │   ├── compare_all_columns_where_clause.sql
    │   │   ├── compare_all_columns_with_summary.sql
    │   │   ├── compare_all_columns_with_summary_and_exclude.sql
    │   │   ├── compare_all_columns_without_summary.sql
    │   │   ├── compare_and_classify_query_results.sql
    │   │   ├── compare_queries.sql
    │   │   ├── compare_queries_concat_pk_without_summary.sql
    │   │   ├── compare_queries_with_summary.sql
    │   │   ├── compare_queries_without_summary.sql
    │   │   ├── compare_relation_columns.sql
    │   │   ├── compare_relations_concat_pk_without_summary.sql
    │   │   ├── compare_relations_with_exclude.sql
    │   │   ├── compare_relations_with_summary.sql
    │   │   ├── compare_relations_without_exclude.sql
    │   │   ├── compare_relations_without_summary.sql
    │   │   ├── compare_row_counts.sql
    │   │   ├── compare_which_columns_differ.sql
    │   │   ├── compare_which_columns_differ_exclude_cols.sql
    │   │   └── schema.yml
    │   ├── unit_test_placeholder_models
    │   │   ├── unit_test_model_a.sql
    │   │   ├── unit_test_model_b.sql
    │   │   ├── unit_test_struct_model_a.sql
    │   │   └── unit_test_struct_model_b.sql
    │   └── unit_test_wrappers
    │   │   ├── unit_compare_classify.sql
    │   │   ├── unit_compare_classify.yml
    │   │   ├── unit_compare_classify_struct.sql
    │   │   ├── unit_compare_classify_struct.yml
    │   │   ├── unit_compare_queries.sql
    │   │   ├── unit_compare_queries.yml
    │   │   ├── unit_compare_which_query_columns_differ.sql
    │   │   ├── unit_compare_which_query_columns_differ.yml
    │   │   ├── unit_ensure_all_pks_are_in_column_set.sql
    │   │   ├── unit_ensure_all_pks_are_in_column_set.yml
    │   │   ├── unit_quick_are_queries_identical.sql
    │   │   └── unit_quick_are_queries_identical.yml
    ├── package-lock.yml
    ├── packages.yml
    ├── profiles.yml
    ├── seeds
    │   ├── data_compare_all_columns__albertsons_produce.csv
    │   ├── data_compare_all_columns__albertsons_produce__concat_pk.csv
    │   ├── data_compare_all_columns__market_of_choice_produce.csv
    │   ├── data_compare_all_columns__market_of_choice_produce__concat_pk.csv
    │   ├── data_compare_relation_columns_a.csv
    │   ├── data_compare_relation_columns_b.csv
    │   ├── data_compare_relations__a_relation.csv
    │   ├── data_compare_relations__b_relation.csv
    │   ├── data_compare_which_columns_differ_a.csv
    │   ├── data_compare_which_columns_differ_b.csv
    │   ├── expected_results__compare_all_columns_concat_pk_with_summary.csv
    │   ├── expected_results__compare_all_columns_concat_pk_without_summary.csv
    │   ├── expected_results__compare_all_columns_where_clause.csv
    │   ├── expected_results__compare_all_columns_with_summary.csv
    │   ├── expected_results__compare_all_columns_with_summary_and_exclude.csv
    │   ├── expected_results__compare_all_columns_without_summary.csv
    │   ├── expected_results__compare_relation_columns.csv
    │   ├── expected_results__compare_relations_with_exclude.csv
    │   ├── expected_results__compare_relations_without_exclude.csv
    │   ├── expected_results__compare_row_counts.csv
    │   ├── expected_results__compare_which_columns_differ.csv
    │   ├── expected_results__compare_which_columns_differ_exclude_cols.csv
    │   ├── expected_results__compare_with_summary.csv
    │   └── expected_results__compare_without_summary.csv
    └── tests
    │   └── fixtures
    │       ├── complex_struct.sql
    │       ├── complex_struct_different_order.sql
    │       ├── complex_struct_different_value.sql
    │       ├── simple_struct.sql
    │       ├── simple_struct_different_order.sql
    │       └── simple_struct_removed_key.sql
├── macros
    ├── compare_all_columns.sql
    ├── compare_and_classify_query_results.sql
    ├── compare_and_classify_relation_rows.sql
    ├── compare_column_values.sql
    ├── compare_column_values_verbose.sql
    ├── compare_queries.sql
    ├── compare_relation_columns.sql
    ├── compare_relations.sql
    ├── compare_row_counts.sql
    ├── compare_which_query_columns_differ.sql
    ├── compare_which_relation_columns_differ.sql
    ├── quick_are_queries_identical.sql
    ├── quick_are_relations_identical.sql
    └── utils
    │   ├── _classify_audit_row_status.sql
    │   ├── _count_num_rows_in_status.sql
    │   ├── _ensure_all_pks_are_in_column_set.sql
    │   ├── _generate_null_safe_sk.sql
    │   ├── _generate_set_results.sql
    │   ├── _get_comparison_bounds.sql
    │   └── _get_intersecting_columns_from_relations.sql
├── packages.yml
├── supported_adapters.env
└── tox.ini


/.circleci/config.yml:
--------------------------------------------------------------------------------
  1 | 
  2 | version: 2
  3 | 
  4 | jobs:
  5 |   build:
  6 |     docker:
  7 |       - image: cimg/python:3.9.9
  8 |       - image: cimg/postgres:14.0
  9 |         auth:
 10 |           username: dbt-labs
 11 |           password: ''
 12 |         environment:
 13 |           POSTGRES_HOST: localhost
 14 |           POSTGRES_USER: root
 15 |           POSTGRES_PORT: 5432
 16 |           POSTGRES_DATABASE: circle_test
 17 |           POSTGRES_SCHEMA: dbt_utils_integration_tests_postgres
 18 |           DBT_ENV_SECRET_POSTGRES_PASS: ''
 19 | 
 20 |     steps:
 21 |       - checkout
 22 | 
 23 |       - run:
 24 |           name: setup_creds
 25 |           command: |
 26 |             echo $BIGQUERY_SERVICE_ACCOUNT_JSON > ${HOME}/bigquery-service-key.json
 27 | 
 28 |       - restore_cache:
 29 |           key: deps1-{{ .Branch }}
 30 | 
 31 |       - run:
 32 |           name: "Setup dbt"
 33 |           command: | 
 34 |             set -x
 35 | 
 36 |             python -m venv dbt_venv
 37 |             . dbt_venv/bin/activate
 38 | 
 39 |             python -m pip install --upgrade pip setuptools
 40 |             python -m pip install --pre dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery dbt-databricks
 41 | 
 42 |       - run:
 43 |           name: "Run Tests - Postgres"
 44 |           environment:
 45 |             POSTGRES_HOST: localhost
 46 |             POSTGRES_USER: root
 47 |             POSTGRES_PORT: 5432
 48 |             POSTGRES_DATABASE: circle_test
 49 |             POSTGRES_SCHEMA: dbt_utils_integration_tests_postgres
 50 |             DBT_ENV_SECRET_POSTGRES_PASS: ''
 51 |           command: |
 52 |             . dbt_venv/bin/activate
 53 |             cd integration_tests
 54 |             dbt deps --target postgres
 55 |             dbt seed --target postgres --full-refresh
 56 |             dbt run --target postgres --exclude tag:skip+ tag:temporary_skip+
 57 |             dbt test --target postgres --exclude tag:skip+ tag:temporary_skip+
 58 | 
 59 |       - run:
 60 |           name: "Run Tests - Redshift"
 61 |           command: |
 62 |             . dbt_venv/bin/activate
 63 |             echo `pwd`
 64 |             cd integration_tests
 65 |             dbt deps --target redshift
 66 |             dbt seed --target redshift --full-refresh
 67 |             dbt run --target redshift --exclude tag:skip+ tag:temporary_skip+
 68 |             dbt test --target redshift --exclude tag:skip+ tag:temporary_skip+
 69 | 
 70 |       - run:
 71 |           name: "Run Tests - Snowflake"
 72 |           command: |
 73 |             . dbt_venv/bin/activate
 74 |             echo `pwd`
 75 |             cd integration_tests
 76 |             dbt deps --target snowflake
 77 |             dbt seed --target snowflake --full-refresh
 78 |             dbt run --target snowflake --exclude tag:skip+ tag:temporary_skip+
 79 |             dbt test --target snowflake --exclude tag:skip+ tag:temporary_skip+
 80 | 
 81 |       - run:
 82 |           name: "Run Tests - BigQuery"
 83 |           environment:
 84 |               BIGQUERY_SERVICE_KEY_PATH: "/home/circleci/bigquery-service-key.json"
 85 | 
 86 |           command: |
 87 |             . dbt_venv/bin/activate
 88 |             echo `pwd`
 89 |             cd integration_tests
 90 |             dbt deps --target bigquery
 91 |             dbt seed --target bigquery --full-refresh
 92 |             dbt run --target bigquery --full-refresh --exclude tag:skip+ tag:temporary_skip+
 93 |             dbt test --target bigquery --exclude tag:skip+ tag:temporary_skip+
 94 | 
 95 |       - run:
 96 |           name: "Run Tests - Databricks"
 97 |           command: |
 98 |             . dbt_venv/bin/activate
 99 |             echo `pwd`
100 |             cd integration_tests
101 |             dbt deps --target databricks
102 |             dbt seed --target databricks --full-refresh
103 |             dbt run --target databricks --exclude tag:skip+ tag:temporary_skip+
104 |             dbt test --target databricks --exclude tag:skip+ tag:temporary_skip+
105 | 
106 |       - save_cache:
107 |           key: deps1-{{ .Branch }}
108 |           paths:
109 |             - "dbt_venv"
110 | 
111 |             
112 |       - store_artifacts:
113 |           path: integration_tests/logs
114 |       - store_artifacts:
115 |           path: integration_tests/target
116 | 
117 | workflows:
118 |   version: 2
119 |   test-all:
120 |     jobs:
121 |       - build:
122 |           context:
123 |             - profile-redshift
124 |             - profile-snowflake
125 |             - profile-bigquery
126 |             - profile-databricks
127 | 


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | *       @clrcrl
2 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Report a bug or an issue you've found with this package
 4 | title: ''
 5 | labels: bug, triage
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | ### Describe the bug
11 | <!---
12 | A clear and concise description of what the bug is. You can also use the issue title to do this
13 | --->
14 | 
15 | ### Steps to reproduce
16 | <!---
17 | In as much detail as possible, please provide steps to reproduce the issue. Sample data that triggers the issue, example model code, etc is all very helpful here.
18 | --->
19 | 
20 | ### Expected results
21 | <!---
22 | A clear and concise description of what you expected to happen.
23 | --->
24 | 
25 | ### Actual results
26 | <!---
27 | A clear and concise description of what actually happened.
28 | --->
29 | 
30 | ### Screenshots and log output
31 | <!---
32 | If applicable, add screenshots or log output to help explain your problem.
33 | --->
34 | 
35 | ### System information
36 | **The contents of your `packages.yml` file:**
37 | 
38 | **Which database are you using dbt with?**
39 | - [ ] postgres
40 | - [ ] redshift
41 | - [ ] bigquery
42 | - [ ] snowflake
43 | - [ ] other (specify: ____________)
44 | 
45 | 
46 | **The output of `dbt --version`:**
47 | ```
48 | <output goes here>
49 | ```
50 | 
51 | **The operating system you're using:**
52 | 
53 | **The output of `python --version`:**
54 | 
55 | ### Additional context
56 | <!---
57 | Add any other context about the problem here. For example, if you think you know which line of code is causing the issue.
58 | --->
59 | 
60 | ### Are you interested in contributing the fix?
61 | <!---
62 | Let us know if you want to contribute the fix, and whether you would need a hand getting started
63 | --->
64 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this package
 4 | title: ''
 5 | labels: enhancement, triage
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | ### Describe the feature
11 | A clear and concise description of what you want to happen.
12 | 
13 | ### Describe alternatives you've considered
14 | A clear and concise description of any alternative solutions or features you've considered.
15 | 
16 | ### Additional context
17 | Is this feature database-specific? Which database(s) is/are relevant? Please include any other relevant context here.
18 | 
19 | ### Who will this benefit?
20 | What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly.
21 | 
22 | ### Are you interested in contributing this feature?
23 | <!---
24 | Let us know if you want to contribute the feature, and whether you would need a hand getting started
25 | --->
26 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | ## Description & motivation
 2 | <!---
 3 | Describe your changes, and why you're making them.
 4 | -->
 5 | 
 6 | ## Checklist
 7 | - [ ] I have verified that these changes work locally
 8 | - [ ] I have updated the README.md (if applicable)
 9 | - [ ] I have added tests & descriptions to my models (and macros if applicable)
10 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | # **what?**
 2 | # Run tests for dbt-audit-helper against supported adapters
 3 | 
 4 | # **why?**
 5 | # To ensure that dbt-audit-helper works as expected with all supported adapters
 6 | 
 7 | # **when?**
 8 | # On every PR, and every push to main and when manually triggered
 9 | 
10 | name: Package Integration Tests
11 | 
12 | on:
13 |     push:
14 |         branches:
15 |             - main
16 |     pull_request_target:
17 |     workflow_dispatch:
18 | 
19 | jobs:
20 |   run-tests:
21 |       uses: dbt-labs/dbt-package-testing/.github/workflows/run_tox.yml@v1
22 |       # this just tests with postgres so no variables need to be passed through.
23 |       # When it's time to add more adapters you will need to pass through inputs for
24 |       # the other adapters as shown in the below example for redshift
25 |     #   with:
26 |     #     # redshift
27 |     #     REDSHIFT_HOST: ${{ vars.REDSHIFT_HOST }}
28 |     #     REDSHIFT_USER: ${{ vars.REDSHIFT_USER }}
29 |     #     REDSHIFT_DATABASE: ${{ vars.REDSHIFT_DATABASE }}
30 |     #     REDSHIFT_SCHEMA: "integration_tests_redshift_${{ github.run_number }}"
31 |     #     REDSHIFT_PORT: ${{ vars.REDSHIFT_PORT }}
32 |     #   secrets:
33 |     #     DBT_ENV_SECRET_REDSHIFT_PASS: ${{ secrets.DBT_ENV_SECRET_REDSHIFT_PASS }}
34 | 


--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
 1 | # **what?**
 2 | # For issues that have been open for a while without activity, label
 3 | # them as stale with a warning that they will be closed out. If
 4 | # anyone comments to keep the issue open, it will automatically
 5 | # remove the stale label and keep it open.
 6 | 
 7 | # Stale label rules:
 8 | # awaiting_response, more_information_needed -> 90 days
 9 | # good_first_issue, help_wanted -> 360 days (a year)
10 | # tech_debt -> 720 days (2 years)
11 | # all else defaults -> 180 days (6 months)
12 | 
13 | # **why?**
14 | # To keep the repo in a clean state from issues that aren't relevant anymore
15 | 
16 | # **when?**
17 | # Once a day
18 | 
19 | name: "Close stale issues and PRs"
20 | on:
21 |   schedule:
22 |     - cron: "30 1 * * *"
23 | 
24 | permissions:
25 |   issues: write
26 |   pull-requests: write
27 | 
28 | jobs:
29 |   stale:
30 |     uses: dbt-labs/actions/.github/workflows/stale-bot-matrix.yml@main
31 | 


--------------------------------------------------------------------------------
/.github/workflows/triage-labels.yml:
--------------------------------------------------------------------------------
 1 | # **what?**
 2 | # When the maintenance team triages, we sometimes need more information from the issue creator.  In
 3 | # those cases we remove the `triage` label and add the `awaiting_response` label.  Once we
 4 | # receive a response in the form of a comment, we want the `awaiting_response` label removed
 5 | # in favor of the `triage` label so we are aware that the issue needs action.
 6 | 
 7 | # **why?**
 8 | # To help with our team's triage issue tracking
 9 | 
10 | # **when?**
11 | # This will run when a comment is added to an issue and that issue has the `awaiting_response` label.
12 | 
13 | name: Update Triage Label
14 | 
15 | on: issue_comment
16 | 
17 | defaults:
18 |   run:
19 |     shell: bash
20 | 
21 | permissions:
22 |   issues: write
23 | 
24 | jobs:
25 |   triage_label:
26 |     if: contains(github.event.issue.labels.*.name, 'awaiting_response')
27 |     uses: dbt-labs/actions/.github/workflows/swap-labels.yml@main
28 |     with:
29 |       add_label: "triage"
30 |       remove_label: "awaiting_response"
31 |     secrets: inherit
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | dbt_packages/
3 | logs/
4 | logfile
5 | .DS_Store
6 | package-lock.yml
7 | integration_tests/package-lock.yml
8 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {    
 2 |     "yaml.schemas": {
 3 |         "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_yml_files-latest.json": [
 4 |             "/**/*.yml",
 5 |             "!profiles.yml",
 6 |             "!dbt_project.yml",
 7 |             "!packages.yml",
 8 |             "!selectors.yml",
 9 |             "!profile_template.yml"
10 |         ],
11 |         "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_project-latest.json": [
12 |             "dbt_project.yml"
13 |         ],
14 |         "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/selectors-latest.json": [
15 |             "selectors.yml"
16 |         ],
17 |         "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/packages-latest.json": [
18 |             "packages.yml"
19 |         ]
20 |     },
21 | }


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # audit_helper 0.6.0
 2 | 🚨 This version requires dbt Core 1.2 or above, and is ready for dbt utils 1.0.
 3 | 
 4 | Changed:
 5 | * add column_name to output of compare_column_values by @leoebfolsom in https://github.com/dbt-labs/dbt-audit-helper/pull/47
 6 | * Easier switching between summary and details by @christineberger in https://github.com/dbt-labs/dbt-audit-helper/pull/52
 7 | * Removes references to dbt_utils for cross-db macros
 8 | 
 9 | New features:
10 | * dbt Cloud instructions for compare_column_values by @SamHarting in https://github.com/dbt-labs/dbt-audit-helper/pull/45
11 | * Compare all columns macro by @leoebfolsom in https://github.com/dbt-labs/dbt-audit-helper/pull/50
12 | 
13 | 
14 | # audit_helper 0.5.0
15 | This version brings full compatibility with dbt-core 1.0. It requires any version (minor and patch) of v1, which means far less need for compatibility releases in the future.
16 | 
17 | # audit_helper 0.4.1
18 | 🚨 This is a compatibility release in preparation for `dbt-core` v1.0.0 (🎉). Projects using this version with dbt-core v1.0.x can expect to see a deprecation warning. This will be resolved in the next minor release.


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # dbt-audit-helper
  2 | 
  3 | Useful macros when performing data audits
  4 | 
  5 | ## Contents
  6 | 
  7 | - [Installation instructions](#installation-instructions)
  8 | - [Compare Data Outputs](#compare-data-outputs)
  9 |   - Compare and classify:  
 10 |     - [compare_and_classify_query_results](#compare_and_classify_query_results-source)
 11 |     - [compare_and_classify_relation_rows](#compare_and_classify_relation_rows-source)
 12 |   - Quick identical check:
 13 |     - [quick_are_queries_identical](#quick_are_queries_identical-source)
 14 |     - [quick_are_relations_identical](#quick_are_relations_identical-source)
 15 |   - [compare\_row\_counts](#compare_row_counts-source)
 16 | - [Compare Columns](#compare-columns)
 17 |   - [compare\_column\_values](#compare_column_values-source)
 18 |   - [compare\_all\_columns](#compare_all_columns-source)
 19 |   - Compare which columns differ:
 20 |     - [compare\_which\_query\_columns\_differ](#compare_which_query_columns_differ-source)
 21 |     - [compare\_which\_relation\_columns\_differ](#compare_which_relation_columns_differ-source)
 22 |   - [compare\_relation\_columns](#compare_relation_columns-source)
 23 | - [Advanced Usage](#advanced-usage)
 24 |   - [Print Output To Logs](#print-output-to-logs)
 25 |   - [Use Output For Custom Singular Test](#use-output-for-custom-singular-test)
 26 | - [Legacy Macros](#legacy-macros)
 27 |   - [compare\_queries](#compare_queries-source)
 28 |   - [compare\_relations](#compare_relations-source)
 29 | - [Internal Macros](#internal-macros)
 30 | 
 31 | ## Installation instructions
 32 | 
 33 | New to dbt packages? Read more about them [here](https://docs.getdbt.com/docs/building-a-dbt-project/package-management/).
 34 | 
 35 | 1. Include this package in your `packages.yml` file — check [here](https://hub.getdbt.com/dbt-labs/audit_helper/latest/) for the latest version number.
 36 | 2. Run `dbt deps` to install the package.
 37 | 
 38 | ## Compare Data Outputs
 39 | 
 40 | ### compare_and_classify_query_results ([source](macros/compare_and_classify_query_results.sql))
 41 | 
 42 | Generates a row-by-row comparison of two queries, as well as summary stats of added, removed, identical and modified records. This prevents you from having to query your comparison tables multiple times to get raw data and summary data.
 43 | 
 44 | #### Output
 45 | 
 46 | | order_id | order_date | status    | dbt_audit_in_a | dbt_audit_in_b | dbt_audit_row_status | dbt_audit_num_rows_in_status | dbt_audit_sample_number |
 47 | |----------|------------|-----------|----------------|----------------|----------------------|------------------------------|-------------------------|
 48 | | 1        | 2024-01-01 | completed | True           | True           | identical            | 1                            | 1                       |
 49 | | 2        | 2024-01-02 | completed | True           | False          | modified             | 2                            | 1                       |
 50 | | 2        | 2024-01-02 | returned  | False          | True           | modified             | 2                            | 1                       |
 51 | | 3        | 2024-01-03 | completed | True           | False          | modified             | 2                            | 2                       |
 52 | | 3        | 2024-01-03 | completed | False          | True           | modified             | 2                            | 2                       |
 53 | | 4        | 2024-01-04 | completed | False          | True           | added                | 1                            | 1                       |
 54 | 
 55 | Note that there are 4 rows with the `modified` status, but `dbt_audit_num_rows_in_status` says 2. This is because it is counting each primary key only once.
 56 | 
 57 | #### Arguments
 58 | 
 59 | - `a_query` and `b_query`: The queries you want to compare.
 60 | - `primary_key_columns` (required): A list of primary key column(s) used to join the queries together for comparison.
 61 | - `columns` (required): The columns present in the two queries you want to compare.
 62 | - `sample_limit`: Number of sample records to return per status. Defaults to 20.
 63 | 
 64 | #### Usage
 65 | 
 66 | ```sql
 67 | 
 68 | {% set old_query %}
 69 |   select
 70 |     id as order_id,
 71 |     amount,
 72 |     customer_id
 73 |   from old_database.old_schema.fct_orders
 74 | {% endset %}
 75 | 
 76 | {% set new_query %}
 77 |   select
 78 |     order_id,
 79 |     amount,
 80 |     customer_id
 81 |   from {{ ref('fct_orders') }}
 82 | {% endset %}
 83 | 
 84 | {{ 
 85 |   audit_helper.compare_and_classify_query_results(
 86 |     old_query, 
 87 |     new_query, 
 88 |     primary_key_columns=['order_id'], 
 89 |     columns=['order_id', 'amount', 'customer_id']
 90 |   )
 91 | }}
 92 | 
 93 | ```
 94 | 
 95 | ### compare_and_classify_relation_rows ([source](macros/compare_and_classify_relation_rows.sql))
 96 | 
 97 | A wrapper to `compare_and_classify_query_results`, except it takes two [Relations](https://docs.getdbt.com/reference/dbt-classes#relation) (instead of two queries).
 98 | 
 99 | Each relation must have the same columns with the same names, but they do not have to be in the same order.
100 | 
101 | #### Arguments
102 | 
103 | - `a_relation` and `b_relation`: The [relations](https://docs.getdbt.com/reference/dbt-classes#relation) you want to compare.
104 | - `primary_key_columns` (required): A list of primary key column(s) used to join the queries together for comparison.
105 | - `columns` (optional): The columns present in the two queries you want to compare. Build long lists with a few exclusions with `dbt_utils.get_filtered_columns_in_relation`, or pass `None` and the macro will find all intersecting columns automatically.
106 | - `sample_limit`: Number of sample records to return per status. Defaults to 20.
107 | 
108 | #### Usage
109 | 
110 | ```sql
111 | 
112 | {% set old_relation = adapter.get_relation(
113 |       database = "old_database",
114 |       schema = "old_schema",
115 |       identifier = "fct_orders"
116 | ) -%}
117 | 
118 | {% set dbt_relation = ref('fct_orders') %}
119 | 
120 | {{ audit_helper.compare_and_classify_relation_rows(
121 |     a_relation = old_relation,
122 |     b_relation = dbt_relation,
123 |     primary_key_columns = ["order_id"],
124 |     columns = None
125 | ) }}
126 | 
127 | ```
128 | 
129 | ### quick_are_queries_identical ([source](macros/quick_are_queries_identical.sql))
130 | 
131 | On supported adapters (currently Snowflake and BigQuery), take a hash of all rows in two queries and compare them.
132 | 
133 | This can be calculated relatively quickly compared to other macros in this package and can efficiently provide reassurance that a refactor introduced no changes.
134 | 
135 | #### Output
136 | 
137 | | are_tables_identical |
138 | |----------------------|
139 | | true                 |
140 | 
141 | #### Arguments
142 | 
143 | - `query_a` and `query_b`: The queries you want to compare.
144 | - `columns` (required): The columns present in the two queries you want to compare.
145 | 
146 | #### Usage
147 | 
148 | ```sql
149 | 
150 | {% set old_query %}
151 |     select * from old_database.old_schema.dim_product
152 | {% endset %}
153 | 
154 | {% set new_query %}
155 |     select * from {{ ref('dim_product') }}
156 | {% endset %}
157 | 
158 | {{ audit_helper.quick_are_queries_identical(
159 |     query_a = old_query,
160 |     query_b = new_query,
161 |     columns=['order_id', 'amount', 'customer_id']
162 |   ) 
163 | }}
164 | 
165 | ```
166 | 
167 | ### quick_are_relations_identical ([source](macros/quick_are_relations_identical.sql))
168 | 
169 | A wrapper to `quick_are_queries_identical`, except it takes two [Relations](https://docs.getdbt.com/reference/dbt-classes#relation) (instead of two queries).
170 | 
171 | Each relation must have the same columns with the same names, but they do not have to be in the same order. Build long lists with a few exclusions with `dbt_utils.get_filtered_columns_in_relation`, or pass `None` and the macro will find all intersecting columns automatically.
172 | 
173 | #### Usage
174 | 
175 | ```sql
176 | 
177 | {% set old_relation = adapter.get_relation(
178 |       database = "old_database",
179 |       schema = "old_schema",
180 |       identifier = "fct_orders"
181 | ) -%}
182 | 
183 | {% set dbt_relation = ref('fct_orders') %}
184 | 
185 | {{ audit_helper.quick_are_relations_identical(
186 |     a_relation = old_relation,
187 |     b_relation = dbt_relation,
188 |     columns = None
189 | ) }}
190 | 
191 | ```
192 | 
193 | ### compare_row_counts ([source](macros/compare_row_counts.sql))
194 | 
195 | This macro does a simple comparison of the row counts in two relations.
196 | 
197 | #### Output
198 | 
199 | Calling this macro on two different relations will return a very simple table comparing the row counts in each relation.
200 | 
201 | | relation_name                                | total_records  |
202 | |----------------------------------------------|---------------:|
203 | | target_database.target_schema.my_a_relation  |     34,231     |
204 | | target_database.target_schema.my_b_relation  |     24,789     |
205 | 
206 | #### Arguments
207 | 
208 | - `a_relation` and `b_relation`: The [Relations](https://docs.getdbt.com/reference/dbt-classes#relation) you want to compare.
209 | 
210 | #### Usage
211 | 
212 | ```sql
213 | 
214 | {% set old_relation = adapter.get_relation(
215 |       database = "old_database",
216 |       schema = "old_schema",
217 |       identifier = "fct_orders"
218 | ) -%}
219 | 
220 | {% set dbt_relation = ref('fct_orders') %}
221 | 
222 | {{ audit_helper.compare_row_counts(
223 |     a_relation = old_relation,
224 |     b_relation = dbt_relation
225 | ) }}
226 | 
227 | ```
228 | 
229 | ## Compare Columns
230 | 
231 | ### compare_which_query_columns_differ ([source](macros/compare_which_query_columns_differ.sql))
232 | 
233 | This macro generates SQL that can be used to detect which columns returned by two queries contain _any_ value level changes.
234 | 
235 | It does not return the magnitude of the change, only whether or not a difference has occurred. Only records that exist in both queries (as determined by the primary key) are considered.
236 | 
237 | #### Output
238 | 
239 | The generated query returns whether or not each column has any differences:
240 | 
241 | | column_name | has_difference |
242 | |-------------|----------------|
243 | | order_id    | False          |
244 | | customer_id | False          |
245 | | order_date  | True           |
246 | | status      | False          |
247 | | amount      | True           |
248 | 
249 | #### Arguments
250 | 
251 | - `a_query` and `b_query`: The queries to compare
252 | - `primary_key_columns` (required): A list of primary key column(s) used to join the queries together for comparison.
253 | - `columns` (required): The columns present in the two queries you want to compare.
254 | 
255 | ### compare_which_relation_columns_differ ([source](macros/compare_which_relation_columns_differ.sql))
256 | 
257 | A wrapper to `compare_which_query_columns_differ`, except it takes two [Relations](https://docs.getdbt.com/reference/dbt-classes#relation) (instead of two queries).
258 | 
259 | Each relation must have the same columns with the same names, but they do not have to be in the same order. Build long lists with a few exclusions with `dbt_utils.get_filtered_columns_in_relation`, or pass `None` and the macro will find all intersecting columns automatically.
260 | 
261 | #### Usage
262 | 
263 | ```sql
264 | 
265 | {% set old_relation = adapter.get_relation(
266 |       database = "old_database",
267 |       schema = "old_schema",
268 |       identifier = "fct_orders"
269 | ) -%}
270 | 
271 | {% set dbt_relation = ref('fct_orders') %}
272 | 
273 | {{ audit_helper.compare_which_relation_columns_differ(
274 |     a_relation = old_relation,
275 |     b_relation = dbt_relation,
276 |     primary_key_columns = ["order_id"],
277 |     columns = None
278 | ) }}
279 | 
280 | ```
281 | 
282 | ```sql
283 | 
284 | {% set old_relation = adapter.get_relation(
285 |       database = "old_database",
286 |       schema = "old_schema",
287 |       identifier = "fct_orders"
288 | ) -%}
289 | 
290 | {% set dbt_relation = ref('fct_orders') %}
291 | 
292 | {% set columns = dbt_utils.get_filtered_columns_in_relation(old_relation, exclude=["loaded_at"]) %}
293 | 
294 | {{ audit_helper.compare_which_relation_columns_differ(
295 |     a_relation = old_relation,
296 |     b_relation = dbt_relation,
297 |     primary_key_columns = ["order_id"],
298 |     columns = columns
299 | ) }}
300 | 
301 | ```
302 | 
303 | ### compare_column_values ([source](macros/compare_column_values.sql))
304 | 
305 | This macro generates SQL that can be used to compare a column's values across two queries. This macro is useful when you've used the `compare_which_query_columns_differ` macro to identify a column with differing values and want to understand how many discrepancies are caused by that column.
306 | 
307 | #### Output
308 | 
309 | The generated query returns a summary of the count of rows where the column's values:
310 | 
311 | - match perfectly
312 | - differ
313 | - are null in `a` or `b` or both
314 | - are missing from `a` or `b`
315 | 
316 | | match_status                | count  | percent_of_total |
317 | |-----------------------------|-------:|-----------------:|
318 | | ✅: perfect match            | 37,721 | 79.03            |
319 | | ✅: both are null            | 5,789  | 12.13            |
320 | | 🤷: missing from a          | 5     | 0.01             |
321 | | 🤷: missing from b          | 20     | 0.04             |
322 | | 🤷: value is null in a only | 59     | 0.12             |
323 | | 🤷: value is null in b only | 73     | 0.15             |
324 | | ❌: ‍values do not match    | 4,064  | 8.51             |
325 | 
326 | #### Arguments
327 | 
328 | - `a_query` and `b_query`: The queries you want to compare.
329 | - `primary_key`: The primary key of the model. Used to sort unmatched results for row-by-row validation. Must be a unique key (unique and never `null`) in both tables, otherwise the join won't work as expected.
330 | - `column_to_compare`: The column you want to compare.
331 | - `emojis` (optional): Boolean argument that defaults to `true` and displays ✅, 🤷 and ❌ for easier visual scanning. If you don't want to include emojis in the output, set it to `false`.
332 | - `a_relation_name` and `b_relation_name` (optional): Names of the queries you want displayed in the output. Default is `a` and `b`.
333 | 
334 | #### Usage
335 | 
336 | ```sql
337 | 
338 | {% set old_query %}
339 |     select * from old_database.old_schema.dim_product
340 |     where is_latest
341 | {% endset %}
342 | 
343 | {% set new_query %}
344 |     select * from {{ ref('dim_product') }}
345 | {% endset %}
346 | 
347 | {{ audit_helper.compare_column_values(
348 |     a_query = old_query,
349 |     b_query = new_query,
350 |     primary_key = "product_id",
351 |     column_to_compare = "status"
352 | ) }}
353 | 
354 | ```
355 | 
356 | ### compare_all_columns ([source](macros/compare_all_columns.sql))
357 | 
358 | Similar to `compare_column_values`, except it can be used to compare _all_ columns' values across two _relations_. This macro is useful when you've used the `compare_queries` macro and found that a significant number of your records don't match and want to understand how many discrepancies are caused by each column.
359 | 
360 | #### Output
361 | 
362 | By default, the generated query returns a summary of the count of rows where each column's values:
363 | 
364 | - match perfectly
365 | - differ
366 | - are null in `a` or `b` or both
367 | - are missing from `a` or `b`
368 | 
369 | | column_name  | perfect_match  | null_in_a | null_in_b | missing_from_a | missing_from_b | conflicting_values |
370 | |-------|-------:|------:|-----------------:|------:|------:|------:|
371 | | order_id  | 10 | 0 | 0 | 0 | 0 | 0 |
372 | | order_date  | 2 | 0 | 0 | 0 | 0 | 8 |
373 | | order_status | 6 | 4 | 4 | 0 | 0 | 0 |
374 | 
375 | Setting the `summarize` argument to `false` lets you check the match status of a specific column value of a specific row:
376 | 
377 | | primary_key | column_name | perfect_match  | null_in_a | null_in_b | missing_from_a | missing_from_b | conflicting_values |
378 | |-------|-------|-------:|------:|-----------------:|------:|------:|------:|
379 | | 1 | order_id | true | false | false | false | false | false |
380 | | 1 | order_date | false | false | false | false | false | true |
381 | | 1 | order_status | false | true | true | false | false | false |
382 | | ... | ... | ... | ... | ... | ... | ... | ... |
383 | 
384 | #### Arguments
385 | 
386 | - `a_relation` and `b_relation`: The [relations](https://docs.getdbt.com/reference/dbt-classes#relation) you want to compare. Any two relations that have the same columns can be used.
387 | - `primary_key`: The primary key of the model (or concatenated sql to create the primary key). Used to sort unmatched results for row-by-row validation. Must be a unique key (unique and never `null`) in both tables, otherwise the join won't work as expected.
388 | - `exclude_columns` (optional): Any columns you wish to exclude from the validation.
389 | - `summarize` (optional): Allows you to switch between a summary or detailed view of the compared data. Defaults to `true`.
390 | 
391 | #### Usage
392 | 
393 | ```sql
394 | 
395 | {% set old_relation = adapter.get_relation(
396 |       database = "old_database",
397 |       schema = "old_schema",
398 |       identifier = "fct_orders"
399 | ) -%}
400 | 
401 | {% set dbt_relation = ref('fct_orders') %}
402 | 
403 | {{ audit_helper.compare_all_columns(
404 |     a_relation = old_relation,
405 |     b_relation = dbt_relation,
406 |     primary_key = "order_id"
407 | ) }}
408 | 
409 | ```
410 | 
411 | ### compare_relation_columns ([source](macros/compare_relation_columns.sql))
412 | 
413 | This macro generates SQL that can be used to compare the schema (ordinal position and data types of columns) of two relations. This is especially useful when:
414 | 
415 | - Comparing a new version of a relation with an old one, to make sure that the structure is the same
416 | - Helping figure out why a `union` of two relations won't work (often because the data types are different)
417 | 
418 | #### Output
419 | 
420 | | column_name | a_ordinal_position | b_ordinal_position | a_data_type       | b_data_type       | has_ordinal_position_match | has_data_type_match | in_a_only | in_b_only | in_both |
421 | |-------------|--------------------|--------------------|-------------------|-------------------| -------------------------- | ------------------- | --------- | --------- | ------- |
422 | | order_id    | 1                  | 1                  | integer           | integer           |                       True |                True |     False |     False |    True |
423 | | customer_id | 2                  | 2                  | integer           | integer           |                       True |                True |     False |     False |    True |
424 | | order_date  | 3                  | 3                  | timestamp         | date              |                       True |               False |     False |     False |    True |
425 | | status      | 4                  | 5                  | character varying | character varying |                      False |                True |     False |     False |    True |
426 | | amount      | 5                  | 4                  | bigint            | bigint            |                      False |                True |     False |     False |    True |
427 | 
428 | Note: For adapters other than BigQuery, Postgres, Redshift, and Snowflake, the ordinal position is inferred based on the response from dbt Core's `adapter.get_columns_in_relation()`, as opposed to being loaded from the information schema.
429 | 
430 | #### Arguments
431 | 
432 | - `a_relation` and `b_relation`: The [relations](https://docs.getdbt.com/reference/dbt-classes#relation) you want to compare.
433 | 
434 | #### Usage
435 | 
436 | ```sql
437 | 
438 | {% set old_relation = adapter.get_relation(
439 |       database = "old_database",
440 |       schema = "old_schema",
441 |       identifier = "fct_orders"
442 | ) -%}
443 | 
444 | {% set dbt_relation = ref('fct_orders') %}
445 | 
446 | {{ audit_helper.compare_relation_columns(
447 |     a_relation=old_relation,
448 |     b_relation=dbt_relation
449 | ) }}
450 | 
451 | ```
452 | 
453 | ## Advanced Usage
454 | 
455 | ### Print Output To Logs
456 | 
457 | You may want to print the output of the query generated by an audit helper macro to your logs (instead of previewing the results).
458 | 
459 | To do so, store the results of your query in a variable and print them to the logs.
460 | 
461 | For example, using the `compare_column_values` macro:
462 | 
463 | ```sql
464 | {% set old_query %}
465 |     select * from old_database.old_schema.dim_product
466 |     where is_latest
467 | {% endset %}
468 | 
469 | {% set new_query %}
470 |     select * from {{ ref('dim_product') }}
471 | {% endset %}
472 | 
473 | {% set audit_query = audit_helper.compare_column_values(
474 |     a_query = old_query,
475 |     b_query = new_query,
476 |     primary_key = "product_id",
477 |     column_to_compare = "status"
478 | ) %}
479 | 
480 | {% set audit_results = run_query(audit_query) %}
481 | 
482 | {% if execute %}
483 | {% do audit_results.print_table() %}
484 | {% endif %}
485 | ```
486 | 
487 | The `.print_table()` function is not compatible with dbt Cloud, so an adjustment needs to be made in order to print the results. Add the following code to a new macro file:
488 | 
489 | ```sql
490 | {% macro print_audit_output() %}
491 | {%- set columns_to_compare=adapter.get_columns_in_relation(ref('fct_orders'))  -%}
492 | 
493 | {% set old_etl_relation_query %}
494 |     select * from public.dim_product
495 | {% endset %}
496 | 
497 | {% set new_etl_relation_query %}
498 |     select * from {{ ref('fct_orders') }}
499 | {% endset %}
500 | 
501 | {% if execute %}
502 |     {% for column in columns_to_compare %}
503 |         {{ log('Comparing column "' ~ column.name ~'"', info=True) }}
504 |         {% set audit_query = audit_helper.compare_column_values(
505 |                 a_query=old_etl_relation_query,
506 |                 b_query=new_etl_relation_query,
507 |                 primary_key="order_id",
508 |                 column_to_compare=column.name
509 |         ) %}
510 | 
511 |         {% set audit_results = run_query(audit_query) %}
512 | 
513 |         {% do log(audit_results.column_names, info=True) %}
514 |             {% for row in audit_results.rows %}
515 |                   {% do log(row.values(), info=True) %}
516 |             {% endfor %}
517 |     {% endfor %}
518 | {% endif %}
519 | 
520 | {% endmacro %}
521 | ```
522 | 
523 | To run the macro, execute `dbt run-operation print_audit_output` in the command bar.
524 | 
525 | ### Use Output For Custom Singular Test
526 | 
527 | If desired, you can use the audit helper macros to add a dbt test to your project to protect against unwanted changes to your data outputs.
528 | 
529 | For example, using the `compare_all_columns` macro, you could set up a test that will fail if any column values do not match.
530 | 
531 | Users can configure what exactly constitutes a value match or failure. If there is a test failure, results can be inspected in the warehouse. The primary key and the column name can be included in the test output that gets written to the warehouse. This enables the user to join test results to relevant tables in your dev or prod schema to investigate the error.
532 | 
533 | _Note: this test should only be used on (and will only work on) models that have a primary key that is reliably `unique` and `not_null`. [Generic dbt tests](https://docs.getdbt.com/docs/building-a-dbt-project/tests#generic-tests) should be used to ensure the model being tested meets the requirements of `unique` and `not_null`._
534 | 
535 | To create a test for the `stg_customers` model, create a custom test
536 | in the `tests` subdirectory of your dbt project that looks like this:
537 | 
538 | ```sql
539 | {{ 
540 |   audit_helper.compare_all_columns(
541 |     a_relation=ref('stg_customers'), -- in a test, this ref will compile as your dev or PR schema.
542 |     b_relation=api.Relation.create(database='dbt_db', schema='analytics_prod', identifier='stg_customers'), -- you can explicitly write a relation to select your production schema, or any other db/schema/table you'd like to use for comparison testing.
543 |     exclude_columns=['updated_at'], 
544 |     primary_key='id'
545 |   ) 
546 | }}
547 | where not perfect_match
548 | ```
549 | 
550 | The `where not perfect_match` statement is an example of a filter you can apply to define what constitutes a test failure. The test will fail if any rows don't meet the requirement of a perfect match. Failures would include:
551 | 
552 | - If the primary key exists in both relations, but one model has a null value in a column.
553 | - If a primary key is missing from one relation.
554 | - If the primary key exists in both relations, but the value conflicts.
555 | 
556 | If you'd like the test to only fail when there are conflicting values, you could configure it like this:
557 | 
558 | ```sql
559 | {{ 
560 |   audit_helper.compare_all_columns(
561 |     a_relation=ref('stg_customers'), 
562 |     b_relation=api.Relation.create(database='dbt_db', schema='analytics_prod', identifier='stg_customers'),
563 |     primary_key='id'
564 |   ) 
565 | }}
566 | where conflicting_values
567 | ```
568 | 
569 | If you want to create test results that include columns from the model itself for easier inspection, that can be written into the test:
570 | 
571 | ```sql
572 | {{ 
573 |   audit_helper.compare_all_columns(
574 |     a_relation=ref('stg_customers'),
575 |     b_relation=api.Relation.create(database='dbt_db', schema='analytics_prod', identifier='stg_customers'), 
576 |     exclude_columns=['updated_at'], 
577 |     primary_key='id'
578 |   ) 
579 | }}
580 | left join {{ ref('stg_customers') }} using(id)
581 | ```
582 | 
583 | This structure also allows for the test to group or filter by any attribute in the model or in the macro's output as part of the test, for example:
584 | 
585 | ```sql
586 | with base_test_cte as (
587 |   {{ 
588 |     audit_helper.compare_all_columns(
589 |       a_relation=ref('stg_customers'),
590 |       b_relation=api.Relation.create(database='dbt_db', schema='analytics_prod', identifier='stg_customers'), 
591 |       exclude_columns=['updated_at'], 
592 |       primary_key='id'
593 |     ) 
594 |   }}
595 |   left join {{ ref('stg_customers') }} using(id)
596 |   where conflicting_values
597 | )
598 | select
599 |   status, -- assume there's a "status" column in stg_customers
600 |   count(distinct case when conflicting_values then id end) as conflicting_values
601 | from base_test_cte
602 | group by 1
603 | ```
604 | 
605 | You can write a `compare_all_columns` test on an individual table, and the test will be run as part of a full test suite run - `dbt test --select stg_customers`.
606 | 
607 | If you want to [store results in the warehouse for further analysis](https://docs.getdbt.com/docs/building-a-dbt-project/tests#storing-test-failures), add the `--store-failures` flag.
608 | 
609 | ## Legacy Macros
610 | 
611 | ### compare_queries ([source](macros/compare_queries.sql))
612 | 
613 | > [!TIP]
614 | > Consider `compare_and_classify_query_results` instead
615 | 
616 | This macro generates SQL that can be used to do a row-by-row comparison of two queries. This macro is particularly useful when you want to check that the output of a refactored model (or a model that you are moving over from a legacy system) is identical to the original. `compare_queries` provides flexibility when:
617 | 
618 | - You need to filter out records from one of the relations.
619 | - You need to rename or recast some columns to get them to match up.
620 | - You only want to compare a small number of columns, so it's easier to write the columns you want to compare, rather than the columns you want to exclude.
621 | 
622 | #### Output
623 | 
624 | By default, the generated query returns a summary of the count of rows that are unique to `a`, unique to `b`, and identical:
625 | 
626 | | in_a  | in_b  | count | percent_of_total |
627 | |-------|-------|------:|-----------------:|
628 | | True  | True  | 6870  | 99.74            |
629 | | True  | False | 9     | 0.13             |
630 | | False | True  | 9     | 0.13             |
631 | 
632 | Setting the `summarize` argument to `false` lets you check which rows do not match between relations:
633 | 
634 | | order_id | order_date | status    | in_a  | in_b  |
635 | |----------|------------|-----------|-------|-------|
636 | | 1        | 2018-01-01 | completed | True  | False |
637 | | 1        | 2018-01-01 | returned  | False | True  |
638 | | 2        | 2018-01-02 | completed | True  | False |
639 | | 2        | 2018-01-02 | returned  | False | True  |
640 | 
641 | #### Arguments
642 | 
643 | - `a_query` and `b_query`: The queries you want to compare.
644 | - `primary_key` (optional): The primary key of the model (or concatenated sql to create the primary key). Used to sort unmatched results for row-by-row validation.
645 | - `summarize` (optional): Allows you to switch between a summary or detailed view of the compared data. Accepts `true` or `false` values. Defaults to `true`.
646 | - `limit` (optional): Allows you to limit the number of rows returned when `summarize = False`. Defaults to `None` (no limit).
647 | 
648 | #### Usage
649 | 
650 | ```sql
651 | 
652 | {% set old_query %}
653 |   select
654 |     id as order_id,
655 |     amount,
656 |     customer_id
657 |   from old_database.old_schema.fct_orders
658 | {% endset %}
659 | 
660 | {% set new_query %}
661 |   select
662 |     order_id,
663 |     amount,
664 |     customer_id
665 |   from {{ ref('fct_orders') }}
666 | {% endset %}
667 | 
668 | {{ audit_helper.compare_queries(
669 |     a_query = old_query,
670 |     b_query = new_query,
671 |     primary_key = "order_id"
672 | ) }}
673 | 
674 | ```
675 | 
676 | ### compare_relations ([source](macros/compare_relations.sql))
677 | 
678 | > [!TIP]
679 | > Consider `compare_and_classify_relation_rows` instead
680 | 
681 | A wrapper around `compare_queries` that takes two [Relations](https://docs.getdbt.com/reference/dbt-classes#relation) instead of two queries.
682 | 
683 | Each relation must have the same columns with the same names, but they do not have to be in the same order. Use `exclude_columns` if some columns only exist in one relation.
684 | 
685 | #### Arguments
686 | 
687 | - `a_relation` and `b_relation`: The [relations](https://docs.getdbt.com/reference/dbt-classes#relation) you want to compare.
688 | - `primary_key` (optional): The primary key of the model (or concatenated sql to create the primary key). Used to sort unmatched results for row-by-row validation.
689 | - `exclude_columns` (optional): Any columns you wish to exclude from the validation.
690 | - `summarize` (optional): Allows you to switch between a summary or detailed view of the compared data. Accepts `true` or `false` values. Defaults to `true`.
691 | - `limit` (optional): Allows you to limit the number of rows returned when `summarize = False`. Defaults to `None` (no limit).
692 | 
693 | #### Usage
694 | 
695 | ```sql
696 | 
697 | {% set old_relation = adapter.get_relation(
698 |       database = "old_database",
699 |       schema = "old_schema",
700 |       identifier = "fct_orders"
701 | ) -%}
702 | 
703 | {% set dbt_relation = ref('fct_orders') %}
704 | 
705 | {{ audit_helper.compare_relations(
706 |     a_relation = old_relation,
707 |     b_relation = dbt_relation,
708 |     exclude_columns = ["loaded_at"],
709 |     primary_key = "order_id"
710 | ) }}
711 | 
712 | ```
713 | 
714 | ## Internal Macros
715 | 
716 | Macros prefixed with an `_` (such as those in the `utils/` subdirectory) are for private use. They are not documented or contracted and can change without notice.
717 | 


--------------------------------------------------------------------------------
/RELEASE.md:
--------------------------------------------------------------------------------
 1 | # dbt-audit-helper releases
 2 | 
 3 | ## When do we release?
 4 | There are a few scenarios that might prompt a release:
 5 | 
 6 | | Scenario                                   | Release type |
 7 | |--------------------------------------------|--------------|
 8 | | Breaking changes to existing macros        | minor        |
 9 | | New functionality                          | minor        |
10 | | Fixes to existing macros                   | patch        |
11 | 
12 | ## Release process
13 | 
14 | 1. Begin a new release by clicking [here](https://github.com/dbt-labs/dbt-audit-helper/releases/new)
15 | 1. Click "Choose a tag", then paste your version number (with no "v" in the name), then click "Create new tag: x.y.z on publish"
16 |     - The "Release title" will be identical to the tag name
17 | 1. Click the "Generate release notes" button
18 | 1. Copy and paste the generated release notes into `CHANGELOG.md`, commit, and merge into the `main` branch
19 | 1. Click the "Publish release" button
20 |     - This will automatically create an "Assets" section containing:
21 |         - Source code (zip)
22 |         - Source code (tar.gz)
23 | 


--------------------------------------------------------------------------------
/dbt_project.yml:
--------------------------------------------------------------------------------
 1 | name: 'audit_helper'
 2 | version: '0.4.0'
 3 | config-version: 2
 4 | 
 5 | require-dbt-version: [">=1.2.0", "<2.0.0"]
 6 | 
 7 | target-path: "target"
 8 | clean-targets: ["target", "dbt_packages"]
 9 | macro-paths: ["macros"]
10 | log-path: "logs"
11 | 


--------------------------------------------------------------------------------
/integration_tests/.env/postgres.env:
--------------------------------------------------------------------------------
1 | POSTGRES_HOST=localhost
2 | POSTGRES_USER=root
3 | DBT_ENV_SECRET_POSTGRES_PASS=password
4 | POSTGRES_PORT=5432
5 | POSTGRES_DATABASE=audit_helper_test
6 | POSTGRES_SCHEMA=audit_helper_integration_tests_postgres
7 | 


--------------------------------------------------------------------------------
/integration_tests/analyses/compare_column_values_smoke_test.sql:
--------------------------------------------------------------------------------
 1 | {% set a_query %}
 2 |     select * from {{ ref('data_compare_relations__a_relation') }}
 3 | {% endset %}
 4 | 
 5 | {% set audit_query = audit_helper.compare_column_values(
 6 |     a_query=a_query,
 7 |     b_query=a_query,
 8 |     primary_key="col_a",
 9 |     column_to_compare="col_b"
10 | ) %}
11 | 
12 | {{ audit_query }}
13 | 
14 | {% if execute %}
15 | 
16 | {% set audit_results = run_query(audit_query) %}
17 | 
18 | {% do audit_results.print_table() %}
19 | 
20 | {% endif %}
21 | 


--------------------------------------------------------------------------------
/integration_tests/analyses/compare_relation_columns_smoke_test.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_relations__a_relation') %}
 2 | 
 3 | {% set compare_relation_columns_sql = audit_helper.compare_relation_columns(
 4 |     a_relation,
 5 |     a_relation
 6 | ) %}
 7 | 
 8 | {{ compare_relation_columns_sql }}
 9 | 
10 | {% if execute %}
11 | 
12 | {% set results = run_query(compare_relation_columns_sql) %}
13 | {% do results.print_table()  %}
14 | 
15 | {% endif %}
16 | 


--------------------------------------------------------------------------------
/integration_tests/dbt_project.yml:
--------------------------------------------------------------------------------
 1 | name: 'audit_helper_integration_tests'
 2 | version: '1.0'
 3 | config-version: 2
 4 | 
 5 | profile: 'integration_tests'
 6 | 
 7 | model-paths: ["models"]
 8 | analysis-paths: ["analyses"]
 9 | test-paths: ["tests"]
10 | seed-paths: ["seeds"]
11 | macro-paths: ["macros"]
12 | 
13 | target-path: "target"  # directory which will store compiled SQL files
14 | clean-targets:         # directories to be removed by `dbt clean`
15 |     - "target"
16 |     - "dbt_packages"
17 | 
18 | seeds:
19 |   +quote_columns: false
20 | 
21 | vars:
22 |   compare_queries_summarize: true
23 |   primary_key_columns_var: ['col1']
24 |   columns_var: ['col1']
25 |   event_time_var:
26 |   quick_are_queries_identical_cols: ['col1']
27 | 
28 | flags:
29 |   send_anonymous_usage_stats: False
30 |   use_colors: True


--------------------------------------------------------------------------------
/integration_tests/macros/unit_tests/struct_generation_macros.sql:
--------------------------------------------------------------------------------
 1 | {%- macro _basic_json_function() -%}
 2 |     {#- Emit the bare name of the active adapter's key/value constructor function; nothing else is rendered. -#}
 3 |     {%- set constructor_by_adapter = {
 4 |         'snowflake': 'object_construct', 'bigquery': 'json_object', 'databricks': 'map'
 5 |     } -%}
 6 |     {%- if target.type in constructor_by_adapter -%}
 7 |         {{- constructor_by_adapter[target.type] -}}
 8 |     {%- elif execute -%}
 9 |         {#- Unknown adapter: fail only when actually executing, not while dbt is parsing. -#}
10 |         {%- do exceptions.raise_compiler_error("Unknown adapter '"~ target.type ~ "'") -%}
11 |     {%- endif -%}
12 | {%- endmacro -%}
13 | 
14 | {% macro _complex_json_function(json) %}
15 |     {# Wraps the `json` argument in the JSON-parsing call for the active adapter (redshift, databricks, snowflake or bigquery). #}
16 |     {% if target.type == 'redshift' %}
17 |         json_parse({{ json }})
18 |     {% elif target.type == 'databricks' %}
19 |         from_json({{ json }}, schema_of_json({{ json }}))
20 |     {% elif target.type in ['snowflake', 'bigquery'] %}
21 |         parse_json({{ json }})
22 |     {% elif execute %}
23 |         {# Only raise exception if it's actually being called, not during parsing #}
24 |         {%- do exceptions.raise_compiler_error("Unknown adapter '"~ target.type ~ "'") -%}    
25 |     {% endif %}
26 | {% endmacro %}


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_all_columns_concat_pk_with_summary.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_all_columns__market_of_choice_produce__concat_pk')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_all_columns__albertsons_produce__concat_pk') %}
 4 | 
 5 | {{ audit_helper.compare_all_columns(
 6 |     a_relation=a_relation,
 7 |     b_relation=b_relation,
 8 |     primary_key=dbt_utils.generate_surrogate_key(['produce_category', 'id'])
 9 | ) }}
10 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_all_columns_concat_pk_without_summary.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_all_columns__market_of_choice_produce__concat_pk')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_all_columns__albertsons_produce__concat_pk') %}
 4 | 
 5 | {{ audit_helper.compare_all_columns(
 6 |     a_relation=a_relation,
 7 |     b_relation=b_relation,
 8 |     primary_key=dbt_utils.generate_surrogate_key(['produce_category', 'id']),
 9 |     summarize=false
10 | ) }}
11 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_all_columns_where_clause.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_all_columns__market_of_choice_produce')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_all_columns__albertsons_produce') %}
 4 | 
 5 | {{ audit_helper.compare_all_columns(
 6 |     a_relation=a_relation,
 7 |     b_relation=b_relation,
 8 |     primary_key="id",
 9 |     summarize=false
10 | ) }}
11 | where not perfect_match


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_all_columns_with_summary.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_all_columns__market_of_choice_produce')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_all_columns__albertsons_produce') %}
 4 | 
 5 | {{ audit_helper.compare_all_columns(
 6 |     a_relation=a_relation,
 7 |     b_relation=b_relation,
 8 |     primary_key="id"
 9 | ) }}
10 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_all_columns_with_summary_and_exclude.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_all_columns__market_of_choice_produce')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_all_columns__albertsons_produce') %}
 4 | 
 5 | {{ audit_helper.compare_all_columns(
 6 |     a_relation=a_relation,
 7 |     b_relation=b_relation,
 8 |     primary_key="id",
 9 |     exclude_columns=['ripeness']
10 | ) }}
11 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_all_columns_without_summary.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_all_columns__market_of_choice_produce')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_all_columns__albertsons_produce') %}
 4 | 
 5 | {{ audit_helper.compare_all_columns(
 6 |     a_relation=a_relation,
 7 |     b_relation=b_relation,
 8 |     primary_key="id",
 9 |     summarize=false
10 | ) }}
11 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_and_classify_query_results.sql:
--------------------------------------------------------------------------------
 1 | -- this has no tests, it's just making sure that the introspective queries for event_time actually run
 2 | 
 3 | {{
 4 |     audit_helper.compare_and_classify_query_results(
 5 |         a_query="select * from " ~ ref('unit_test_model_a') ~ " where 1=1",
 6 |         b_query="select * from " ~ ref('unit_test_model_b') ~ " where 1=1",
 7 |         primary_key_columns=['id'],
 8 |         columns=['id', 'col1', 'col2'],
 9 |         event_time='created_at'
10 |     )
11 | }}


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_queries.sql:
--------------------------------------------------------------------------------
 1 | {% set a_query %}
 2 |   select * from {{ ref('data_compare_relations__a_relation') }}
 3 | {% endset %}
 4 | 
 5 | {% set b_query %}
 6 |   select * from {{ ref('data_compare_relations__b_relation') }}
 7 | {% endset %}
 8 | -- NOTE(review): sibling models over these relations pass primary_key="col_a"; confirm "order_id" is intended here (it is documented as a sort key for unmatched rows only)
 9 | {{ audit_helper.compare_queries(
10 |     a_query=a_query,
11 |     b_query=b_query,
12 |     primary_key="order_id"
13 | ) }}
14 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_queries_concat_pk_without_summary.sql:
--------------------------------------------------------------------------------
 1 | {% set a_query %}
 2 |   select * from {{ ref('data_compare_relations__a_relation') }}
 3 | {% endset %}
 4 | 
 5 | {% set b_query %}
 6 |   select * from {{ ref('data_compare_relations__b_relation') }}
 7 | {% endset %}
 8 | 
 9 | {{ audit_helper.compare_queries(
10 |     a_query=a_query,
11 |     b_query=b_query,
12 |     primary_key=dbt_utils.generate_surrogate_key(['col_a', 'col_b']),
13 |     summarize=false
14 | ) }}
15 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_queries_with_summary.sql:
--------------------------------------------------------------------------------
 1 | {% set a_query %}
 2 |   select * from {{ ref('data_compare_relations__a_relation') }}
 3 | {% endset %}
 4 | 
 5 | {% set b_query %}
 6 |   select * from {{ ref('data_compare_relations__b_relation') }}
 7 | {% endset %}
 8 | 
 9 | {{ audit_helper.compare_queries(
10 |     a_query=a_query,
11 |     b_query=b_query,
12 |     primary_key="col_a"
13 | ) }}


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_queries_without_summary.sql:
--------------------------------------------------------------------------------
 1 | {% set a_query %}
 2 |   select * from {{ ref('data_compare_relations__a_relation') }}
 3 | {% endset %}
 4 | 
 5 | {% set b_query %}
 6 |   select * from {{ ref('data_compare_relations__b_relation') }}
 7 | {% endset %}
 8 | 
 9 | {{ audit_helper.compare_queries(
10 |     a_query=a_query,
11 |     b_query=b_query,
12 |     primary_key="col_a",
13 |     summarize=false
14 | ) }}


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_relation_columns.sql:
--------------------------------------------------------------------------------
 1 | 
 2 | with audit_helper_results as (
 3 |     {{ audit_helper.compare_relation_columns(
 4 |         a_relation=ref('data_compare_relation_columns_a'),
 5 |         b_relation=ref('data_compare_relation_columns_b')
 6 |     ) }}
 7 | )
 8 | 
 9 | select 
10 |     --These need to be cast, otherwise they are technically typed as "sql_identifier" or "cardinal_number" on Redshift
11 |     {{ "lower(" if target.type == 'snowflake' }} cast(column_name as {{ dbt.type_string() }}) {{ ")" if target.type == 'snowflake' }} as column_name, 
12 |     cast(a_ordinal_position as {{ dbt.type_int() }}) as a_ordinal_position,
13 |     cast(b_ordinal_position as {{ dbt.type_int() }}) as b_ordinal_position,
14 |     --not checking the specific datatypes, as long as they match/don't match as expected then that's still checking the audit behaviour
15 |     has_ordinal_position_match,
16 |     has_data_type_match
17 | from audit_helper_results


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_relations_concat_pk_without_summary.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_relations__a_relation')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_relations__b_relation') %}
 4 | 
 5 | {{ audit_helper.compare_relations(
 6 |     a_relation=a_relation,
 7 |     b_relation=b_relation,
 8 |     primary_key=dbt_utils.generate_surrogate_key(['col_a', 'col_b']),
 9 |     summarize=false
10 | ) }}
11 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_relations_with_exclude.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_relations__a_relation')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_relations__b_relation') %}
 4 | 
 5 | {{ audit_helper.compare_relations(
 6 |     a_relation=a_relation,
 7 |     b_relation=b_relation,
 8 |     exclude_columns=['col_b'],
 9 |     primary_key="col_a"
10 | ) }}
11 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_relations_with_summary.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_relations__a_relation')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_relations__b_relation') %}
 4 | 
 5 | {{ audit_helper.compare_relations(
 6 |     a_relation=a_relation,
 7 |     b_relation=b_relation,
 8 |     primary_key="col_a"
 9 | ) }}
10 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_relations_without_exclude.sql:
--------------------------------------------------------------------------------
1 | {% set a_relation=ref('data_compare_relations__a_relation')%}
2 | 
3 | {% set b_relation=ref('data_compare_relations__b_relation') %}
4 | 
5 | {{ audit_helper.compare_relations(
6 |     a_relation=a_relation,
7 |     b_relation=b_relation
8 | ) }}
9 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_relations_without_summary.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_relations__a_relation')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_relations__b_relation') %}
 4 | 
 5 | {{ audit_helper.compare_relations(
 6 |     a_relation=a_relation,
 7 |     b_relation=b_relation,
 8 |     primary_key="col_a",
 9 |     summarize=false
10 | ) }}
11 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_row_counts.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_relations__a_relation')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_relations__b_relation') %}
 4 | 
 5 | select 
 6 |     case
 7 |         when relation_name = '{{ a_relation }}'
 8 |             then 'a'
 9 |         else 'b'
10 |     end as relation_name, 
11 |     total_records
12 | 
13 | from (
14 | 
15 |     {{ audit_helper.compare_row_counts(
16 |         a_relation=a_relation,
17 |         b_relation=b_relation
18 |     ) }}
19 | 
20 | ) as base_query 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_which_columns_differ.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_which_columns_differ_a')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_which_columns_differ_b') %}
 4 | 
 5 | -- lowercase column_name for CI; presumably so the output matches on adapters that upper-case identifiers (e.g. Snowflake) - confirm
 6 | 
 7 | select 
 8 |     lower(column_name) as column_name,
 9 |     has_difference
10 | from (
11 | 
12 |     {{ audit_helper.compare_which_relation_columns_differ(
13 |         a_relation=a_relation,
14 |         b_relation=b_relation,
15 |         primary_key_columns=["id"]
16 |     ) }}
17 | ) as macro_output
18 | 


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/compare_which_columns_differ_exclude_cols.sql:
--------------------------------------------------------------------------------
 1 | {% set a_relation=ref('data_compare_which_columns_differ_a')%}
 2 | 
 3 | {% set b_relation=ref('data_compare_which_columns_differ_b') %}
 4 | 
 5 | {% set pk_cols = ['id'] %}
 6 | {% set cols = ['id','value_changes','becomes_not_null','does_not_change'] %}
 7 | 
 8 | {% if target.type == 'snowflake' %}
 9 |     {% set pk_cols = pk_cols | map("upper") | list %}
10 |     {% set cols = cols | map("upper") | list %}
11 | {% endif %}
12 | 
13 | select 
14 |     lower(column_name) as column_name,
15 |     has_difference
16 | from (
17 | 
18 |     {{ audit_helper.compare_which_relation_columns_differ(
19 |         a_relation=a_relation,
20 |         b_relation=b_relation,
21 |         primary_key_columns=pk_cols,
22 |         columns=cols
23 |     ) }}
24 | 
25 | ) as macro_output


--------------------------------------------------------------------------------
/integration_tests/models/data_tests/schema.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: compare_queries
 5 |     data_tests:
 6 |       - dbt_utils.equality:
 7 |           compare_model: ref('expected_results__compare_relations_without_exclude')
 8 | 
 9 |   - name: compare_queries_concat_pk_without_summary
10 |     data_tests:
11 |       - dbt_utils.equality:
12 |           compare_model: ref('expected_results__compare_without_summary')
13 |           
14 |   - name: compare_queries_with_summary
15 |     data_tests:
16 |       - dbt_utils.equality:
17 |           compare_model: ref('expected_results__compare_with_summary')
18 | 
19 |   - name: compare_queries_without_summary
20 |     data_tests:
21 |       - dbt_utils.equality:
22 |           compare_model: ref('expected_results__compare_without_summary')
23 |           
24 |   - name: compare_relations_with_summary
25 |     data_tests:
26 |       - dbt_utils.equality:
27 |           compare_model: ref('expected_results__compare_with_summary')
28 | 
29 |   - name: compare_relations_without_summary
30 |     data_tests:
31 |       - dbt_utils.equality:
32 |           compare_model: ref('expected_results__compare_without_summary')
33 | 
34 |   - name: compare_relations_with_exclude
35 |     data_tests:
36 |       - dbt_utils.equality:
37 |           compare_model: ref('expected_results__compare_relations_with_exclude')
38 | 
39 |   - name: compare_relations_without_exclude
40 |     data_tests:
41 |       - dbt_utils.equality:
42 |           compare_model: ref('expected_results__compare_relations_without_exclude')
43 | 
44 |   - name: compare_all_columns_with_summary
45 |     data_tests:
46 |       - dbt_utils.equality:
47 |           compare_model: ref('expected_results__compare_all_columns_with_summary')
48 | 
49 |   - name: compare_all_columns_without_summary
50 |     data_tests:
51 |       - dbt_utils.equality:
52 |           compare_model: ref('expected_results__compare_all_columns_without_summary')
53 | 
54 |   - name: compare_all_columns_concat_pk_with_summary
55 |     data_tests:
56 |       - dbt_utils.equality:
57 |           compare_model: ref('expected_results__compare_all_columns_concat_pk_with_summary')
58 | 
59 |   - name: compare_all_columns_concat_pk_without_summary
60 |     data_tests:
61 |       - dbt_utils.equality:
62 |           compare_model: ref('expected_results__compare_all_columns_concat_pk_without_summary')
63 | 
64 |   - name: compare_all_columns_with_summary_and_exclude
65 |     data_tests:
66 |       - dbt_utils.equality:
67 |           compare_model: ref('expected_results__compare_all_columns_with_summary_and_exclude')
68 |   
69 |   - name: compare_all_columns_where_clause
70 |     data_tests:
71 |       - dbt_utils.equality:
72 |           compare_model: ref('expected_results__compare_all_columns_where_clause')
73 | 
74 |   - name: compare_relation_columns
75 |     data_tests:
76 |       - dbt_utils.equality:
77 |           compare_model: ref('expected_results__compare_relation_columns')
78 | 
79 |   - name: compare_relations_concat_pk_without_summary
80 |     data_tests:
81 |       - dbt_utils.equality:
82 |           compare_model: ref('expected_results__compare_without_summary')
83 | 
84 |   - name: compare_which_columns_differ
85 |     data_tests:
86 |       - dbt_utils.equality:
87 |           compare_model: ref('expected_results__compare_which_columns_differ')
88 | 
89 |   - name: compare_which_columns_differ_exclude_cols
90 |     data_tests:
91 |       - dbt_utils.equality:
92 |           compare_model: ref('expected_results__compare_which_columns_differ_exclude_cols')
93 |           
94 |   - name: compare_row_counts
95 |     data_tests:
96 |       - dbt_utils.equality:
97 |           compare_model: ref('expected_results__compare_row_counts')
98 | 


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_placeholder_models/unit_test_model_a.sql:
--------------------------------------------------------------------------------
1 | select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, {{ dbt.current_timestamp() }} as created_at


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_placeholder_models/unit_test_model_b.sql:
--------------------------------------------------------------------------------
1 | select 12 as id, 22 as id_2, 'xyz' as col1, 'tuv' as col2, 123 as col3, {{ dbt.current_timestamp() }} as created_at


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_a.sql:
--------------------------------------------------------------------------------
 1 | {{ config(tags=['skip' if (target.type in ['postgres']) else 'runnable']) }}
 2 | {# Placeholder model exposing id, col1 and a nested key/value column col2; tagged 'skip' on postgres, 'runnable' elsewhere. #}
 3 | {% if target.type != 'redshift' %}
 4 | {# Non-Redshift adapters: col2 is built by the constructor function named by _basic_json_function(). #}
 5 | select 
 6 |     1 as id, 
 7 |     'John Doe' as col1, 
 8 |     {{ audit_helper_integration_tests._basic_json_function() -}}('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
 9 | 
10 | {% else %}
11 | {# Redshift (matched on adapter type, not the profile's target name): parse a JSON literal instead. #}
12 | select 
13 |   1 AS id, 
14 |   'John Doe' AS col1, 
15 |   json_parse('{"street": "123 Main St", "city": "Anytown", "state": "CA"}') AS col2
16 | {% endif %}


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_placeholder_models/unit_test_struct_model_b.sql:
--------------------------------------------------------------------------------
 1 | {{ config(tags=['skip' if (target.type in ['postgres']) else 'runnable']) }}
 2 | {# Placeholder model exposing id, col1 and a nested key/value column col2; tagged 'skip' on postgres, 'runnable' elsewhere. #}
 3 | {% if target.type != 'redshift' %}
 4 | {# Non-Redshift adapters: col2 is built by the constructor function named by _basic_json_function(). #}
 5 | select 
 6 |     1 as id, 
 7 |     'John Doe' as col1, 
 8 |     {{ audit_helper_integration_tests._basic_json_function() -}}('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
 9 | 
10 | {% else %}
11 | {# Redshift (matched on adapter type, not the profile's target name): parse a JSON literal instead. #}
12 | select 
13 |   1 AS id, 
14 |   'John Doe' AS col1, 
15 |   json_parse('{"street": "123 Main St", "city": "Anytown", "state": "CA"}') AS col2
16 | {% endif %}


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_wrappers/unit_compare_classify.sql:
--------------------------------------------------------------------------------
1 | {{ 
2 |     audit_helper.compare_and_classify_query_results(
3 |         "select * from " ~ ref('unit_test_model_a') ~ " where 1=1",
4 |         "select * from " ~ ref('unit_test_model_b') ~ " where 1=1",
5 |         primary_key_columns=var('primary_key_columns_var'),
6 |         columns=var('columns_var'),
7 |         event_time=var('event_time_var')
8 |     ) 
9 | }}


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_wrappers/unit_compare_classify.yml:
--------------------------------------------------------------------------------
  1 | unit_tests:
  2 |   - name: compare_classify_identical_tables
  3 |     model: unit_compare_classify
  4 |     
  5 |     given:
  6 |       - input: ref('unit_test_model_a')
  7 |         rows:
  8 |           - { "id": 1, "col1": "abc", "col2": "def" }
  9 |           - { "id": 2, "col1": "hij", "col2": "klm" }
 10 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
 11 |       - input: ref('unit_test_model_b')
 12 |         rows:
 13 |           - { "id": 1, "col1": "abc", "col2": "def" }
 14 |           - { "id": 2, "col1": "hij", "col2": "klm" }
 15 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
 16 |         
 17 |     expect:
 18 |       rows:
 19 |         - {"dbt_audit_row_status": 'identical', 'id': 1, dbt_audit_num_rows_in_status: 3}
 20 |         - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 3}
 21 |         - {"dbt_audit_row_status": 'identical', 'id': 2, dbt_audit_num_rows_in_status: 3}
 22 | 
 23 |     overrides:
 24 |       vars:
 25 |         columns_var: ['id', 'col1', 'col2']
 26 |         event_time_var:
 27 |         primary_key_columns_var: ['id']
 28 | 
 29 |   - name: compare_classify_identical_tables_event_time_filter
 30 |     model: unit_compare_classify
 31 |     overrides:
 32 |       vars:
 33 |         columns_var: ['id', 'col1', 'col2', 'created_at']
 34 |         event_time_var: 'created_at'
 35 |         primary_key_columns_var: ['id']
 36 |       macros: 
 37 |         audit_helper._get_comparison_bounds:
 38 |           "min_event_time": "2024-01-02"
 39 |           "max_event_time": "2024-01-03"
 40 |           "event_time": 'created_at'
 41 |       
 42 |     given:
 43 |       - input: ref('unit_test_model_a')
 44 |         rows:
 45 |           - { "id": 1, "col1": "abc", "col2": "def", "created_at": '2024-01-01' }
 46 |           - { "id": 2, "col1": "hij", "col2": "klm", "created_at": '2024-01-02' }
 47 |           - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": '2024-01-03' }
 48 |       - input: ref('unit_test_model_b')
 49 |         rows:
 50 |           - { "id": 2, "col1": "hij", "col2": "klm", "created_at": '2024-01-02' }
 51 |           - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": '2024-01-03' }
 52 |         
 53 |     expect:
 54 |       rows:
 55 |         - {"dbt_audit_row_status": 'identical', 'id': 2, dbt_audit_num_rows_in_status: 2}
 56 |         - {"dbt_audit_row_status": 'identical', 'id': 3, dbt_audit_num_rows_in_status: 2}
 57 |     
 58 |   - name: compare_classify_all_statuses
 59 |     model: unit_compare_classify
 60 |     config:
 61 |       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" # https://github.com/dbt-labs/dbt-core/issues/10167
 62 |     overrides:
 63 |       vars:
 64 |         primary_key_columns_var: ["id"]
 65 |         columns_var: ["id", "col1", "col2"]
 66 |         event_time_var: null
 67 |     given:
 68 |       - input: ref('unit_test_model_a')
 69 |         rows:
 70 |           - { "id": 1, "col1": "abc", "col2": "def" }
 71 |           - { "id": 2, "col1": "hij", "col2": "klm" }
 72 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
 73 |       - input: ref('unit_test_model_b')
 74 |         rows:
 75 |           - { "id": 1, "col1": "abc", "col2": "def" }
 76 |           - { "id": 2, "col1": "changed", "col2": "values" }
 77 |           - { "id": 4, "col1": "nop", "col2": "qrs" }
 78 |     # id 1 is the same in both inputs, id 2 has different col1/col2, id 3 is only in unit_test_model_a, id 4 only in unit_test_model_b.
 79 |     expect:
 80 |       rows:
 81 |         - { "dbt_audit_row_status": "identical", "id": 1, "dbt_audit_num_rows_in_status": 1 }
 82 |         - { "dbt_audit_row_status": "modified", "id": 2, "dbt_audit_num_rows_in_status": 1 }
 83 |         - { "dbt_audit_row_status": "modified", "id": 2, "dbt_audit_num_rows_in_status": 1 }
 84 |         - { "dbt_audit_row_status": "removed", "id": 3, "dbt_audit_num_rows_in_status": 1 }
 85 |         - { "dbt_audit_row_status": "added", "id": 4, "dbt_audit_num_rows_in_status": 1 }
 86 | 
 87 |   - name: compare_classify_identical_tables_multiple_pk_cols # composite primary key: (id, id_2)
 88 |     model: unit_compare_classify
 89 |     overrides:
 90 |       vars:
 91 |         primary_key_columns_var: ["id", "id_2"]
 92 |         columns_var: ["id", "id_2", "col1", "col2"]
 93 |         event_time_var: null
 94 |     given:
 95 |       - input: ref('unit_test_model_a')
 96 |         rows:
 97 |           - { "id": 12, "id_2": 3, "col1": "abc", "col2": "def" }
 98 |           - { "id": 1, "id_2": 23, "col1": "hij", "col2": "klm" }
 99 |           - { "id": 3, "id_2": 4, "col1": "nop", "col2": "qrs" }
100 |       - input: ref('unit_test_model_b')
101 |         rows:
102 |           - { "id": 12, "id_2": 3, "col1": "abc", "col2": "def" }
103 |           - { "id": 1, "id_2": 23, "col1": "hij", "col2": "klm" }
104 |           - { "id": 3, "id_2": 4, "col1": "nop", "col2": "qrs" }
105 |     expect:
106 |       rows:
107 |         - { "dbt_audit_row_status": "identical", "id": 12, "id_2": 3, "dbt_audit_num_rows_in_status": 3 }
108 |         - { "dbt_audit_row_status": "identical", "id": 1, "id_2": 23, "dbt_audit_num_rows_in_status": 3 }
109 |         - { "dbt_audit_row_status": "identical", "id": 3, "id_2": 4, "dbt_audit_num_rows_in_status": 3 }
110 | 
111 |   - name: compare_classify_identical_tables_single_null_pk
112 |     model: unit_compare_classify
113 |     description: "`nonunique_pk` status checks whether a PK is unique. It's intended to avoid arbitrary comparisons, not protect against null records (that's what constraints or tests are for)."
114 |     config:
115 |       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" # https://github.com/dbt-labs/dbt-redshift/issues/821
116 |     overrides:
117 |       vars:
118 |         primary_key_columns_var: ["id"]
119 |         columns_var: ["id", "col1", "col2"]
120 |         event_time_var: null
121 |     # Both inputs hold the same three rows; exactly one row per input has a null id.
122 |     # The null id occurs only once per input, so it does not repeat within a table.
123 |     given:
124 |       - input: ref('unit_test_model_a')
125 |         rows:
126 |           - { "id": null, "col1": "abc", "col2": "def" }
127 |           - { "id": 2, "col1": "hij", "col2": "klm" }
128 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
129 |       - input: ref('unit_test_model_b')
130 |         rows:
131 |           - { "id": null, "col1": "abc", "col2": "def" }
132 |           - { "id": 2, "col1": "hij", "col2": "klm" }
133 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
134 |     # All three rows, including the null-id row, are expected as 'identical'.
135 |     expect:
136 |       rows:
137 |         - { "dbt_audit_row_status": "identical", "id": null, "dbt_audit_num_rows_in_status": 3 }
138 |         - { "dbt_audit_row_status": "identical", "id": 2, "dbt_audit_num_rows_in_status": 3 }
139 |         - { "dbt_audit_row_status": "identical", "id": 3, "dbt_audit_num_rows_in_status": 3 }
140 | 
141 |   - name: compare_classify_identical_tables_multiple_null_pk
142 |     model: unit_compare_classify
143 |     config:
144 |       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" # https://github.com/dbt-labs/dbt-redshift/issues/821
145 |     overrides:
146 |       vars:
147 |         primary_key_columns_var: ["id"]
148 |         columns_var: ["id", "col1", "col2"]
149 |         event_time_var: null
150 |     # Both inputs hold the same three rows; two rows per input have a null id
151 |     # (with different col1/col2 values), so the null key repeats within each table.
152 |     given:
153 |       - input: ref('unit_test_model_a')
154 |         rows:
155 |           - { "id": null, "col1": "abc", "col2": "def" }
156 |           - { "id": null, "col1": "hij", "col2": "klm" }
157 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
158 |       - input: ref('unit_test_model_b')
159 |         rows:
160 |           - { "id": null, "col1": "abc", "col2": "def" }
161 |           - { "id": null, "col1": "hij", "col2": "klm" }
162 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
163 |     # The two null-id rows are expected as 'nonunique_pk'; id 3 is expected as 'identical'.
164 |     expect:
165 |       rows:
166 |         - { "dbt_audit_row_status": "nonunique_pk", "id": null, "dbt_audit_num_rows_in_status": 2 }
167 |         - { "dbt_audit_row_status": "nonunique_pk", "id": null, "dbt_audit_num_rows_in_status": 2 }
168 |         - { "dbt_audit_row_status": "identical", "id": 3, "dbt_audit_num_rows_in_status": 1 }
169 | 
170 |   - name: compare_classify_identical_tables_multi_null_pk_dupe_rows
171 |     model: unit_compare_classify
172 |     description: All rows with a null ID are identical. They should be returned as individual rows instead of being combined
173 |     config:
174 |       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" # https://github.com/dbt-labs/dbt-redshift/issues/821
175 |     overrides:
176 |       vars:
177 |         primary_key_columns_var: ["id"]
178 |         columns_var: ["id", "col1", "col2"]
179 |         event_time_var: null
180 |     # unit_test_model_a has two fully identical null-id rows; unit_test_model_b has three.
181 |     # NOTE(review): three 'nonunique_pk' rows are expected — presumably driven by the larger side; confirm against the macro.
182 |     given:
183 |       - input: ref('unit_test_model_a')
184 |         rows:
185 |           - { "id": null, "col1": "abc", "col2": "def" }
186 |           - { "id": null, "col1": "abc", "col2": "def" }
187 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
188 |       - input: ref('unit_test_model_b')
189 |         rows:
190 |           - { "id": null, "col1": "abc", "col2": "def" }
191 |           - { "id": null, "col1": "abc", "col2": "def" }
192 |           - { "id": null, "col1": "abc", "col2": "def" }
193 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
194 |     # The duplicate null-id rows must come back one by one rather than collapsed into a single row.
195 |     expect:
196 |       rows:
197 |         - { "dbt_audit_row_status": "identical", "id": 3, "dbt_audit_num_rows_in_status": 1 }
198 |         - { "dbt_audit_row_status": "nonunique_pk", "id": null, "dbt_audit_num_rows_in_status": 3 }
199 |         - { "dbt_audit_row_status": "nonunique_pk", "id": null, "dbt_audit_num_rows_in_status": 3 }
200 |         - { "dbt_audit_row_status": "nonunique_pk", "id": null, "dbt_audit_num_rows_in_status": 3 }
201 | 
202 |   - name: compare_classify_all_statuses_different_column_set
203 |     model: unit_compare_classify
204 |     config:
205 |       tags: "{{ 'temporary_skip' if (target.type in ['redshift']) else 'runnable' }}" # https://github.com/dbt-labs/dbt-core/issues/10167
206 |     overrides:
207 |       vars:
208 |         primary_key_columns_var: ["id"]
209 |         columns_var: ["id", "col1"]
210 |         event_time_var: null
211 |     given:
212 |       - input: ref('unit_test_model_a')
213 |         rows:
214 |           - { "id": 1, "col1": "abc", "col2": "def" }
215 |           - { "id": 2, "col1": "hij", "col2": "klm" }
216 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
217 |       - input: ref('unit_test_model_b')
218 |         rows:
219 |           - { "id": 1, "col1": "abc" }
220 |           - { "id": 2, "col1": "ddd" }
221 |           - { "id": 4, "col1": "nop" }
222 |     # unit_test_model_b's rows omit col2; columns_var restricts the comparison to id and col1.
223 |     expect:
224 |       rows:
225 |         - { "dbt_audit_row_status": "added", "id": 4, "dbt_audit_num_rows_in_status": 1 }
226 |         - { "dbt_audit_row_status": "identical", "id": 1, "dbt_audit_num_rows_in_status": 1 }
227 |         - { "dbt_audit_row_status": "modified", "id": 2, "dbt_audit_num_rows_in_status": 1 }
228 |         - { "dbt_audit_row_status": "modified", "id": 2, "dbt_audit_num_rows_in_status": 1 }
229 |         - { "dbt_audit_row_status": "removed", "id": 3, "dbt_audit_num_rows_in_status": 1 }
230 | 
231 |   - name: compare_classify_identical_tables_without_pk_in_cols_list
232 |     model: unit_compare_classify
233 |     overrides:
234 |       vars:
235 |         primary_key_columns_var: ["id"]
236 |         columns_var: ["col1", "col2"]
237 |         event_time_var: null
238 |     # columns_var deliberately leaves out the primary key column `id`.
239 |     # Both inputs hold the same three rows.
240 |     given:
241 |       - input: ref('unit_test_model_a')
242 |         rows:
243 |           - { "id": 1, "col1": "abc", "col2": "def" }
244 |           - { "id": 2, "col1": "hij", "col2": "klm" }
245 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
246 |       - input: ref('unit_test_model_b')
247 |         rows:
248 |           - { "id": 1, "col1": "abc", "col2": "def" }
249 |           - { "id": 2, "col1": "hij", "col2": "klm" }
250 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
251 |     # The expected rows still carry `id` even though it is not listed in columns_var.
252 |     expect:
253 |       rows:
254 |         - { "dbt_audit_row_status": "identical", "id": 1, "dbt_audit_num_rows_in_status": 3 }
255 |         - { "dbt_audit_row_status": "identical", "id": 3, "dbt_audit_num_rows_in_status": 3 }
256 |         - { "dbt_audit_row_status": "identical", "id": 2, "dbt_audit_num_rows_in_status": 3 }
257 | 


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.sql:
--------------------------------------------------------------------------------
1 | {# Wrapper model: feeds both struct placeholder models to audit_helper.compare_and_classify_query_results, configured via vars. #}
2 | {% set a_query = "select * from " ~ ref('unit_test_struct_model_a') ~ " where 1=1" %}
3 | {% set b_query = "select * from " ~ ref('unit_test_struct_model_b') ~ " where 1=1" %}
4 | {{ audit_helper.compare_and_classify_query_results(
5 |     a_query,
6 |     b_query,
7 |     primary_key_columns=var('primary_key_columns_var'),
8 |     columns=var('columns_var'),
9 |     event_time=var('event_time_var')) }}


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_wrappers/unit_compare_classify_struct.yml:
--------------------------------------------------------------------------------
  1 | unit_tests:
  2 |   - name: compare_classify_simple_struct # same fixture on both sides, one 'identical' row expected
  3 |     model: unit_compare_classify_struct
  4 |     overrides:
  5 |       vars:
  6 |         primary_key_columns_var: ["id"]
  7 |         columns_var: ["id", "col1", "col2"]
  8 |         event_time_var: null
  9 |     given:
 10 |       - input: ref('unit_test_struct_model_a')
 11 |         format: sql
 12 |         fixture: simple_struct
 13 |       - input: ref('unit_test_struct_model_b')
 14 |         format: sql
 15 |         fixture: simple_struct
 16 |     expect:
 17 |       rows:
 18 |         - { "id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1 }
 19 | 
 20 |   - name: unit_compare_classify_struct_identical_values_different_order
 21 |     model: unit_compare_classify_struct
 22 |     description: Objects' keys are generally sorted alphabetically, so sort order is ignored.
 23 |     config:
 24 |       # Databricks treats key order as a difference. Identical behaviour across warehouses is not a goal, so it is skipped there.
 25 |       tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}"
 26 |     overrides:
 27 |       vars:
 28 |         primary_key_columns_var: ["id"]
 29 |         columns_var: ["id", "col1", "col2"]
 30 |         event_time_var: null
 31 |     given:
 32 |       - input: ref('unit_test_struct_model_a')
 33 |         format: sql
 34 |         fixture: simple_struct
 35 |       - input: ref('unit_test_struct_model_b')
 36 |         format: sql
 37 |         fixture: simple_struct_different_order
 38 |     expect:
 39 |       rows:
 40 |         - { "id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1 }
 41 | 
 42 |   - name: unit_compare_classify_struct_identical_values_different_order_dbx
 43 |     model: unit_compare_classify_struct
 44 |     description: Most platforms don't care about sort order. Databricks does.
 45 |     config:
 46 |       # Runs on Databricks only; every other target gets the 'skip' tag.
 47 |       tags: "{{ 'skip' if (target.type not in ['databricks']) else 'runnable' }}"
 48 |     overrides:
 49 |       vars:
 50 |         primary_key_columns_var: ["id"]
 51 |         columns_var: ["id", "col1", "col2"]
 52 |         event_time_var: null
 53 |     given:
 54 |       - input: ref('unit_test_struct_model_a')
 55 |         format: sql
 56 |         fixture: simple_struct
 57 |       - input: ref('unit_test_struct_model_b')
 58 |         format: sql
 59 |         fixture: simple_struct_different_order
 60 |     expect:
 61 |       rows:
 62 |         - { "id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1 }
 63 |         - { "id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1 }
 64 | 
 65 |   - name: unit_compare_classify_struct_removed_key
 66 |     model: unit_compare_classify_struct
 67 |     overrides:
 68 |       vars:
 69 |         primary_key_columns_var: ["id"]
 70 |         columns_var: ["id", "col1", "col2"]
 71 |         event_time_var: null
 72 |     given:
 73 |       - input: ref('unit_test_struct_model_a')
 74 |         format: sql
 75 |         fixture: simple_struct
 76 |       - input: ref('unit_test_struct_model_b')
 77 |         format: sql
 78 |         fixture: simple_struct_removed_key
 79 |     expect:
 80 |       rows:
 81 |         - { "id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1 }
 82 |         - { "id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1 }
 83 |     # config:
 84 |     #   tags: "{{ 'skip' if (target.type in ['databricks']) else 'runnable' }}" #Can't do set operations on even simple JSON cols
 85 | 
 86 |   - name: compare_classify_complex_struct # same complex fixture on both sides, one 'identical' row expected
 87 |     model: unit_compare_classify_struct
 88 |     overrides:
 89 |       vars:
 90 |         primary_key_columns_var: ["id"]
 91 |         columns_var: ["id", "col1", "col2"]
 92 |         event_time_var: null
 93 |     given:
 94 |       - input: ref('unit_test_struct_model_a')
 95 |         format: sql
 96 |         fixture: complex_struct
 97 |       - input: ref('unit_test_struct_model_b')
 98 |         format: sql
 99 |         fixture: complex_struct
100 |     expect:
101 |       rows:
102 |         - { "id": 1, "dbt_audit_row_status": "identical", "dbt_audit_num_rows_in_status": 1 }
103 |     # config:
104 |     #   tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
105 | 
106 |   - name: compare_classify_complex_struct_different_values
107 |     model: unit_compare_classify_struct
108 |     overrides:
109 |       vars:
110 |         primary_key_columns_var: ["id"]
111 |         columns_var: ["id", "col1", "col2"]
112 |         event_time_var: null
113 |     given:
114 |       - input: ref('unit_test_struct_model_a')
115 |         format: sql
116 |         fixture: complex_struct
117 |       - input: ref('unit_test_struct_model_b')
118 |         format: sql
119 |         fixture: complex_struct_different_value
120 |     # The b-side fixture differs from the a-side one, so id 1 is expected as 'modified' (two rows).
121 |     expect:
122 |       rows:
123 |         - { "id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1 }
124 |         - { "id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1 }
125 |     # config:
126 |     #   tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
127 | 
128 |   - name: unit_compare_classify_complex_struct_identical_values_different_order
129 |     model: unit_compare_classify_struct
130 |     description: Snowflake sorts objects' keys alphabetically, but respects the order items are added to arrays so differences are detected.
131 |     overrides:
132 |       vars:
133 |         primary_key_columns_var: ["id"]
134 |         columns_var: ["id", "col1", "col2"]
135 |         event_time_var: null
136 |     given:
137 |       - input: ref('unit_test_struct_model_a')
138 |         format: sql
139 |         fixture: complex_struct
140 |       - input: ref('unit_test_struct_model_b')
141 |         format: sql
142 |         fixture: complex_struct_different_order
143 |     expect:
144 |       rows:
145 |         - { "id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1 }
146 |         - { "id": 1, "dbt_audit_row_status": "modified", "dbt_audit_num_rows_in_status": 1 }
147 |     # config:
148 |     #   tags: "{{ 'skip' if (target.type in ['redshift', 'databricks']) else 'runnable' }}" #haven't ported these to be multi-warehouse yet
149 | 


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_wrappers/unit_compare_queries.sql:
--------------------------------------------------------------------------------
1 | {# Wrapper model: runs audit_helper.compare_queries over both placeholder models; `summarize` comes from a var. #}
2 | {% set a_query = "select * from " ~ ref('unit_test_model_a') ~ " where 1=1" %}
3 | {% set b_query = "select * from " ~ ref('unit_test_model_b') ~ " where 1=1" %}
4 | {{ audit_helper.compare_queries(
5 |     a_query,
6 |     b_query,
7 |     summarize=var('compare_queries_summarize')
8 | ) }}


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_wrappers/unit_compare_queries.yml:
--------------------------------------------------------------------------------
 1 | unit_tests:
 2 |   - name: identical_records_compare_queries
 3 |     model: unit_compare_queries
 4 |     description: The world's most basic unit test.
 5 |     overrides:
 6 |       vars:
 7 |         compare_queries_summarize: true
 8 |     # Both inputs hold the same three rows.
 9 |     # compare_queries_summarize is switched on for this test.
10 |     given:
11 |       - input: ref('unit_test_model_a')
12 |         rows:
13 |           - { "id": 1, "col1": "abc", "col2": "def" }
14 |           - { "id": 2, "col1": "hij", "col2": "klm" }
15 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
16 |       - input: ref('unit_test_model_b')
17 |         rows:
18 |           - { "id": 1, "col1": "abc", "col2": "def" }
19 |           - { "id": 2, "col1": "hij", "col2": "klm" }
20 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
21 |     # Expected output: a single row with in_a and in_b both true.
22 |     expect:
23 |       rows:
24 |         - { "in_a": true, "in_b": true }
25 | 
26 |   - name: identical_records_compare_queries_no_summarize
27 |     model: unit_compare_queries
28 |     description: The world's second most basic unit test.
29 |     overrides:
30 |       vars:
31 |         compare_queries_summarize: false
32 |     # Both inputs hold the same three rows.
33 |     # compare_queries_summarize is switched off for this test.
34 |     given:
35 |       - input: ref('unit_test_model_a')
36 |         rows:
37 |           - { "id": 1, "col1": "abc", "col2": "def" }
38 |           - { "id": 2, "col1": "hij", "col2": "klm" }
39 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
40 |       - input: ref('unit_test_model_b')
41 |         rows:
42 |           - { "id": 1, "col1": "abc", "col2": "def" }
43 |           - { "id": 2, "col1": "hij", "col2": "klm" }
44 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
45 |     # Expected output: no rows at all.
46 |     expect:
47 |       rows: []
48 | 


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_wrappers/unit_compare_which_query_columns_differ.sql:
--------------------------------------------------------------------------------
 1 | {#
 2 |     Wrapper model for unit testing audit_helper.compare_which_query_columns_differ.
 3 |     When the target is Snowflake and the running command is `run`, the column names
 4 |     read from the vars are upper-cased before being handed to the macro.
 5 | #}
 6 | {% set pk_cols = var('primary_key_columns_var') %}
 7 | {% set cols = var('columns_var') %}
 8 | {% set force_upper = (target.type == 'snowflake' and flags.WHICH == 'run') %}
 9 | {% set pk_cols = (pk_cols | map("upper") | list) if force_upper else pk_cols %}
10 | {% set cols = (cols | map("upper") | list) if force_upper else cols %}
11 | 
12 | {{ audit_helper.compare_which_query_columns_differ(
13 |     a_query = "select * from " ~ ref('unit_test_model_a') ~ " where 1=1",
14 |     b_query = "select * from " ~ ref('unit_test_model_b') ~ " where 1=1",
15 |     primary_key_columns = pk_cols,
16 |     columns = cols,
17 |     event_time = var('event_time_var')
18 | ) }}


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_wrappers/unit_compare_which_query_columns_differ.yml:
--------------------------------------------------------------------------------
  1 | unit_tests:
  2 |   - name: compare_cols_identical_tables
  3 |     model: unit_compare_which_query_columns_differ
  4 |     config:
  5 |       tags: "{{ 'skip' if (target.type in ['snowflake']) else 'runnable' }}" # Case sensitivity: Snowflake runs compare_cols_identical_tables_snowflake instead
  6 |     overrides:
  7 |       vars:
  8 |         primary_key_columns_var: ["id"]
  9 |         columns_var: ["id", "col1", "col2"]
 10 |         event_time_var: null
 11 |     # Both inputs hold the same three rows.
 12 |     # Column names are given in lower case here.
 13 |     given:
 14 |       - input: ref('unit_test_model_a')
 15 |         rows:
 16 |           - { "id": 1, "col1": "abc", "col2": "def" }
 17 |           - { "id": 2, "col1": "hij", "col2": "klm" }
 18 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
 19 |       - input: ref('unit_test_model_b')
 20 |         rows:
 21 |           - { "id": 1, "col1": "abc", "col2": "def" }
 22 |           - { "id": 2, "col1": "hij", "col2": "klm" }
 23 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
 24 |     # One row per compared column, none of them flagged as different.
 25 |     expect:
 26 |       rows:
 27 |         - { "column_name": "id", "has_difference": false }
 28 |         - { "column_name": "col1", "has_difference": false }
 29 |         - { "column_name": "col2", "has_difference": false }
 30 | 
 31 |   - name: compare_cols_identical_tables_event_time_filter
 32 |     model: unit_compare_which_query_columns_differ
 33 |     config:
 34 |       tags: "{{ 'skip' if (target.type in ['snowflake']) else 'runnable' }}" # Case sensitivity: Snowflake runs compare_cols_identical_tables_event_time_filter_snowflake instead
 35 |     overrides:
 36 |       vars:
 37 |         primary_key_columns_var: ["id"]
 38 |         columns_var: ["id", "col1", "col2", "created_at"]
 39 |         event_time_var: "created_at"
 40 |       macros:
 41 |         audit_helper._get_comparison_bounds:
 42 |           "min_event_time": "2024-01-02"
 43 |           "max_event_time": "2024-01-03"
 44 |           "event_time": "created_at"
 45 |     # _get_comparison_bounds is overridden with fixed bounds (2024-01-02 to 2024-01-03) on created_at.
 46 |     given:
 47 |       - input: ref('unit_test_model_a')
 48 |         rows:
 49 |           - { "id": 1, "col1": "abc", "col2": "def", "created_at": "2024-01-01" }
 50 |           - { "id": 2, "col1": "hij", "col2": "klm", "created_at": "2024-01-02" }
 51 |           - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": "2024-01-03" }
 52 |       - input: ref('unit_test_model_b')
 53 |         rows:
 54 |           - { "id": 2, "col1": "hij", "col2": "klm", "created_at": "2024-01-02" }
 55 |           - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": "2024-01-03" }
 56 |     # unit_test_model_a's extra row (id 1) predates min_event_time — presumably filtered out, so no column should differ.
 57 |     expect:
 58 |       rows:
 59 |         - { "column_name": "id", "has_difference": false }
 60 |         - { "column_name": "col1", "has_difference": false }
 61 |         - { "column_name": "col2", "has_difference": false }
 62 |         - { "column_name": "created_at", "has_difference": false }
 63 | 
 64 |   - name: compare_cols_identical_tables_snowflake
 65 |     model: unit_compare_which_query_columns_differ
 66 |     config:
 67 |       tags: "{{ 'skip' if (target.type not in ['snowflake']) else 'runnable' }}" # Case sensitivity: Snowflake-only variant with upper-case column names
 68 |     overrides:
 69 |       vars:
 70 |         primary_key_columns_var: ["ID"]
 71 |         columns_var: ["ID", "COL1", "COL2"]
 72 |         event_time_var: null
 73 |     # Both inputs hold the same three rows.
 74 |     # Column names in the vars and in the expected output are upper case.
 75 |     given:
 76 |       - input: ref('unit_test_model_a')
 77 |         rows:
 78 |           - { "id": 1, "col1": "abc", "col2": "def" }
 79 |           - { "id": 2, "col1": "hij", "col2": "klm" }
 80 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
 81 |       - input: ref('unit_test_model_b')
 82 |         rows:
 83 |           - { "id": 1, "col1": "abc", "col2": "def" }
 84 |           - { "id": 2, "col1": "hij", "col2": "klm" }
 85 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
 86 |     # One row per compared column, none of them flagged as different.
 87 |     expect:
 88 |       rows:
 89 |         - { "column_name": "ID", "has_difference": false }
 90 |         - { "column_name": "COL1", "has_difference": false }
 91 |         - { "column_name": "COL2", "has_difference": false }
 92 | 
 93 |   - name: compare_cols_identical_tables_event_time_filter_snowflake
 94 |     model: unit_compare_which_query_columns_differ
 95 |     config:
 96 |       tags: "{{ 'skip' if (target.type not in ['snowflake']) else 'runnable' }}" # Case sensitivity: Snowflake-only variant with upper-case column names
 97 |     overrides:
 98 |       vars:
 99 |         primary_key_columns_var: ["ID"]
100 |         columns_var: ["ID", "COL1", "COL2", "CREATED_AT"]
101 |         event_time_var: "CREATED_AT"
102 |       macros:
103 |         audit_helper._get_comparison_bounds:
104 |           "min_event_time": "2024-01-02"
105 |           "max_event_time": "2024-01-03"
106 |           "event_time": "created_at"
107 |     # _get_comparison_bounds is overridden with fixed bounds; note its event_time stays lower case while the vars are upper case.
108 |     given:
109 |       - input: ref('unit_test_model_a')
110 |         rows:
111 |           - { "id": 1, "col1": "abc", "col2": "def", "created_at": "2024-01-01" }
112 |           - { "id": 2, "col1": "hij", "col2": "klm", "created_at": "2024-01-02" }
113 |           - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": "2024-01-03" }
114 |       - input: ref('unit_test_model_b')
115 |         rows:
116 |           - { "id": 2, "col1": "hij", "col2": "klm", "created_at": "2024-01-02" }
117 |           - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": "2024-01-03" }
118 |     # unit_test_model_a's extra row (id 1) predates min_event_time — presumably filtered out, so no column should differ.
119 |     expect:
120 |       rows:
121 |         - { "column_name": "ID", "has_difference": false }
122 |         - { "column_name": "COL1", "has_difference": false }
123 |         - { "column_name": "COL2", "has_difference": false }
124 |         - { "column_name": "CREATED_AT", "has_difference": false }


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_wrappers/unit_ensure_all_pks_are_in_column_set.sql:
--------------------------------------------------------------------------------
 1 | {#
 2 |     Wrapper model for unit testing audit_helper._ensure_all_pks_are_in_column_set.
 3 |     Emits one row per returned column name (col) together with its 1-based position (row_index).
 4 |     Each var is read exactly once, with its default, and reused below: the empty-input check
 5 |     then sees the same values the macro received and no longer fails when a var is undefined.
 6 | #}
 7 | {% set pk_cols = var('primary_key_columns_var', ['a_column_with_a_large_unwieldy_name']) %}
 8 | {% set cols = var('columns_var', ['b_column_with_a_large_unwieldy_name']) %}
 9 | {% set results = audit_helper._ensure_all_pks_are_in_column_set(
10 |         primary_key_columns=pk_cols,
11 |         columns=cols
12 |     )
13 | %}
14 | 
15 | {% if (pk_cols | length == 0) and (cols | length == 0) %}
16 | -- need to still provide a table shape
17 | select 'abcdefabcdef' as col, 1 as row_index
18 | limit 0
19 | {% endif %}
20 | 
21 | {# One select per result, chained with union all; loop.index supplies row_index. #}
22 | {% for result in results %}
23 |     select '{{ result }}' as col, {{ loop.index }} as row_index
24 |     {% if not loop.last %}
25 |     union all 
26 |     {% endif %}
27 | {% endfor %}


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_wrappers/unit_ensure_all_pks_are_in_column_set.yml:
--------------------------------------------------------------------------------
  1 | unit_tests:
  2 |   - name: ensure_all_pks_in_columns
  3 |     model: unit_ensure_all_pks_are_in_column_set
  4 |     overrides:
  5 |       vars:
  6 |         primary_key_columns_var: ["pk1", "pk2"]
  7 |         columns_var: ["pk1", "pk2", "column_a", "column_b"]
  8 |     given: []
  9 |     # Every PK is already listed in `columns`; the list is expected back in the same order.
 10 |     expect:
 11 |       rows:
 12 |         - { "col": "pk1", "row_index": 1 }
 13 |         - { "col": "pk2", "row_index": 2 }
 14 |         - { "col": "column_a", "row_index": 3 }
 15 |         - { "col": "column_b", "row_index": 4 }
 16 |   
 17 |   - name: ensure_all_pks_in_columns_pks_at_end
 18 |     model: unit_ensure_all_pks_are_in_column_set
 19 |     description: PKs are specified in `columns` so should be at end of list
 20 |     overrides:
 21 |       vars:
 22 |         primary_key_columns_var: ["pk1", "pk2"]
 23 |         columns_var: ["column_a", "column_b", "pk1", "pk2"]
 24 |     given: []
 25 |     # The expected output keeps the order given in `columns`, with the PKs last.
 26 |     expect:
 27 |       rows:
 28 |         - { "col": "column_a", "row_index": 1 }
 29 |         - { "col": "column_b", "row_index": 2 }
 30 |         - { "col": "pk1", "row_index": 3 }
 31 |         - { "col": "pk2", "row_index": 4 }
 32 |       
 33 |   - name: ensure_all_pks_in_columns_one_missing_pk
 34 |     model: unit_ensure_all_pks_are_in_column_set
 35 |     description: PK specified in `columns` should be at end of list, missing PK will be added at front
 36 |     overrides:
 37 |       vars:
 38 |         primary_key_columns_var: ["pk1", "pk2"]
 39 |         columns_var: ["column_a", "column_b", "pk2"]
 40 |     given: []
 41 |     # pk1 is absent from `columns`, so it is expected first; the remaining entries keep their order.
 42 |     expect:
 43 |       rows:
 44 |         - { "col": "pk1", "row_index": 1 }
 45 |         - { "col": "column_a", "row_index": 2 }
 46 |         - { "col": "column_b", "row_index": 3 }
 47 |         - { "col": "pk2", "row_index": 4 }
 48 |       
 49 |   - name: ensure_all_pks_in_columns_empty_sets
 50 |     model: unit_ensure_all_pks_are_in_column_set
 51 |     overrides:
 52 |       vars:
 53 |         primary_key_columns_var: []
 54 |         columns_var: []
 55 |     given: []
 56 |     # Both lists are empty, so no rows are expected.
 57 |     expect:
 58 |       rows: []
 59 | 
 60 |   - name: ensure_all_pks_in_columns_no_pks
 61 |     model: unit_ensure_all_pks_are_in_column_set
 62 |     overrides:
 63 |       vars:
 64 |         primary_key_columns_var: []
 65 |         columns_var: ["column_a", "column_b"]
 66 |     given: []
 67 |     # No PKs supplied; `columns` is expected back unchanged.
 68 |     expect:
 69 |       rows:
 70 |         - { "col": "column_a", "row_index": 1 }
 71 |         - { "col": "column_b", "row_index": 2 }
 72 | 
 73 |   - name: ensure_all_pks_in_columns_no_cols
 74 |     model: unit_ensure_all_pks_are_in_column_set
 75 |     overrides:
 76 |       vars:
 77 |         primary_key_columns_var: ["pk1", "pk2"]
 78 |         columns_var: []
 79 |     given: []
 80 |     # No columns supplied; only the PKs are expected, in the order given.
 81 |     expect:
 82 |       rows:
 83 |         - { "col": "pk1", "row_index": 1 }
 84 |         - { "col": "pk2", "row_index": 2 }
 85 | 
 86 |   - name: ensure_all_pks_in_columns_caps_pk
 87 |     model: unit_ensure_all_pks_are_in_column_set
 88 |     overrides:
 89 |       vars:
 90 |         primary_key_columns_var: ["pk2", "PK1"]
 91 |         columns_var: ["pk1", "pk2", "column_a", "column_b"]
 92 |     given: []
 93 |     # Upper-case 'PK1' is expected to match 'pk1' in `columns`; the output uses the `columns` spelling, with no duplicate.
 94 |     expect:
 95 |       rows:
 96 |         - { "col": "pk1", "row_index": 1 }
 97 |         - { "col": "pk2", "row_index": 2 }
 98 |         - { "col": "column_a", "row_index": 3 }
 99 |         - { "col": "column_b", "row_index": 4 }
100 |   
101 |   - name: ensure_all_pks_in_columns_caps_col
102 |     model: unit_ensure_all_pks_are_in_column_set
103 |     overrides:
104 |       vars:
105 |         primary_key_columns_var: ["pk2", "pk1"]
106 |         columns_var: ["pk1", "pk2", "COLUMN_A", "column_b"]
107 |     given: []
108 |     # The casing of the non-PK column 'COLUMN_A' is expected to be preserved in the output.
109 |     expect:
110 |       rows:
111 |         - { "col": "pk1", "row_index": 1 }
112 |         - { "col": "pk2", "row_index": 2 }
113 |         - { "col": "COLUMN_A", "row_index": 3 }
114 |         - { "col": "column_b", "row_index": 4 }
115 |   
116 |   - name: ensure_all_pks_in_columns_caps_pk_in_both
117 |     model: unit_ensure_all_pks_are_in_column_set
118 |     overrides:
119 |       vars:
120 |         primary_key_columns_var: ["pk2", "PK1"]
121 |         columns_var: ["PK1", "pk2", "column_a", "column_b"]
122 |     given: []
123 |     # 'PK1' is upper case in both lists and is expected back as 'PK1'.
124 |     expect:
125 |       rows:
126 |         - { "col": "PK1", "row_index": 1 }
127 |         - { "col": "pk2", "row_index": 2 }
128 |         - { "col": "column_a", "row_index": 3 }
129 |         - { "col": "column_b", "row_index": 4 }
130 |   


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.sql:
--------------------------------------------------------------------------------
 1 | {# Wrapper model for unit testing audit_helper.quick_are_queries_identical; tagged 'skip' on redshift, postgres and databricks. #}
 2 | {% set model_tag = 'skip' if (target.type in ['redshift', 'postgres', 'databricks']) else 'runnable' %}
 3 | {{ config(tags=[model_tag]) }}
 4 | {% set a_query = "select * from " ~ ref('unit_test_model_a') ~ " where 1=1" %}
 5 | {% set b_query = "select * from " ~ ref('unit_test_model_b') ~ " where 1=1" %}
 6 | {{ audit_helper.quick_are_queries_identical(
 7 |     a_query,
 8 |     b_query,
 9 |     columns=var('quick_are_queries_identical_cols'),
10 |     event_time=var('event_time_var')) }}


--------------------------------------------------------------------------------
/integration_tests/models/unit_test_wrappers/unit_quick_are_queries_identical.yml:
--------------------------------------------------------------------------------
 1 | unit_tests:
 2 |   - name: quick_are_queries_identical_identical_tables
 3 |     model: unit_quick_are_queries_identical
 4 |     overrides:
 5 |       vars:
 6 |         event_time_var: null
 7 |         quick_are_queries_identical_cols: ["id", "col1", "col2"]
 8 |     # Both inputs hold the same three rows.
 9 |     # No event-time filtering is configured for this test.
10 |     given:
11 |       - input: ref('unit_test_model_a')
12 |         rows:
13 |           - { "id": 1, "col1": "abc", "col2": "def" }
14 |           - { "id": 2, "col1": "hij", "col2": "klm" }
15 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
16 |       - input: ref('unit_test_model_b')
17 |         rows:
18 |           - { "id": 1, "col1": "abc", "col2": "def" }
19 |           - { "id": 2, "col1": "hij", "col2": "klm" }
20 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
21 |     # Expected output: a single row reporting the tables as identical.
22 |     expect:
23 |       rows:
24 |         - { "are_tables_identical": true }
25 | 
26 |   - name: quick_are_queries_identical_identical_tables_event_time_filter
27 |     model: unit_quick_are_queries_identical
28 |     overrides:
29 |       vars:
30 |         quick_are_queries_identical_cols: ['id', 'col1', 'col2', 'created_at']
31 |         event_time_var: 'created_at'
32 |       macros: 
33 |         audit_helper._get_comparison_bounds:
34 |           "min_event_time": "2024-01-02"
35 |           "max_event_time": "2024-01-03"
36 |           "event_time": 'created_at'
37 |       
38 |     given:
39 |       - input: ref('unit_test_model_a')
40 |         rows:
41 |           - { "id": 1, "col1": "abc", "col2": "def", "created_at": '2024-01-01' }
42 |           - { "id": 2, "col1": "hij", "col2": "klm", "created_at": '2024-01-02' }
43 |           - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": '2024-01-03' }
44 |       - input: ref('unit_test_model_b')
45 |         rows:
46 |           - { "id": 2, "col1": "hij", "col2": "klm", "created_at": '2024-01-02' }
47 |           - { "id": 3, "col1": "nop", "col2": "qrs", "created_at": '2024-01-03' }
48 |         
49 |     expect:
50 |       rows:
51 |         - {"are_tables_identical": true}
52 | 
53 |   - name: quick_are_queries_identical_differences
54 |     model: unit_quick_are_queries_identical
55 |     overrides:
56 |       vars:
57 |         quick_are_queries_identical_cols: ['id', 'col1', 'col2']
58 |         event_time_var:
59 |     given:
60 |       - input: ref('unit_test_model_a')
61 |         rows:
62 |           - { "id": 1, "col1": "abc", "col2": "def" }
63 |           - { "id": 2, "col1": "hij", "col2": "klm" }
64 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
65 |       - input: ref('unit_test_model_b')
66 |         rows:
67 |           - { "id": 1, "col1": "abc", "col2": "def" }
68 |           - { "id": 2, "col1": "changed", "col2": "values" }
69 |           - { "id": 4, "col1": "nop", "col2": "qrs" }
70 |         
71 |     expect:
72 |       rows:
73 |         - {"are_tables_identical": false}
74 | 
75 |   - name: quick_are_queries_identical_identical_tables_with_null_pks
76 |     model: unit_quick_are_queries_identical
77 |     
78 |     given:
79 |       - input: ref('unit_test_model_a')
80 |         rows:
81 |           - { "id":, "col1": "abc", "col2": "def" }
82 |           - { "id":, "col1": "hij", "col2": "klm" }
83 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
84 |       - input: ref('unit_test_model_b')
85 |         rows:
86 |           - { "id":, "col1": "abc", "col2": "def" }
87 |           - { "id":, "col1": "hij", "col2": "klm" }
88 |           - { "id": 3, "col1": "nop", "col2": "qrs" }
89 |         
90 |     expect:
91 |       rows:
92 |         - {"are_tables_identical": true}
93 | 
94 |     overrides:
95 |       vars:
96 |         quick_are_queries_identical_cols: ['id', 'col1', 'col2']
97 |         event_time_var:
98 | 


--------------------------------------------------------------------------------
/integration_tests/package-lock.yml:
--------------------------------------------------------------------------------
1 | packages:
2 | - local: ../
3 | - package: dbt-labs/dbt_utils
4 |   version: 1.1.1
5 | sha1_hash: de2deba3d66ce03d8c02949013650cc9b94f6030
6 | 


--------------------------------------------------------------------------------
/integration_tests/packages.yml:
--------------------------------------------------------------------------------
1 | 
2 | packages:
3 |     - local: ../
4 | 


--------------------------------------------------------------------------------
/integration_tests/profiles.yml:
--------------------------------------------------------------------------------
 1 | integration_tests:
 2 |   target: postgres
 3 |   outputs:
 4 |     postgres:
 5 |       type: "postgres"
 6 |       host: "{{ env_var('POSTGRES_HOST') }}"
 7 |       user: "{{ env_var('POSTGRES_USER') }}"
 8 |       pass: "{{ env_var('DBT_ENV_SECRET_POSTGRES_PASS') }}"
 9 |       port: "{{ env_var('POSTGRES_PORT') | as_number }}"
10 |       dbname: "{{ env_var('POSTGRES_DATABASE') }}"
11 |       schema: "{{ env_var('POSTGRES_SCHEMA') }}"
12 |       threads: 5
13 | 
14 |     redshift:
15 |       type: redshift
16 |       host: "{{ env_var('REDSHIFT_TEST_HOST') }}"
17 |       user: "{{ env_var('REDSHIFT_TEST_USER') }}"
18 |       pass: "{{ env_var('REDSHIFT_TEST_PASS') }}"
19 |       dbname: "{{ env_var('REDSHIFT_TEST_DBNAME') }}"
20 |       port: "{{ env_var('REDSHIFT_TEST_PORT') | as_number }}"
21 |       schema: audit_helper_integration_tests_redshift
22 |       threads: 8
23 | 
24 |     bigquery:
25 |       type: bigquery
26 |       method: service-account
27 |       keyfile: "{{ env_var('BIGQUERY_SERVICE_KEY_PATH') }}"
28 |       project: "{{ env_var('BIGQUERY_TEST_DATABASE') }}"
29 |       schema: audit_helper_integration_tests_bigquery
30 |       threads: 8
31 | 
32 |     snowflake:
33 |       type: snowflake
34 |       account: "{{ env_var('SNOWFLAKE_TEST_ACCOUNT') }}"
35 |       user: "{{ env_var('SNOWFLAKE_TEST_USER') }}"
36 |       password: "{{ env_var('SNOWFLAKE_TEST_PASSWORD') }}"
37 |       role: "{{ env_var('SNOWFLAKE_TEST_ROLE') }}"
38 |       database: "{{ env_var('SNOWFLAKE_TEST_DATABASE') }}"
39 |       warehouse: "{{ env_var('SNOWFLAKE_TEST_WAREHOUSE') }}"
40 |       schema: audit_helper_integration_tests_snowflake
41 |       threads: 8
42 | 
43 |     databricks:
44 |       type: databricks
45 |       schema: audit_helper_integration_tests_databricks  # named for this project, like the other warehouse targets in this file
46 |       host: "{{ env_var('DATABRICKS_TEST_HOST') }}"
47 |       http_path: "{{ env_var('DATABRICKS_TEST_HTTP_PATH') }}"
48 |       token: "{{ env_var('DATABRICKS_TEST_ACCESS_TOKEN') }}"
49 |       threads: 10
50 |   
51 | 


--------------------------------------------------------------------------------
/integration_tests/seeds/data_compare_all_columns__albertsons_produce.csv:
--------------------------------------------------------------------------------
 1 | id,fruit,ripeness
 2 | 1,banana,yellow
 3 | 2,banana,brown
 4 | 3,banana,brown
 5 | 4,orange,green
 6 | 5,orange,orange
 7 | 6,,brown
 8 | 7,orange,orange
 9 | 9,apple,mushy
10 | 10,apple,


--------------------------------------------------------------------------------
/integration_tests/seeds/data_compare_all_columns__albertsons_produce__concat_pk.csv:
--------------------------------------------------------------------------------
 1 | produce_category,id,produce,ripeness
 2 | vegetable,1,spinach,wilted
 3 | fruit,1,banana,yellow
 4 | fruit,2,banana,brown
 5 | fruit,3,banana,brown
 6 | fruit,4,orange,green
 7 | fruit,5,orange,orange
 8 | fruit,6,,brown
 9 | fruit,7,orange,orange
10 | fruit,9,apple,mushy
11 | fruit,10,apple,


--------------------------------------------------------------------------------
/integration_tests/seeds/data_compare_all_columns__market_of_choice_produce.csv:
--------------------------------------------------------------------------------
1 | id,fruit,ripeness
2 | 1,banana,yellow
3 | 2,banana,green
4 | 3,banana,brown
5 | 4,orange,green
6 | 5,orange,orange
7 | 6,orange,brown
8 | 7,orange,
9 | 8,apple,mushy


--------------------------------------------------------------------------------
/integration_tests/seeds/data_compare_all_columns__market_of_choice_produce__concat_pk.csv:
--------------------------------------------------------------------------------
 1 | produce_category,id,produce,ripeness
 2 | vegetable,1,spinach,wilted
 3 | fruit,1,banana,yellow
 4 | fruit,2,banana,green
 5 | fruit,3,banana,brown
 6 | fruit,4,orange,green
 7 | fruit,5,orange,orange
 8 | fruit,6,orange,brown
 9 | fruit,7,orange,
10 | fruit,8,apple,mushy


--------------------------------------------------------------------------------
/integration_tests/seeds/data_compare_relation_columns_a.csv:
--------------------------------------------------------------------------------
1 | awesome_column,zany_column,brave_column,young_column,cool_column,xcellent_column
2 | testing_is_fun,2022-02-22,1234,9.8765,false,2020-01-01T21:08:17


--------------------------------------------------------------------------------
/integration_tests/seeds/data_compare_relation_columns_b.csv:
--------------------------------------------------------------------------------
1 | magnificent_column,zany_column,brave_column,young_column,cool_column,xpeditionary_column,awesome_column
2 | 2022-02-22,my_string_here,1234,9.8765,true,2020-01-01T21:08:17,testing_is_fun


--------------------------------------------------------------------------------
/integration_tests/seeds/data_compare_relations__a_relation.csv:
--------------------------------------------------------------------------------
1 | col_a,col_b
2 | 1,a
3 | 2,b
4 | 


--------------------------------------------------------------------------------
/integration_tests/seeds/data_compare_relations__b_relation.csv:
--------------------------------------------------------------------------------
1 | col_a,col_b
2 | 1,a
3 | 2,c
4 | 


--------------------------------------------------------------------------------
/integration_tests/seeds/data_compare_which_columns_differ_a.csv:
--------------------------------------------------------------------------------
1 | id,value_changes,becomes_null,becomes_not_null,does_not_change
2 | 1,pink,22,a,dave
3 | 2,blue,33,,dave
4 | 3,green,44,c,dave
5 | 4,yellow,55,d,dave


--------------------------------------------------------------------------------
/integration_tests/seeds/data_compare_which_columns_differ_b.csv:
--------------------------------------------------------------------------------
1 | id,value_changes,becomes_null,becomes_not_null,does_not_change
2 | 1,red,22,a,dave
3 | 2,blue,,b,dave
4 | 3,green,44,c,dave
5 | 4,yellow,55,d,dave


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_all_columns_concat_pk_with_summary.csv:
--------------------------------------------------------------------------------
1 | column_name,perfect_match,null_in_a,null_in_b,missing_from_a,missing_from_b,conflicting_values
2 | ID,8,0,0,2,1,0
3 | PRODUCE,7,0,1,2,1,1
4 | PRODUCE_CATEGORY,8,0,0,2,1,0
5 | RIPENESS,6,1,1,2,1,2


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_all_columns_concat_pk_without_summary.csv:
--------------------------------------------------------------------------------
 1 | primary_key,column_name,perfect_match,null_in_a,null_in_b,missing_from_a,missing_from_b,conflicting_values
 2 | 00f0200cfb8e8443dfa3566bd60170a7,PRODUCE_CATEGORY,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
 3 | 00f0200cfb8e8443dfa3566bd60170a7,PRODUCE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
 4 | 00f0200cfb8e8443dfa3566bd60170a7,ID,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
 5 | 00f0200cfb8e8443dfa3566bd60170a7,RIPENESS,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
 6 | 0262eff11e473d76cf5e71ba1bb9adde,ID,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE
 7 | 0262eff11e473d76cf5e71ba1bb9adde,PRODUCE,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE
 8 | 0262eff11e473d76cf5e71ba1bb9adde,PRODUCE_CATEGORY,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE
 9 | 0262eff11e473d76cf5e71ba1bb9adde,RIPENESS,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE
10 | 231ee7461c22557b0b811bc510df9c3f,PRODUCE_CATEGORY,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE
11 | 231ee7461c22557b0b811bc510df9c3f,PRODUCE,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE
12 | 231ee7461c22557b0b811bc510df9c3f,RIPENESS,FALSE,FALSE,TRUE,TRUE,FALSE,FALSE
13 | 231ee7461c22557b0b811bc510df9c3f,ID,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE
14 | 51f71ec6b715b6071a0b6a9647bce8a7,PRODUCE_CATEGORY,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
15 | 51f71ec6b715b6071a0b6a9647bce8a7,RIPENESS,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
16 | 51f71ec6b715b6071a0b6a9647bce8a7,PRODUCE,FALSE,FALSE,TRUE,FALSE,FALSE,TRUE
17 | 51f71ec6b715b6071a0b6a9647bce8a7,ID,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
18 | 559c0b59e42ff35a37de91977b660800,RIPENESS,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
19 | 559c0b59e42ff35a37de91977b660800,PRODUCE_CATEGORY,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
20 | 559c0b59e42ff35a37de91977b660800,PRODUCE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
21 | 559c0b59e42ff35a37de91977b660800,ID,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
22 | 5f3bd1bba6beca5a23d4cde34a9bd96b,RIPENESS,FALSE,TRUE,FALSE,FALSE,FALSE,TRUE
23 | 5f3bd1bba6beca5a23d4cde34a9bd96b,PRODUCE_CATEGORY,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
24 | 5f3bd1bba6beca5a23d4cde34a9bd96b,ID,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
25 | 5f3bd1bba6beca5a23d4cde34a9bd96b,PRODUCE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
26 | a971c9a048ccd0fd4d282cc2a55734bc,PRODUCE_CATEGORY,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE
27 | a971c9a048ccd0fd4d282cc2a55734bc,RIPENESS,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE
28 | a971c9a048ccd0fd4d282cc2a55734bc,PRODUCE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE
29 | a971c9a048ccd0fd4d282cc2a55734bc,ID,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE
30 | bd483dcfa375c6fd78c89072de1eea20,PRODUCE_CATEGORY,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
31 | bd483dcfa375c6fd78c89072de1eea20,PRODUCE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
32 | bd483dcfa375c6fd78c89072de1eea20,ID,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
33 | bd483dcfa375c6fd78c89072de1eea20,RIPENESS,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
34 | cfd3543ee591403d825bf0a1618b1709,ID,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
35 | cfd3543ee591403d825bf0a1618b1709,PRODUCE_CATEGORY,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
36 | cfd3543ee591403d825bf0a1618b1709,RIPENESS,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE
37 | cfd3543ee591403d825bf0a1618b1709,PRODUCE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
38 | eae3b305c437133aebdd66788f38e262,ID,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
39 | eae3b305c437133aebdd66788f38e262,RIPENESS,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
40 | eae3b305c437133aebdd66788f38e262,PRODUCE_CATEGORY,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
41 | eae3b305c437133aebdd66788f38e262,PRODUCE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
42 | f2448f021cb149747e9ada2531d5116d,PRODUCE_CATEGORY,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
43 | f2448f021cb149747e9ada2531d5116d,RIPENESS,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
44 | f2448f021cb149747e9ada2531d5116d,PRODUCE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE
45 | f2448f021cb149747e9ada2531d5116d,ID,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_all_columns_where_clause.csv:
--------------------------------------------------------------------------------
 1 | primary_key,column_name,perfect_match,null_in_a,null_in_b,missing_from_a,missing_from_b,conflicting_values
 2 | 2,RIPENESS,false,false,false,false,false,true
 3 | 6,FRUIT,false,false,true,false,false,true
 4 | 7,RIPENESS,false,true,false,false,false,true
 5 | 8,ID,false,false,false,false,true,false
 6 | 8,FRUIT,false,false,false,false,true,false
 7 | 8,RIPENESS,false,false,false,false,true,false
 8 | 9,ID,false,false,false,true,false,false
 9 | 9,FRUIT,false,false,false,true,false,false
10 | 9,RIPENESS,false,false,false,true,false,false
11 | 10,ID,false,false,false,true,false,false
12 | 10,FRUIT,false,false,false,true,false,false
13 | 10,RIPENESS,false,false,true,true,false,false


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_all_columns_with_summary.csv:
--------------------------------------------------------------------------------
1 | column_name,perfect_match,null_in_a,null_in_b,missing_from_a,missing_from_b,conflicting_values
2 | FRUIT,6,0,1,2,1,1
3 | ID,7,0,0,2,1,0
4 | RIPENESS,5,1,1,2,1,2


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_all_columns_with_summary_and_exclude.csv:
--------------------------------------------------------------------------------
1 | column_name,perfect_match,null_in_a,null_in_b,missing_from_a,missing_from_b,conflicting_values
2 | FRUIT,6,0,1,2,1,1
3 | ID,7,0,0,2,1,0


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_all_columns_without_summary.csv:
--------------------------------------------------------------------------------
 1 | primary_key,column_name,perfect_match,null_in_a,null_in_b,missing_from_a,missing_from_b,conflicting_values
 2 | 1,ID,true,false,false,false,false,false
 3 | 2,ID,true,false,false,false,false,false
 4 | 3,ID,true,false,false,false,false,false
 5 | 4,ID,true,false,false,false,false,false
 6 | 5,ID,true,false,false,false,false,false
 7 | 6,ID,true,false,false,false,false,false
 8 | 7,ID,true,false,false,false,false,false
 9 | 8,ID,false,false,false,false,true,false
10 | 9,ID,false,false,false,true,false,false
11 | 10,ID,false,false,false,true,false,false
12 | 1,FRUIT,true,false,false,false,false,false
13 | 2,FRUIT,true,false,false,false,false,false
14 | 3,FRUIT,true,false,false,false,false,false
15 | 4,FRUIT,true,false,false,false,false,false
16 | 5,FRUIT,true,false,false,false,false,false
17 | 6,FRUIT,false,false,true,false,false,true
18 | 7,FRUIT,true,false,false,false,false,false
19 | 8,FRUIT,false,false,false,false,true,false
20 | 9,FRUIT,false,false,false,true,false,false
21 | 10,FRUIT,false,false,false,true,false,false
22 | 1,RIPENESS,true,false,false,false,false,false
23 | 2,RIPENESS,false,false,false,false,false,true
24 | 3,RIPENESS,true,false,false,false,false,false
25 | 4,RIPENESS,true,false,false,false,false,false
26 | 5,RIPENESS,true,false,false,false,false,false
27 | 6,RIPENESS,true,false,false,false,false,false
28 | 7,RIPENESS,false,true,false,false,false,true
29 | 8,RIPENESS,false,false,false,false,true,false
30 | 9,RIPENESS,false,false,false,true,false,false
31 | 10,RIPENESS,false,false,true,true,false,false


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_relation_columns.csv:
--------------------------------------------------------------------------------
1 | COLUMN_NAME,A_ORDINAL_POSITION,B_ORDINAL_POSITION,HAS_ORDINAL_POSITION_MATCH,HAS_DATA_TYPE_MATCH
2 | awesome_column,1,7,false,true
3 | magnificent_column,,1,false,false
4 | zany_column,2,2,true,false
5 | brave_column,3,3,true,true
6 | young_column,4,4,true,true
7 | cool_column,5,5,true,true
8 | xpeditionary_column,,6,false,false
9 | xcellent_column,6,,false,false


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_relations_with_exclude.csv:
--------------------------------------------------------------------------------
1 | in_a,in_b,count,percent_of_total
2 | True,True,2,100
3 | 


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_relations_without_exclude.csv:
--------------------------------------------------------------------------------
1 | in_a,in_b,count,percent_of_total
2 | True,True,1,33.33
3 | True,False,1,33.33
4 | False,True,1,33.33
5 | 


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_row_counts.csv:
--------------------------------------------------------------------------------
1 | relation_name,total_records
2 | a,2
3 | b,2


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_which_columns_differ.csv:
--------------------------------------------------------------------------------
1 | column_name,has_difference
2 | id,false
3 | value_changes,true
4 | becomes_null,true
5 | becomes_not_null,true
6 | does_not_change,false


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_which_columns_differ_exclude_cols.csv:
--------------------------------------------------------------------------------
1 | column_name,has_difference
2 | id,false
3 | value_changes,true
4 | becomes_not_null,true
5 | does_not_change,false


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_with_summary.csv:
--------------------------------------------------------------------------------
1 | in_a,in_b,count,percent_of_total
2 | True,True,1,33.33
3 | True,False,1,33.33
4 | False,True,1,33.33


--------------------------------------------------------------------------------
/integration_tests/seeds/expected_results__compare_without_summary.csv:
--------------------------------------------------------------------------------
1 | col_a,col_b,in_a,in_b
2 | 2,b,true,false
3 | 2,c,false,true


--------------------------------------------------------------------------------
/integration_tests/tests/fixtures/complex_struct.sql:
--------------------------------------------------------------------------------
1 | {% set json %}
2 |     '{"emails":["john.doe@example.com","john.d@example.com"],"phones":[{"number":"123-456-7890","type":"home"},{"number":"987-654-3210","type":"work"}]}'
3 | {% endset %}
4 | {# Fixture: one row whose col2 is produced by passing the quoted JSON literal above (an emails array plus a phones array of objects, home entry first) to the project's _complex_json_function macro. #}
5 | select 
6 |     1 as id, 
7 |     'John Doe' as col1, 
8 |     {{ audit_helper_integration_tests._complex_json_function(json) }} as col2


--------------------------------------------------------------------------------
/integration_tests/tests/fixtures/complex_struct_different_order.sql:
--------------------------------------------------------------------------------
1 | {% set json %}
2 |     '{"emails":["john.doe@example.com","john.d@example.com"],"phones":[{"number":"987-654-3210","type":"work"}, {"number":"123-456-7890","type":"home"}]}'
3 | {% endset %}
4 | {# Fixture: one row whose col2 is produced by passing the quoted JSON literal above to the project's _complex_json_function macro; here the phones array lists the work entry before the home entry. #}
5 | select 
6 |     1 as id, 
7 |     'John Doe' as col1, 
8 |     {{ audit_helper_integration_tests._complex_json_function(json) }} as col2


--------------------------------------------------------------------------------
/integration_tests/tests/fixtures/complex_struct_different_value.sql:
--------------------------------------------------------------------------------
1 | {% set json %}
2 | '{"emails":["john.smith@example.com","john.s@example.com"],"phones":[{"number":"123-456-7890","type":"home"},{"number":"987-654-3210","type":"work"}]}'
3 | {% endset %}
4 | {# Fixture: one row whose col2 is produced by passing the quoted JSON literal above to the project's _complex_json_function macro; here the emails array holds the john.smith / john.s addresses. #}
5 | select 
6 |     1 as id, 
7 |     'John Doe' as col1, 
8 |     {{ audit_helper_integration_tests._complex_json_function(json) }} as col2


--------------------------------------------------------------------------------
/integration_tests/tests/fixtures/simple_struct.sql:
--------------------------------------------------------------------------------
 1 | {% if target.type != 'redshift' %}
 2 | {# Fixture: one row whose col2 holds street, city and state. The branch keys on target.type (the adapter type) rather than the user-chosen target name. Non-Redshift adapters call the project's _basic_json_function(), which presumably renders an object-constructor function name (TODO confirm), followed by alternating key/value arguments. #}
 3 | select 
 4 |     1 as id, 
 5 |     'John Doe' as col1, 
 6 |     {{ audit_helper_integration_tests._basic_json_function() -}}('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
 7 | 
 8 | {% else %}
 9 | {# Redshift: col2 is built with json_parse() on a JSON string literal carrying the same keys and values. #}
10 | select 
11 |   1 AS id, 
12 |   'John Doe' AS col1, 
13 |   json_parse('{"street": "123 Main St", "city": "Anytown", "state": "CA"}') AS col2
14 | {% endif %}


--------------------------------------------------------------------------------
/integration_tests/tests/fixtures/simple_struct_different_order.sql:
--------------------------------------------------------------------------------
 1 | {% if target.type != 'redshift' %}
 2 | {# Fixture: one row whose col2 holds state, street and city, in that key order. The branch keys on target.type (the adapter type) rather than the user-chosen target name. Non-Redshift adapters call the project's _basic_json_function(), which presumably renders an object-constructor function name (TODO confirm), followed by alternating key/value arguments. #}
 3 | select 
 4 |     1 as id, 
 5 |     'John Doe' as col1, 
 6 |     {{ audit_helper_integration_tests._basic_json_function() -}}( 'state', 'CA', 'street', '123 Main St', 'city', 'Anytown') as col2
 7 | 
 8 | {% else %}
 9 | {# Redshift: col2 is built with json_parse() on a JSON string literal carrying the same keys in the same order. #}
10 | select 
11 |   1 AS id, 
12 |   'John Doe' AS col1, 
13 |   json_parse('{"state": "CA", "street": "123 Main St", "city": "Anytown"}') AS col2
14 | {% endif %}


--------------------------------------------------------------------------------
/integration_tests/tests/fixtures/simple_struct_removed_key.sql:
--------------------------------------------------------------------------------
 1 | {% if target.type != 'redshift' %}
 2 | {# Fixture: one row whose col2 holds only street and state (no city key). The branch keys on target.type (the adapter type) rather than the user-chosen target name. Non-Redshift adapters call the project's _basic_json_function(), which presumably renders an object-constructor function name (TODO confirm), followed by alternating key/value arguments. #}
 3 | select 
 4 |     1 as id, 
 5 |     'John Doe' as col1, 
 6 |     {{ audit_helper_integration_tests._basic_json_function() -}}('street', '123 Main St', 'state', 'CA') as col2
 7 | 
 8 | {% else %}
 9 | {# Redshift: col2 is built with json_parse() on a JSON string literal carrying the same two keys. #}
10 | select 
11 |   1 AS id, 
12 |   'John Doe' AS col1, 
13 |   json_parse('{"street": "123 Main St", "state": "CA"}') AS col2
14 | {% endif %}


--------------------------------------------------------------------------------
/macros/compare_all_columns.sql:
--------------------------------------------------------------------------------
  1 | {% macro compare_all_columns( a_relation, b_relation, primary_key,  exclude_columns=[],summarize=true ) -%}
  2 |   {{ return(adapter.dispatch('compare_all_columns', 'audit_helper')( a_relation, b_relation, primary_key, exclude_columns, summarize )) }}
  3 | {%- endmacro %}
  4 | {# compare_all_columns (above) resolves its implementation through adapter.dispatch in the 'audit_helper' namespace and forwards all five arguments; default__compare_all_columns (below) is the default__-prefixed implementation. #}
  5 | {% macro default__compare_all_columns( a_relation, b_relation, primary_key, exclude_columns=[], summarize=true ) -%}
  6 | {# Builds one compare_column_values_verbose subquery per column of a_relation (minus exclude_columns), unions them inside a CTE named main, then selects either per-column counts (summarize truthy) or one row of flags per primary key and column. #}
  7 |   {% set column_names = dbt_utils.get_filtered_columns_in_relation(from=a_relation, except=exclude_columns) %}
  8 | 
  9 |   {# We explicitly select the primary_key and rename it to support any SQL as the primary_key -
 10 |   a column or concatenated columns. This assumes that a_relation and b_relation do not already
 11 |   have a field named dbt_audit_helper_pk. #}
 12 | 
 13 |   {% set a_query %}      
 14 |     select
 15 |       *,
 16 |       {{ primary_key }} as dbt_audit_helper_pk
 17 |     from {{ a_relation }}
 18 |   {% endset %}
 19 | 
 20 |   {% set b_query %}
 21 |     select
 22 |       *,
 23 |       {{ primary_key }} as dbt_audit_helper_pk
 24 |     from {{ b_relation }}
 25 |   {% endset %}
 26 | 
 27 |   {% for column_name in column_names %}
 28 | 
 29 |     {% set audit_query = audit_helper.compare_column_values_verbose(
 30 |       a_query=a_query,
 31 |       b_query=b_query,
 32 |       primary_key="dbt_audit_helper_pk",
 33 |       column_to_compare=column_name
 34 |     ) %}
 35 | 
 36 |     /*  Create a query combining results from all columns so that the user, or the 
 37 |     test suite, can examine all at once.
 38 |     */
 39 |     
 40 |     {% if loop.first %}
 41 | 
 42 |     /*  Create a CTE that wraps all the unioned subqueries that are created
 43 |         in this for loop
 44 |     */
 45 |       with main as ( 
 46 | 
 47 |     {% endif %}
 48 | 
 49 |     /*  There will be one audit_query subquery for each column
 50 |     */
 51 |     ( {{ audit_query }} )
 52 | 
 53 |     {% if not loop.last %}
 54 | 
 55 |       union all
 56 | 
 57 |     {% else %}
 58 | 
 59 |     ), 
 60 |     
 61 |       {%- if summarize %}
 62 | 
 63 |         final as (
 64 |           select
 65 |             upper(column_name) as column_name,
 66 |             sum(case when perfect_match then 1 else 0 end) as perfect_match,
 67 |             sum(case when null_in_a then 1 else 0 end) as null_in_a,
 68 |             sum(case when null_in_b then 1 else 0 end) as null_in_b,
 69 |             sum(case when missing_from_a then 1 else 0 end) as missing_from_a,
 70 |             sum(case when missing_from_b then 1 else 0 end) as missing_from_b,
 71 |             sum(case when conflicting_values then 1 else 0 end) as conflicting_values
 72 |           from main
 73 |           group by 1
 74 |           order by column_name
 75 |         )
 76 | 
 77 |       {%- else %}
 78 | 
 79 |         final as (
 80 |           select
 81 |             primary_key,           
 82 |             upper(column_name) as column_name,
 83 |             perfect_match,
 84 |             null_in_a,
 85 |             null_in_b,
 86 |             missing_from_a,
 87 |             missing_from_b,
 88 |             conflicting_values
 89 |           from main    
 90 |           order by primary_key
 91 |         )
 92 | 
 93 |       {%- endif %}
 94 | 
 95 |       select * from final
 96 |     
 97 |     {% endif %}
 98 | 
 99 |   {% endfor %}
100 |     
101 | {% endmacro %}


--------------------------------------------------------------------------------
/macros/compare_and_classify_query_results.sql:
--------------------------------------------------------------------------------
 1 | {% macro compare_and_classify_query_results(a_query, b_query, primary_key_columns=[], columns=[], event_time=None, sample_limit=20) %}
 2 |     {#
 3 |         Renders a query that labels every compared row with where it was found
 4 |         (dbt_audit_in_a / dbt_audit_in_b), a dbt_audit_row_status, the number of rows
 5 |         sharing that status, and a per-status sample number.
 6 | 
 7 |         Arguments:
 8 |           a_query, b_query:     SQL text of the two queries to compare.
 9 |           primary_key_columns:  key columns; passed with `columns` through
10 |                                 _ensure_all_pks_are_in_column_set to get the final column list.
11 |           columns:              columns to compare.
12 |           event_time:           optional; when truthy, _get_comparison_bounds is called and
13 |                                 its result is forwarded to _generate_set_results.
14 |           sample_limit:         keep rows whose dbt_audit_sample_number is <= this value;
15 |                                 a falsy value disables the filter.
16 | 
17 |         The a_intersect_b, a_except_b and b_except_a CTEs read below are expected to be
18 |         defined by _generate_set_results (not verifiable from this macro alone).
19 |     #}
20 | 
21 |     {% set columns = audit_helper._ensure_all_pks_are_in_column_set(primary_key_columns, columns) %}
22 | 
23 |     {# Always define event_time_props so the call below never receives an undefined name. #}
24 |     {% set event_time_props = none %}
25 |     {% if event_time %}
26 |         {% set event_time_props = audit_helper._get_comparison_bounds(a_query, b_query, event_time) %}
27 |     {% endif %}
28 | 
29 |     with
30 | 
31 |     {{ audit_helper._generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) }}
32 | 
33 |     ,
34 | 
35 |     all_records as (
36 | 
37 |         select
38 |             *,
39 |             true as dbt_audit_in_a,
40 |             true as dbt_audit_in_b
41 |         from a_intersect_b
42 | 
43 |         union all
44 | 
45 |         select
46 |             *,
47 |             true as dbt_audit_in_a,
48 |             false as dbt_audit_in_b
49 |         from a_except_b
50 | 
51 |         union all
52 | 
53 |         select
54 |             *,
55 |             false as dbt_audit_in_a,
56 |             true as dbt_audit_in_b
57 |         from b_except_a
58 | 
59 |     ),
60 | 
61 |     classified as (
62 |         select
63 |             *,
64 |             {{ audit_helper._classify_audit_row_status() }} as dbt_audit_row_status
65 |         from all_records
66 |     ),
67 | 
68 |     final as (
69 |         select
70 |             *,
71 |             {{ audit_helper._count_num_rows_in_status() }} as dbt_audit_num_rows_in_status,
72 |             -- using dense_rank so that modified rows (which have a full row for both the left and right side) both get picked up in the sample. 
73 |             -- For every other type this is equivalent to a row_number()
74 |             dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key, dbt_audit_pk_row_num) as dbt_audit_sample_number
75 |         from classified
76 |     )
77 | 
78 |     select * from final
79 |     {% if sample_limit %}
80 |         where dbt_audit_sample_number <= {{ sample_limit }}
81 |     {% endif %}
82 |     order by dbt_audit_row_status, dbt_audit_sample_number
83 | 
84 | {% endmacro %}


--------------------------------------------------------------------------------
/macros/compare_and_classify_relation_rows.sql:
--------------------------------------------------------------------------------
 1 | {% macro compare_and_classify_relation_rows(a_relation, b_relation, primary_key_columns=[], columns=None, event_time=None, sample_limit=20) %}
 2 |     {#-
 3 |         Relation-based entry point: wraps each relation in "select * from ..." and
 4 |         delegates to compare_and_classify_query_results. When no columns are given,
 5 |         the column list comes from _get_intersecting_columns_from_relations.
 6 |     -#}
 7 |     {%- set columns = columns or audit_helper._get_intersecting_columns_from_relations(a_relation, b_relation) -%}
 8 |     {%- set a_query = "select * from " ~ a_relation -%}
 9 |     {%- set b_query = "select * from " ~ b_relation -%}
10 | 
11 |     {{
12 |         audit_helper.compare_and_classify_query_results(
13 |             a_query=a_query,
14 |             b_query=b_query,
15 |             primary_key_columns=primary_key_columns,
16 |             columns=columns,
17 |             event_time=event_time,
18 |             sample_limit=sample_limit
19 |         )
20 |     }}
21 | {% endmacro %}


--------------------------------------------------------------------------------
/macros/compare_column_values.sql:
--------------------------------------------------------------------------------
 1 | {% macro compare_column_values(a_query, b_query, primary_key, column_to_compare, emojis=True, a_relation_name='a', b_relation_name='b') -%}
 2 |   {{ return(adapter.dispatch('compare_column_values', 'audit_helper')(a_query, b_query, primary_key, column_to_compare, emojis, a_relation_name, b_relation_name)) }}
 3 | {%- endmacro %}
 4 | {# Default implementation: full-outer-joins both queries on primary_key, labels every joined row with a match status for column_to_compare, then returns one row per status with its record count and share of the total. emojis toggles the emoji prefix on each status; a_relation_name / b_relation_name are the labels used in the status text. #}
 5 | {% macro default__compare_column_values(a_query, b_query, primary_key, column_to_compare, emojis, a_relation_name, b_relation_name) -%}
 6 | with a_query as (
 7 |     {{ a_query }}
 8 | ),
 9 | 
10 | b_query as (
11 |     {{ b_query }}
12 | ),
13 | 
14 | joined as (
15 |     select
16 |         coalesce(a_query.{{ primary_key }}, b_query.{{ primary_key }}) as {{ primary_key }},
17 |         a_query.{{ column_to_compare }} as a_query_value,
18 |         b_query.{{ column_to_compare }} as b_query_value,
19 |         case
20 |             when a_query.{{ column_to_compare }} = b_query.{{ column_to_compare }} then '{% if emojis %}✅: {% endif %}perfect match'
21 |             when a_query.{{ column_to_compare }} is null and b_query.{{ column_to_compare }} is null and a_query.{{ primary_key }} is not null and b_query.{{ primary_key }} is not null then '{% if emojis %}✅: {% endif %}both are null'
22 |             when a_query.{{ primary_key }} is null then '{% if emojis %}🤷: {% endif %}missing from {{ a_relation_name }}'
23 |             when b_query.{{ primary_key }} is null then '{% if emojis %}🤷: {% endif %}missing from {{ b_relation_name }}'
24 |             when a_query.{{ column_to_compare }} is null then '{% if emojis %}🤷: {% endif %}value is null in {{ a_relation_name }} only'
25 |             when b_query.{{ column_to_compare }} is null then '{% if emojis %}🤷: {% endif %}value is null in {{ b_relation_name }} only'
26 |             when a_query.{{ column_to_compare }} != b_query.{{ column_to_compare }} then '{% if emojis %}❌: {% endif %}‍values do not match'
27 |             else 'unknown' -- this should never happen
28 |         end as match_status,
29 |         case
30 |             when a_query.{{ column_to_compare }} = b_query.{{ column_to_compare }} then 0
31 |             when a_query.{{ column_to_compare }} is null and b_query.{{ column_to_compare }} is null and a_query.{{ primary_key }} is not null and b_query.{{ primary_key }} is not null then 1
32 |             when a_query.{{ primary_key }} is null then 2
33 |             when b_query.{{ primary_key }} is null then 3
34 |             when a_query.{{ column_to_compare }} is null then 4
35 |             when b_query.{{ column_to_compare }} is null then 5
36 |             when a_query.{{ column_to_compare }} != b_query.{{ column_to_compare }} then 6
37 |             else 7 -- this should never happen
38 |         end as match_order
39 |     {# "both are null" requires both primary keys to be present: after the full outer join a row that exists on only one side has nulls for every column of the other side, and must be reported as missing rather than as a match. #}
40 |     from a_query
41 | 
42 |     full outer join b_query on a_query.{{ primary_key }} = b_query.{{ primary_key }}
43 | ),
44 | 
45 | aggregated as (
46 |     select
47 |         '{{ column_to_compare }}' as column_name,
48 |         match_status,
49 |         match_order,
50 |         count(*) as count_records
51 |     from joined
52 | 
53 |     group by column_name, match_status, match_order
54 | )
55 | {# match_order mirrors the branch order of match_status and is only used to sort the final result. #}
56 | select
57 |     column_name,
58 |     match_status,
59 |     count_records,
60 |     round(100.0 * count_records / sum(count_records) over (), 2) as percent_of_total
61 | 
62 | from aggregated
63 | 
64 | order by match_order
65 | 
66 | {% endmacro %}
67 | 


--------------------------------------------------------------------------------
/macros/compare_column_values_verbose.sql:
--------------------------------------------------------------------------------
 1 | {% macro compare_column_values_verbose(a_query, b_query, primary_key, column_to_compare) -%}
 2 |   {{ return(adapter.dispatch('compare_column_values_verbose', 'audit_helper')(a_query, b_query, primary_key, column_to_compare)) }}
 3 | {%- endmacro %}
 4 | {# Default implementation: full-outer-joins both queries on primary_key and returns one row per key with boolean flags #}
 5 | {# (perfect_match, null_in_a, null_in_b, missing_from_a, missing_from_b, conflicting_values) describing column_to_compare. #}
 6 | {% macro default__compare_column_values_verbose(a_query, b_query, primary_key, column_to_compare) -%}
 7 | with a_query as (
 8 |     {{ a_query }}
 9 | ),
10 | 
11 | b_query as (
12 |     {{ b_query }}
13 | )
14 |     select
15 |         coalesce(a_query.{{ primary_key }}, b_query.{{ primary_key }}) as primary_key,
16 |         {# The explicit ::text cast is keyed on the adapter type (target.type); target.name is a user-chosen profile target name and says nothing about the warehouse. #}
17 |         {% if target.type in ('postgres', 'redshift') %}
18 |             '{{ column_to_compare }}'::text as column_name,
19 |         {% else %}
20 |             '{{ column_to_compare }}' as column_name,
21 |         {% endif %}
22 |         {# perfect_match: equal non-null values on rows present on both sides, otherwise both values null; anything else is false. #}
23 |         coalesce(
24 |             a_query.{{ column_to_compare }} = b_query.{{ column_to_compare }} and 
25 |                 a_query.{{ primary_key }} is not null and b_query.{{ primary_key }} is not null,
26 |             (a_query.{{ column_to_compare }} is null and b_query.{{ column_to_compare }} is null),
27 |             false
28 |         ) as perfect_match,
29 |         a_query.{{ column_to_compare }} is null and a_query.{{ primary_key }} is not null as null_in_a,
30 |         b_query.{{ column_to_compare }} is null and b_query.{{ primary_key }} is not null as null_in_b,
31 |         a_query.{{ primary_key }} is null as missing_from_a,
32 |         b_query.{{ primary_key }} is null as missing_from_b,
33 |         coalesce(
34 |             a_query.{{ primary_key }} is not null and b_query.{{ primary_key }} is not null and 
35 |             -- ensure that neither value is missing before considering it a conflict
36 |             (
37 |                 a_query.{{ column_to_compare }} != b_query.{{ column_to_compare }} or -- two not-null values that do not match
38 |                 (a_query.{{ column_to_compare }} is not null and b_query.{{ column_to_compare }} is null) or -- null in b and not null in a
39 |                 (a_query.{{ column_to_compare }} is null and b_query.{{ column_to_compare }} is not null) -- null in a and not null in b
40 |             ), 
41 |             false
42 |         ) as conflicting_values
43 |         -- considered a conflict if the values do not match AND at least one of the values is not null.
44 | 
45 |     from a_query
46 | 
47 |     full outer join b_query on (a_query.{{ primary_key }} = b_query.{{ primary_key }})
48 | 
49 | 
50 | 
51 | {% endmacro %} 
52 | 


--------------------------------------------------------------------------------
/macros/compare_queries.sql:
--------------------------------------------------------------------------------
  1 | {% macro compare_queries(a_query, b_query, primary_key=None, summarize=true, limit=None) -%}
  2 |   {{ return(adapter.dispatch('compare_queries', 'audit_helper')(a_query, b_query, primary_key, summarize, limit)) }}
  3 | {%- endmacro %}
  4 | {# Default implementation: splits the rows of both queries into "in both", "only in a" and "only in b" using set operations, then returns either per-group counts (summarize) or the mismatching rows themselves. #}
  5 | {% macro default__compare_queries(a_query, b_query, primary_key=None, summarize=true, limit=None) %}
  6 | 
  7 | with a as (
  8 | 
  9 |     {{ a_query }}
 10 | 
 11 | ),
 12 | 
 13 | b as (
 14 | 
 15 |     {{ b_query }}
 16 | 
 17 | ),
 18 | {# Rows found in both queries. #}
 19 | a_intersect_b as (
 20 | 
 21 |     select * from a
 22 |     {{ dbt.intersect() }}
 23 |     select * from b
 24 | 
 25 | ),
 26 | {# Rows found only in a. #}
 27 | a_except_b as (
 28 | 
 29 |     select * from a
 30 |     {{ dbt.except() }}
 31 |     select * from b
 32 | 
 33 | ),
 34 | {# Rows found only in b. #}
 35 | b_except_a as (
 36 | 
 37 |     select * from b
 38 |     {{ dbt.except() }}
 39 |     select * from a
 40 | 
 41 | ),
 42 | {# Stack the three groups, tagging each row with in_a / in_b membership flags. #}
 43 | all_records as (
 44 | 
 45 |     select
 46 |         *,
 47 |         true as in_a,
 48 |         true as in_b
 49 |     from a_intersect_b
 50 | 
 51 |     union all
 52 | 
 53 |     select
 54 |         *,
 55 |         true as in_a,
 56 |         false as in_b
 57 |     from a_except_b
 58 | 
 59 |     union all
 60 | 
 61 |     select
 62 |         *,
 63 |         false as in_a,
 64 |         true as in_b
 65 |     from b_except_a
 66 | 
 67 | ),
 68 | {# summarize: one row per (in_a, in_b) combination with its count and percentage of all records. #}
 69 | {%- if summarize %}
 70 | 
 71 | summary_stats as (
 72 | 
 73 |     select
 74 | 
 75 |         in_a,
 76 |         in_b,
 77 |         count(*) as count
 78 | 
 79 |     from all_records
 80 |     group by 1, 2
 81 | 
 82 | ),
 83 | 
 84 | final as (
 85 | 
 86 |     select
 87 | 
 88 |         *,
 89 |         round(100.0 * count / sum(count) over (), 2) as percent_of_total
 90 | 
 91 |     from summary_stats
 92 |     order by in_a desc, in_b desc
 93 | 
 94 | )
 95 | {# not summarize: return only the rows that are not present in both queries, ordered by primary_key first when one was given. #}
 96 | {%- else %}
 97 | 
 98 | final as (
 99 |     
100 |     select * from all_records
101 |     where not (in_a and in_b)
102 |     order by {{ primary_key ~ ", " if primary_key is not none }} in_a desc, in_b desc
103 | 
104 | )
105 | 
106 | {%- endif %}
107 | {# NOTE(review): the ordering is applied inside the "final" CTE; whether the outer select (and the limit below) preserves it is platform-dependent — confirm. limit is only applied to the row-level output. #}
108 | select * from final
109 | {%- if limit and not summarize %}
110 | limit {{ limit }}
111 | {%- endif %}
112 | 
113 | 
114 | {% endmacro %}
115 | 


--------------------------------------------------------------------------------
/macros/compare_relation_columns.sql:
--------------------------------------------------------------------------------
  1 | {% macro compare_relation_columns(a_relation, b_relation) %}
  2 |   {{ return(adapter.dispatch('compare_relation_columns', 'audit_helper')(a_relation, b_relation)) }}
  3 | {% endmacro %}
  4 | {# Default implementation: full-outer-joins the column listings of both relations on column_name and reports, per column, its ordinal position and data type on each side plus match and presence flags. #}
  5 | {% macro default__compare_relation_columns(a_relation, b_relation) %}
  6 | 
  7 | with a_cols as (
  8 |     {{ audit_helper.get_columns_in_relation_sql(a_relation) }}
  9 | ),
 10 | 
 11 | b_cols as (
 12 |     {{ audit_helper.get_columns_in_relation_sql(b_relation) }}
 13 | )
 14 | {# A null data_type on one side is what the in_a_only / in_b_only / in_both flags use to detect a column missing from that relation. #}
 15 | select
 16 |     column_name,
 17 |     a_cols.ordinal_position as a_ordinal_position,
 18 |     b_cols.ordinal_position as b_ordinal_position,
 19 |     a_cols.data_type as a_data_type,
 20 |     b_cols.data_type as b_data_type,
 21 |     coalesce(a_cols.ordinal_position = b_cols.ordinal_position, false) as has_ordinal_position_match,
 22 |     coalesce(a_cols.data_type = b_cols.data_type, false) as has_data_type_match,
 23 |     a_cols.data_type is not null and b_cols.data_type is null as in_a_only,
 24 |     b_cols.data_type is not null and a_cols.data_type is null as in_b_only,
 25 |     b_cols.data_type is not null and a_cols.data_type is not null as in_both
 26 | from a_cols
 27 | full outer join b_cols using (column_name)
 28 | order by coalesce(a_cols.ordinal_position, b_cols.ordinal_position)
 29 | 
 30 | {% endmacro %}
 31 | 
 32 | 
 33 | {% macro get_columns_in_relation_sql(relation) %}
 34 | {# Renders the adapter-specific SQL that lists the columns of `relation`; every implementation exposes at least column_name, ordinal_position and data_type. #}
 35 | {{ adapter.dispatch('get_columns_in_relation_sql', 'audit_helper')(relation) }}
 36 | 
 37 | {% endmacro %}
 38 | 
 39 | {% macro default__get_columns_in_relation_sql(relation) %}
 40 |   {#- Emits one literal "select" per column reported by the adapter, glued together with "union all"; ordinal_position is the 1-based loop index. -#}
 41 |   {%- set relation_columns = adapter.get_columns_in_relation(relation) %}
 42 |   {% for col in relation_columns %}
 43 |     {%- if not loop.first %}
 44 |     union all
 45 |     {% endif -%}
 46 |     select
 47 |       {{ dbt.string_literal(col.name) }} as column_name,
 48 |       {{ loop.index }} as ordinal_position,
 49 |       {{ dbt.string_literal(col.data_type) }} as data_type
 50 |   {% endfor %}
 51 | 
 52 | 
 53 | 
 54 | {% endmacro %}
 55 | 
 56 | {% macro redshift__get_columns_in_relation_sql(relation) %}
 57 |   {# Info schema query results only live on Redshift's leader node, so they can't be stored to a table/view; reuse the adapter-based default implementation instead. #}
 58 |   {% do return(audit_helper.default__get_columns_in_relation_sql(relation)) %}
 59 | {% endmacro %}
 60 | 
 61 | 
 62 | {% macro snowflake__get_columns_in_relation_sql(relation) %}
 63 | {#-
 64 | From: https://github.com/dbt-labs/dbt/blob/dev/louisa-may-alcott/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql#L48
 65 | Edited to include ordinal_position
 66 | -#}
 67 |   select
 68 |       ordinal_position,
 69 |       column_name,
 70 |       data_type,
 71 |       character_maximum_length,
 72 |       numeric_precision,
 73 |       numeric_scale
 74 |   {# Names are matched with case-insensitive equality rather than ilike: ilike treats "_" and "%" inside an identifier as wildcards, so e.g. my_table would also match myxtable. #}
 75 |   from
 76 |   {{ relation.information_schema('columns') }}
 77 | 
 78 |   where upper(table_name) = upper('{{ relation.identifier }}')
 79 |     {% if relation.schema %}
 80 |     and upper(table_schema) = upper('{{ relation.schema }}')
 81 |     {% endif %}
 82 |     {% if relation.database %}
 83 |     and upper(table_catalog) = upper('{{ relation.database }}')
 84 |     {% endif %}
 85 |   order by ordinal_position
 86 | {% endmacro %}
 87 | 
 88 | 
 89 | {% macro postgres__get_columns_in_relation_sql(relation) %}
 90 | {#-
 91 | From: https://github.com/dbt-labs/dbt/blob/23484b18b71010f701b5312f920f04529ceaa6b2/plugins/postgres/dbt/include/postgres/macros/adapters.sql#L32
 92 | Edited to include ordinal_position. Lists the relation's columns from its information schema, matching on table name and, when set, schema (exact, case-sensitive equality).
 93 | -#}
 94 |   select
 95 |       ordinal_position,
 96 |       column_name,
 97 |       data_type,
 98 |       character_maximum_length,
 99 |       numeric_precision,
100 |       numeric_scale
101 | 
102 |   from {{ relation.information_schema('columns') }}
103 |   where table_name = '{{ relation.identifier }}'
104 |     {% if relation.schema %}
105 |     and table_schema = '{{ relation.schema }}'
106 |     {% endif %}
107 |   order by ordinal_position
108 | {% endmacro %}
109 | 
110 | 
111 | {% macro bigquery__get_columns_in_relation_sql(relation) %}
112 | {# Lists the relation's columns from the INFORMATION_SCHEMA.COLUMNS view of its database and schema, matching on table name. #}
113 |   select
114 |       ordinal_position,
115 |       column_name,
116 |       data_type
117 | 
118 |   from `{{ relation.database }}`.`{{ relation.schema }}`.INFORMATION_SCHEMA.COLUMNS
119 |   where table_name = '{{ relation.identifier }}'
120 | {# NOTE(review): unlike the Snowflake and Postgres variants, no "order by ordinal_position" is applied here — confirm callers do not rely on row order. #}
121 | {% endmacro %}
122 | 


--------------------------------------------------------------------------------
/macros/compare_relations.sql:
--------------------------------------------------------------------------------
 1 | {% macro compare_relations(a_relation, b_relation, exclude_columns=[], primary_key=None, summarize=true, limit=None) %}
 2 | {#- Compares two relations through compare_queries, selecting a_relation's columns (minus exclude_columns) from both sides. -#}
 3 | {%- set kept_columns = dbt_utils.get_filtered_columns_in_relation(from=a_relation, except=exclude_columns) -%}
 4 | 
 5 | {#- Quote each retained column name and build a single comma-separated select list shared by both queries. -#}
 6 | {%- set quoted_columns = [] -%}
 7 | {%- for kept_column in kept_columns -%}
 8 |   {%- do quoted_columns.append(adapter.quote(kept_column)) -%}
 9 | {%- endfor -%}
10 | {%- set column_selection = quoted_columns | join(", ") -%}
11 | 
12 | {#- Query over the first relation. -#}
13 | {%- set a_query -%}
14 | select
15 | 
16 |   {{ column_selection }}
17 | 
18 | from {{ a_relation }}
19 | {%- endset -%}
20 | 
21 | {#- Query over the second relation, using the very same select list. -#}
22 | {%- set b_query -%}
23 | select
24 | 
25 |   {{ column_selection }}
26 | 
27 | from {{ b_relation }}
28 | {%- endset -%}
29 | 
30 | {#- primary_key, summarize and limit are forwarded to compare_queries unchanged. #}
31 | 
32 | {{ audit_helper.compare_queries(a_query, b_query, primary_key, summarize, limit) }}
33 | 
34 | {% endmacro %}
35 | 


--------------------------------------------------------------------------------
/macros/compare_row_counts.sql:
--------------------------------------------------------------------------------
 1 | {% macro compare_row_counts(a_relation, b_relation) %}
 2 |   {{ return(adapter.dispatch('compare_row_counts', 'audit_helper')(a_relation, b_relation)) }}
 3 | {% endmacro %}
 4 | {# Default implementation: returns two rows, one per relation, each holding the rendered relation name and its count(*). #}
 5 | {% macro default__compare_row_counts(a_relation, b_relation) %}
 6 | 
 7 |         select
 8 |             '{{ a_relation }}' as relation_name,
 9 |             count(*) as total_records
10 |         from {{ a_relation }}
11 | 
12 |         union all
13 | 
14 |         select
15 |             '{{ b_relation }}' as relation_name,
16 |             count(*) as total_records
17 |         from {{ b_relation }}
18 |   
19 | {% endmacro %}


--------------------------------------------------------------------------------
/macros/compare_which_query_columns_differ.sql:
--------------------------------------------------------------------------------
 1 | {% macro compare_which_query_columns_differ(a_query, b_query, primary_key_columns=[], columns=[], event_time=None) %}
 2 |     {{ return(adapter.dispatch('compare_which_query_columns_differ', 'audit_helper')(a_query, b_query, primary_key_columns, columns, event_time)) }}
 3 | {% endmacro %}
 4 | {# Default implementation: joins both queries on a surrogate key built from primary_key_columns and returns one row per column saying whether any joined row has a different value for it. #}
 5 | {% macro default__compare_which_query_columns_differ(a_query, b_query, primary_key_columns, columns, event_time) %}
 6 |     {% set columns = audit_helper._ensure_all_pks_are_in_column_set(primary_key_columns, columns) %}
 7 |     {% if event_time %}
 8 |         {% set event_time_props = audit_helper._get_comparison_bounds(a_query, b_query, event_time) %}
 9 |     {% endif %}
10 |     {# _get_comparison_bounds is given both queries as well as the event_time column, the same argument shape used by the other callers in this package. Without event_time, event_time_props stays undefined and is passed to event_time_filter as-is. #}
11 |     {% set joined_cols = columns | join (", ") %}
12 | 
13 |         with a as (
14 |             select 
15 |                 {{ joined_cols }},
16 |                 {{ audit_helper._generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
17 |             from ({{ a_query }}) as a_subq
18 |             {{ audit_helper.event_time_filter(event_time_props) }}
19 |         ),
20 |         b as (
21 |             select 
22 |                 {{ joined_cols }},
23 |                 {{ audit_helper._generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
24 |             from ({{ b_query }}) as b_subq
25 |             {{ audit_helper.event_time_filter(event_time_props) }}
26 |         ),
27 |         {# Inner join: only keys present on both sides are compared. A column differs when the values are unequal or exactly one of them is null; bool_or folds that over all joined rows. #}
28 |         calculated as (
29 |             select 
30 |                 {% for column in columns %}
31 |                     {% set quoted_column = adapter.quote(column) %}
32 |                     {% set compare_statement %}
33 |                         (
34 |                             (a.{{ quoted_column }} != b.{{ quoted_column }})
35 |                             or (a.{{ quoted_column }} is null and b.{{ quoted_column }} is not null)
36 |                             or (a.{{ quoted_column }} is not null and b.{{ quoted_column }} is null)
37 |                         )
38 |                     {% endset %}
39 |                 
40 |                 {{ dbt.bool_or(compare_statement) }} as {{ column | lower }}_has_difference
41 | 
42 |                 {%- if not loop.last %}, {% endif %}
43 |                 {% endfor %}
44 |             from a
45 |             inner join b on a.dbt_audit_surrogate_key = b.dbt_audit_surrogate_key
46 |         )
47 |     {# Unpivot: one "select" per column over the single-row "calculated" CTE, stitched together with union all. #}
48 |     {% for column in columns %}
49 |     
50 |     select 
51 |         '{{ column }}' as column_name, 
52 |         {{ column | lower }}_has_difference as has_difference
53 |     
54 |     from calculated
55 | 
56 |     {% if not loop.last %}
57 |         
58 |     union all 
59 | 
60 |     {% endif %}
61 | 
62 |     {% endfor %}
63 | 
64 | {% endmacro %}
65 | 


--------------------------------------------------------------------------------
/macros/compare_which_relation_columns_differ.sql:
--------------------------------------------------------------------------------
 1 | {% macro compare_which_relation_columns_differ(a_relation, b_relation, primary_key_columns=[], columns=[], event_time=None) %}
 2 |     {#- Relation-level entry point: wraps each relation in a "select *" query and delegates to compare_which_query_columns_differ. -#}
 3 |     {%- set a_select = "select * from " ~ a_relation -%}
 4 |     {%- set b_select = "select * from " ~ b_relation -%}
 5 |     {%- if not columns -%}
 6 |         {%- set columns = audit_helper._get_intersecting_columns_from_relations(a_relation, b_relation) -%}
 7 |     {%- endif -%}
 8 | 
 9 |     {{ audit_helper.compare_which_query_columns_differ(
10 |         a_select,
11 |         b_select,
12 |         primary_key_columns, columns,
13 |         event_time
14 |     ) }}
15 | {% endmacro %}


--------------------------------------------------------------------------------
/macros/quick_are_queries_identical.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 | As described by the Infinite Lambda team here: https://infinitelambda.com/data-validation-refactoring-snowflake/
 3 | 
 4 | Some platforms let you take a hash of the whole table, which can be very very fast compared to comparing each row. 
 5 | 
 6 | If you run this and it returns false, you still have to run the more in-depth queries to find out what specific changes there are, 
 7 | but it's a good way to quickly verify identical results if that's what you're expecting. 
 8 | */
 9 | 
10 | {% macro quick_are_queries_identical(query_a, query_b, columns=[], event_time=None) %}
11 |     {{ return (adapter.dispatch('quick_are_queries_identical', 'audit_helper')(query_a, query_b, columns, event_time)) }}
12 | {% endmacro %}
13 | {# Fallback for adapters without a dedicated implementation: raises a compiler error that names target.type. #}
14 | {% macro default__quick_are_queries_identical(query_a, query_b, columns, event_time) %}
15 |     {% if execute %}
16 |         {# Need to only throw this error when the macro is actually trying to be used, not during initial parse phase #}
17 |         {# if/when unit tests get support for `enabled` config, this check can be removed as they won't be supplied for parse anyway #}
18 |         {% do exceptions.raise_compiler_error("quick_are_queries_identical() is not implemented for adapter '"~ target.type ~ "'" ) %}
19 |     {% endif %}
20 | {% endmacro %}
21 | 
22 | {% macro bigquery__quick_are_queries_identical(query_a, query_b, columns, event_time) %}
23 |     {% set joined_cols = columns | join(", ") %}
24 |     {% if event_time %}
25 |         {% set event_time_props = audit_helper._get_comparison_bounds(query_a, query_b, event_time) %}
26 |     {% endif %}
27 |     {# The bounds must be computed from this macro's own parameters, query_a / query_b. Without event_time, event_time_props stays undefined and is passed to event_time_filter as-is. #}
28 |     with query_a as (
29 |         select {{ joined_cols }}
30 |         from ({{ query_a }})
31 |         {{ audit_helper.event_time_filter(event_time_props) }}
32 |     ), 
33 |     query_b as (
34 |         select {{ joined_cols }}
35 |         from ({{ query_b }})
36 |         {{ audit_helper.event_time_filter(event_time_props) }}
37 |     )
38 |     {# Each side is reduced to one value: the bit_xor of the fingerprints of its rows serialised as JSON. The result is true when both sides reduce to the same single value. #}
39 |     select count(distinct hash_result) = 1 as are_tables_identical
40 |     from (
41 |         select bit_xor(farm_fingerprint(to_json_string(query_a))) as hash_result
42 |         from query_a
43 | 
44 |         union all
45 |         
46 |         select bit_xor(farm_fingerprint(to_json_string(query_b))) as hash_result
47 |         from query_b
48 |     ) as hashes
49 | {% endmacro %}
50 | 
51 | {% macro snowflake__quick_are_queries_identical(query_a, query_b, columns, event_time) %}
52 |     {% set joined_cols = columns | join(", ") %}
53 |     {% if event_time %}
54 |         {% set event_time_props = audit_helper._get_comparison_bounds(query_a, query_b, event_time) %}
55 |     {% endif %}
56 |     {# The bounds must be computed from this macro's own parameters, query_a / query_b. Each side is reduced to a single hash_agg over the selected columns; the result is true when both hashes are the same single value. #}
57 |     select count(distinct hash_result) = 1 as are_tables_identical
58 |     from (
59 |         select hash_agg({{ joined_cols }}) as hash_result
60 |         from ({{ query_a }}) query_a_subq
61 |         {{ audit_helper.event_time_filter(event_time_props) }}
62 | 
63 |         union all
64 |         
65 |         select hash_agg({{ joined_cols }}) as hash_result
66 |         from ({{ query_b }}) query_b_subq
67 |         {{ audit_helper.event_time_filter(event_time_props) }}
68 | 
69 |     ) as hashes
70 | {% endmacro %}


--------------------------------------------------------------------------------
/macros/quick_are_relations_identical.sql:
--------------------------------------------------------------------------------
 1 | {% macro quick_are_relations_identical(a_relation, b_relation, columns=None, event_time=None) %}
 2 |     {# Relation-level entry point: wraps each relation in a "select *" query and delegates to quick_are_queries_identical. #}
 3 |     {% set a_select = "select * from " ~ a_relation %}
 4 |     {% set b_select = "select * from " ~ b_relation %}
 5 |     {% if not columns %}
 6 |         {% set columns = audit_helper._get_intersecting_columns_from_relations(a_relation, b_relation) %}
 7 |     {% endif %}
 8 | 
 9 |     {{ audit_helper.quick_are_queries_identical(
10 |         a_select,
11 |         b_select,
12 |         columns, event_time
13 |     ) }}
14 | {% endmacro %}


--------------------------------------------------------------------------------
/macros/utils/_classify_audit_row_status.sql:
--------------------------------------------------------------------------------
 1 | {% macro _classify_audit_row_status() %}
 2 |     {{ return(adapter.dispatch('_classify_audit_row_status', 'audit_helper')()) }}
 3 | {% endmacro %}
 4 | {# Default: emits a case expression labelling each row; branches are evaluated top to bottom. A surrogate key with more than one row is 'nonunique_pk'; a row in both sets is 'identical'; a (key, row number) pair seen on both sides is 'modified'; otherwise 'removed' (only in a) or 'added' (only in b). #}
 5 | {%- macro default___classify_audit_row_status() -%}
 6 |     case 
 7 |         when max(dbt_audit_pk_row_num) over (partition by dbt_audit_surrogate_key) > 1 then 'nonunique_pk'
 8 |         when dbt_audit_in_a and dbt_audit_in_b then 'identical'
 9 |         when {{ dbt.bool_or('dbt_audit_in_a') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) 
10 |             and {{ dbt.bool_or('dbt_audit_in_b') }} over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num)
11 |             then 'modified'
12 |         when dbt_audit_in_a then 'removed'
13 |         when dbt_audit_in_b then 'added'
14 |     end
15 | {% endmacro %}
16 | {# Redshift variant: same branches in the same order as the default; only the 'modified' test differs, #}
17 | {# replacing the windowed bool_or with max() over a 1/0 flag compared to 1. #}
18 | {%- macro redshift___classify_audit_row_status() -%}
19 |     {#- Redshift doesn't support bitwise operations (e.g. bool_or) inside of a window function :( -#}
20 |     case 
21 |         when max(dbt_audit_pk_row_num) over (partition by dbt_audit_surrogate_key) > 1 then 'nonunique_pk'
22 |         when dbt_audit_in_a and dbt_audit_in_b then 'identical'
23 |         when max(case when dbt_audit_in_a then 1 else 0 end) over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) = 1
24 |             and max(case when dbt_audit_in_b then 1 else 0 end) over (partition by dbt_audit_surrogate_key, dbt_audit_pk_row_num) = 1
25 |             then 'modified'
26 |         when dbt_audit_in_a then 'removed'
27 |         when dbt_audit_in_b then 'added'
28 |     end{% endmacro %}


--------------------------------------------------------------------------------
/macros/utils/_count_num_rows_in_status.sql:
--------------------------------------------------------------------------------
 1 | {% macro _count_num_rows_in_status() %}
 2 |     {{ return(adapter.dispatch('_count_num_rows_in_status', 'audit_helper')()) }}
 3 | {% endmacro %}
 4 | {# Default: window expression counting the distinct (surrogate key, pk row number) pairs within each dbt_audit_row_status. #}
 5 | {%- macro default___count_num_rows_in_status() -%}
 6 |     count(distinct dbt_audit_surrogate_key, dbt_audit_pk_row_num) over (partition by dbt_audit_row_status)
 7 | {% endmacro %}
 8 | {# BigQuery: same count, but the two columns are concatenated into a single expression inside count(distinct ...). #}
 9 | {%- macro bigquery___count_num_rows_in_status() -%}
10 |     count(distinct {{ dbt.concat(["dbt_audit_surrogate_key", "dbt_audit_pk_row_num"]) }}) over (partition by dbt_audit_row_status)
11 | {% endmacro %}
12 | {# Postgres: delegates to the dense_rank-based variant defined at the bottom of this file. #}
13 | {%- macro postgres___count_num_rows_in_status() -%}
14 |     {{ audit_helper._count_num_rows_in_status_without_distinct_window_func() }}
15 | {% endmacro %}
16 | {# Databricks: delegates to the same dense_rank-based variant. #}
17 | {%- macro databricks___count_num_rows_in_status() -%}
18 |     {{ audit_helper._count_num_rows_in_status_without_distinct_window_func() }}
19 | {% endmacro %}
20 | {# Shared variant: ascending dense_rank plus descending dense_rank minus 1, both over the same (surrogate key, pk row number) ordering within each status. #}
21 | {% macro _count_num_rows_in_status_without_distinct_window_func() %}
22 |     {#- Some platforms don't support count(distinct) inside of window functions -#}
23 |     {#- You can get the same outcome by dense_rank, assuming no nulls (we've already handled that) #}
24 |     {# https://stackoverflow.com/a/22347502 -#}
25 |     dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key, dbt_audit_pk_row_num)
26 |     + dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key desc, dbt_audit_pk_row_num desc)
27 |     - 1
28 | {% endmacro %}


--------------------------------------------------------------------------------
/macros/utils/_ensure_all_pks_are_in_column_set.sql:
--------------------------------------------------------------------------------
 1 | {# If someone forgot to include the PK columns in their main set of columns, fix it up for them #}
 2 | {# Assuming that the PKs are the most important columns, so they go to the front of the list #}
 3 | 
 4 | {% macro _ensure_all_pks_are_in_column_set(primary_key_columns, columns) %}
 5 |     {# Lower-cased copy of the requested columns, used for case-insensitive membership checks. #}
 6 |     {% set known_lower = columns | map('lower') | list %}
 7 | 
 8 |     {# Gather, in their original order, the primary keys that are absent from the column list. #}
 9 |     {% set absent_pks = [] %}
10 |     {% for pk_name in primary_key_columns if pk_name | lower not in known_lower %}
11 |         {% do absent_pks.append(pk_name) %}
12 |     {% endfor %}
13 | 
14 |     {# Absent keys go in front of the caller's columns; with nothing absent the input list is returned untouched. #}
15 |     {% if absent_pks %}
16 |         {% do return (absent_pks + columns) %}
17 |     {% endif %}
18 |     {% do return (columns) %}
19 | {% endmacro %}


--------------------------------------------------------------------------------
/macros/utils/_generate_null_safe_sk.sql:
--------------------------------------------------------------------------------
 1 | {# Taken from https://github.com/dbt-labs/dbt-utils/blob/main/macros/sql/generate_surrogate_key.sql but without the option to treat nulls as empty strings #}
 2 | 
 3 | {%- macro _generate_null_safe_surrogate_key(field_list) -%}
 4 |     {{ return(adapter.dispatch('_generate_null_safe_surrogate_key', 'audit_helper')(field_list)) }}
 5 | {% endmacro %}
 6 | {#- Default: renders a hash over the fields in field_list, each cast to string, with nulls replaced by a sentinel literal and '-' placed between consecutive fields. -#}
 7 | {%- macro default___generate_null_safe_surrogate_key(field_list) -%}
 8 | 
 9 | {%- set fields = [] -%}
10 | 
11 | {%- for field in field_list -%}
12 |     {#- Null values become a fixed sentinel string instead of an empty string. -#}
13 |     {%- do fields.append(
14 |         "coalesce(cast(" ~ field ~ " as " ~ dbt.type_string() ~ "), '_dbt_audit_helper_surrogate_key_null_')"
15 |     ) -%}
16 |     {#- A '-' literal is appended after every field except the last. -#}
17 |     {%- if not loop.last %}
18 |         {%- do fields.append("'-'") -%}
19 |     {%- endif -%}
20 | 
21 | {%- endfor -%}
22 | 
23 | {{ dbt.hash(dbt.concat(fields)) }}
24 | 
25 | {%- endmacro -%}


--------------------------------------------------------------------------------
/macros/utils/_generate_set_results.sql:
--------------------------------------------------------------------------------
  1 | {#-
  2 |     Set generation is dispatched because it's possible to get performance optimisations 
  3 |     on some platforms, while keeping the post-processing standardised
  4 |     See https://infinitelambda.com/data-validation-refactoring-snowflake/ for an example and background
  5 | -#}
  6 | 
  7 | {% macro _generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props=None) %}
  8 |   {{ return(adapter.dispatch('_generate_set_results', 'audit_helper')(a_query, b_query, primary_key_columns, columns, event_time_props)) }}
  9 | {% endmacro %}
 10 | {# Default implementation: emits a comma-separated list of CTEs only (no leading "with", no final select): a_base, b_base, a, b, a_intersect_b, a_except_b and b_except_a. #}
 11 | {% macro default___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
 12 |     {% set joined_cols = columns | join(", ") %}
 13 |     {# *_base: the requested columns plus a surrogate key built from primary_key_columns, with the event-time filter applied. #}
 14 |     a_base as (
 15 |         select 
 16 |             {{ joined_cols }}, 
 17 |             {{ audit_helper._generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
 18 |         from ( {{-  a_query  -}} ) a_base_subq
 19 |         {{ audit_helper.event_time_filter(event_time_props) }}
 20 |     ),
 21 | 
 22 |     b_base as (
 23 |         select 
 24 |             {{ joined_cols }}, 
 25 |             {{ audit_helper._generate_null_safe_surrogate_key(primary_key_columns) }} as dbt_audit_surrogate_key
 26 |         from ( {{-  b_query  -}} ) b_base_subq
 27 |         {{ audit_helper.event_time_filter(event_time_props) }}
 28 |     ),
 29 |     {# a / b: number the rows sharing a surrogate key; the window orders by the partition key itself, so the numbering among duplicates is arbitrary. #}
 30 |     a as (
 31 |         select 
 32 |             *, 
 33 |             row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key) as dbt_audit_pk_row_num
 34 |         from a_base
 35 |     ),
 36 | 
 37 |     b as (
 38 |         select 
 39 |             *, 
 40 |             row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key) as dbt_audit_pk_row_num
 41 |         from b_base
 42 |     ),
 43 |     {# Set operations over the full rows, including the surrogate key and row number columns. #}
 44 |     a_intersect_b as (
 45 | 
 46 |         select * from a
 47 |         {{ dbt.intersect() }}
 48 |         select * from b
 49 | 
 50 |     ),
 51 | 
 52 |     a_except_b as (
 53 | 
 54 |         select * from a
 55 |         {{ dbt.except() }}
 56 |         select * from b
 57 | 
 58 |     ),
 59 | 
 60 |     b_except_a as (
 61 | 
 62 |         select * from b
 63 |         {{ dbt.except() }}
 64 |         select * from a
 65 | 
 66 |     )
 67 | {% endmacro %}
 68 | 
 69 | {% macro bigquery___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
 70 |     {% set joined_cols = columns | join(", ") %}
 71 |     {% set surrogate_key = audit_helper._generate_null_safe_surrogate_key(primary_key_columns) %}
 72 |     subset_columns_a as (
 73 |         select 
 74 |             {{ joined_cols }}, 
 75 |             {{ surrogate_key }} as dbt_audit_surrogate_key,
 76 |             row_number() over (partition by {{ surrogate_key }} order by 1 ) as dbt_audit_pk_row_num
 77 |         from ( {{-  a_query  -}} )
 78 |         {{ audit_helper.event_time_filter(event_time_props) }}
 79 |     ),
 80 | 
 81 |     subset_columns_b as (
 82 |         select 
 83 |             {{ joined_cols }}, 
 84 |             {{ surrogate_key }} as dbt_audit_surrogate_key,
 85 |             row_number() over (partition by {{ surrogate_key }} order by 1 ) as dbt_audit_pk_row_num
 86 |         from ( {{-  b_query  -}} )
 87 |         {{ audit_helper.event_time_filter(event_time_props) }}
 88 |     ),
 89 |     {# a / b: add a per-row fingerprint of the whole row (selected columns, surrogate key and row number) serialised as JSON. #}
 90 |     a as (
 91 |         select
 92 |             *,
 93 |             farm_fingerprint(to_json_string(subset_columns_a)) as dbt_audit_row_hash
 94 |         from subset_columns_a
 95 |     ), 
 96 | 
 97 |     b as (
 98 |         select
 99 |             *,
100 |             farm_fingerprint(to_json_string(subset_columns_b)) as dbt_audit_row_hash
101 |         from subset_columns_b
102 |     ),
103 |     {# The three result sets are derived by row-hash membership (in / not in) rather than by intersect / except over full rows. #}
104 |     a_intersect_b as (
105 | 
106 |         select * from a
107 |         where a.dbt_audit_row_hash in (select b.dbt_audit_row_hash from b)
108 | 
109 |     ),
110 | 
111 |     a_except_b as (
112 | 
113 |         select * from a
114 |         where a.dbt_audit_row_hash not in (select b.dbt_audit_row_hash from b)
115 | 
116 |     ),
117 | 
118 |     b_except_a as (
119 | 
120 |         select * from b
121 |         where b.dbt_audit_row_hash not in (select a.dbt_audit_row_hash from a)
122 | 
123 |     )
124 | {% endmacro %}
125 | 
126 | {% macro databricks___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
127 |     {#-
128 |         Databricks variant of the set-results CTE generator.
129 |         Emits a comma-separated run of CTE definitions (no leading `with`, no trailing comma):
130 |         a, b, a_intersect_b, a_except_b, b_except_a.
131 |         Every entry of `columns` is cast to the type given by api.Column.translate_type("string"),
132 |         and rows are matched on dbt_audit_row_hash = xxhash64(<cast columns>, dbt_audit_pk_row_num).
133 |         a_query / b_query: SQL text inlined as the two subqueries being compared.
134 |         primary_key_columns: passed to audit_helper._generate_null_safe_surrogate_key.
135 |         event_time_props: forwarded to audit_helper.event_time_filter for each side.
136 |     -#}
137 |     {# Databricks won't compare map-typed values unless a legacy behaviour flag is enabled, #}
138 |     {# so each column is cast to a string up front. #}
139 |     {% set string_type = api.Column.translate_type("string") %}
140 |     {% set stringified = [] %}
141 |     {% for column_name in columns %}
142 |         {% do stringified.append(dbt.cast(column_name, string_type)) %}
143 |     {% endfor %}
144 |     {% set select_list = stringified | join(", ") %}
145 |     {% set pk_hash = audit_helper._generate_null_safe_surrogate_key(primary_key_columns) %}
146 | 
147 |     {# Both sides share one template: cast columns, surrogate key, per-key row number, row hash. #}
148 |     {% for side, side_query in [("a", a_query), ("b", b_query)] %}
149 |     {{ side }} as (
150 |         select
151 |             {{ select_list }},
152 |             {{ pk_hash }} as dbt_audit_surrogate_key,
153 |             row_number() over (partition by {{ pk_hash }} order by 1 ) as dbt_audit_pk_row_num,
154 |             xxhash64({{ select_list }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
155 |         from ( {{-  side_query  -}} )
156 |         {{ audit_helper.event_time_filter(event_time_props) }}
157 |     ),
158 |     {% endfor %}
159 | 
160 |     {# Split rows by whether the opposite side holds the same row hash. #}
161 |     a_intersect_b as (
162 |         select * from a
163 |         where a.dbt_audit_row_hash in (select b.dbt_audit_row_hash from b)
164 |     ),
165 | 
166 |     a_except_b as (
167 |         select * from a
168 |         where a.dbt_audit_row_hash not in (select b.dbt_audit_row_hash from b)
169 |     ),
170 | 
171 |     b_except_a as (
172 |         select * from b
173 |         where b.dbt_audit_row_hash not in (select a.dbt_audit_row_hash from a)
174 |     )
175 | {% endmacro %}
176 | 
177 | {% macro snowflake___generate_set_results(a_query, b_query, primary_key_columns, columns, event_time_props) %}
178 |     {#-
179 |         Snowflake variant of the set-results CTE generator.
180 |         Emits a comma-separated run of CTE definitions (no leading `with`, no trailing comma):
181 |         a, b, a_intersect_b, a_except_b, b_except_a.
182 |         Rows are matched on dbt_audit_row_hash = hash(<columns>, dbt_audit_pk_row_num); the
183 |         row_number and hash expressions refer to aliases defined earlier in the same select list.
184 |         a_query / b_query: SQL text inlined as the two subqueries being compared.
185 |         primary_key_columns: passed to audit_helper._generate_null_safe_surrogate_key.
186 |         columns / event_time_props: joined with ", " / forwarded to audit_helper.event_time_filter.
187 |     -#}
188 |     {% set select_list = columns | join(", ") %}
189 |     {% set pk_hash = audit_helper._generate_null_safe_surrogate_key(primary_key_columns) %}
190 | 
191 |     {# Both sides share one template: columns, surrogate key, per-key row number, row hash. #}
192 |     {% for side, side_query in [("a", a_query), ("b", b_query)] %}
193 |     {{ side }} as (
194 |         select
195 |             {{ select_list }},
196 |             {{ pk_hash }} as dbt_audit_surrogate_key,
197 |             row_number() over (partition by dbt_audit_surrogate_key order by dbt_audit_surrogate_key ) as dbt_audit_pk_row_num,
198 |             hash({{ select_list }}, dbt_audit_pk_row_num) as dbt_audit_row_hash
199 |         from ( {{-  side_query  -}} )
200 |         {{ audit_helper.event_time_filter(event_time_props) }}
201 |     ),
202 |     {% endfor %}
203 | 
204 |     {# Split rows by whether the opposite side holds the same row hash. #}
205 |     a_intersect_b as (
206 |         select * from a
207 |         where a.dbt_audit_row_hash in (select b.dbt_audit_row_hash from b)
208 |     ),
209 | 
210 |     a_except_b as (
211 |         select * from a
212 |         where a.dbt_audit_row_hash not in (select b.dbt_audit_row_hash from b)
213 |     ),
214 | 
215 |     b_except_a as (
216 |         select * from b
217 |         where b.dbt_audit_row_hash not in (select a.dbt_audit_row_hash from a)
218 |     )
219 | {% endmacro %}


--------------------------------------------------------------------------------
/macros/utils/_get_comparison_bounds.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 | The idea here is that if the event_time is set, we will only compare records enclosed in both models.
 3 | This improves performance and allows us to compare apples to apples, instead of detecting millions/billions
 4 | of "deletions" identified due to prod having all data while CI only has a few days' worth.
 5 | 
 6 | In the diagram below, the cross-hatched section is the comparison bounds. You can think of it as the range bounded by
 7 |                                                          
 8 |          greatest(model_a.min_value, model_b.min_value)  
 9 |             least(model_a.max_value, model_b.max_value)  
10 |                                                          
11 |                  ┌────────────────────────────┐          
12 |   a min_value    │                a max_value │        
13 |     └──► ┌───────┼────────────────────┐ ◄───┘ │        
14 |          │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼│       │        
15 | model_a  │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼│       │ model_b
16 |          │       │┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼│       │        
17 |          └───────┼────────────────────┘       │        
18 |             ┌──► └────────────────────────────┘ ◄────┐ 
19 |            b min_value                      b max_value 
20 | */
21 | {% macro _get_comparison_bounds(a_query, b_query, event_time) %}
22 |     {#-
23 |         Runs one query over both inputs and returns a dict with three entries: `event_time` (the
24 |         argument, unchanged) plus `min_event_time` and `max_event_time`, i.e. the larger of the two
25 |         per-query minimums and the smaller of the two per-query maximums of that column.
26 |     -#}
27 |     {% set bounds_sql %}
28 |         with min_maxes as (
29 |             select min({{ event_time }}) as min_event_time, max({{ event_time }}) as max_event_time
30 |             from ({{ a_query }}) a_subq
31 |             union all 
32 |             select min({{ event_time }}) as min_event_time, max({{ event_time }}) as max_event_time
33 |             from ({{ b_query }}) b_subq
34 |         )
35 |         select max(min_event_time) as min_event_time, min(max_event_time) as max_event_time
36 |         from min_maxes
37 |     {% endset %}
38 | 
39 |     {% set bounds = {"event_time": event_time} %}
40 |     {# Result column names may come back in any letter case, so each is lowercased instead of looked up by name. #}
41 |     {% for column_name, column_values in dbt_utils.get_query_results_as_dict(bounds_sql).items() %}
42 |         {% do bounds.update({column_name | lower: column_values[0]}) %}
43 |     {% endfor %}
44 |     {% do return(bounds) %}
45 | {% endmacro %}
46 | 
47 | {% macro event_time_filter(event_time_props) %}
48 |     {#-
49 |         Renders a `where` clause keeping rows whose event_time column lies in the inclusive range
50 |         [min_event_time, max_event_time] read from event_time_props.
51 | 
52 |         Renders nothing when event_time_props is falsy. It also renders nothing when either bound
53 |         is null (the bounds query found no non-null event_time to overlap on): interpolating the
54 |         null would emit the literal 'None', which is not a valid value to compare against.
55 |     -#}
56 |     {% if event_time_props %}
57 |         {% set event_time = event_time_props["event_time"] %}
58 |         {% set lower_bound = event_time_props["min_event_time"] %}
59 |         {% set upper_bound = event_time_props["max_event_time"] %}
60 |         {% if lower_bound is not none and upper_bound is not none %}
61 |         where {{ event_time }} >= '{{ lower_bound }}'
62 |         and {{ event_time }} <= '{{ upper_bound }}'
63 |         {% endif %}
64 |     {% endif %}
65 | {% endmacro %}


--------------------------------------------------------------------------------
/macros/utils/_get_intersecting_columns_from_relations.sql:
--------------------------------------------------------------------------------
 1 | {% macro _get_intersecting_columns_from_relations(a_relation, b_relation) %}
 2 |     {#-
 3 |         Returns the entries that dbt_utils.get_filtered_columns_in_relation yields for a_relation
 4 |         and that are also present in its result for b_relation, keeping a_relation's ordering.
 5 |     -#}
 6 |     {%- set left_columns = dbt_utils.get_filtered_columns_in_relation(a_relation) -%}
 7 |     {%- set right_columns = dbt_utils.get_filtered_columns_in_relation(b_relation) -%}
 8 | 
 9 |     {#- Keep each left-hand entry only if the right-hand list contains it. -#}
10 |     {%- set shared_columns = left_columns | select("in", right_columns) | list -%}
11 | 
12 |     {% do return(shared_columns) %}
13 | {% endmacro %}


--------------------------------------------------------------------------------
/packages.yml:
--------------------------------------------------------------------------------
1 | packages:
2 |   - package: dbt-labs/dbt_utils
3 |     version: [">=0.9.0", "<2.0.0"]
4 | 


--------------------------------------------------------------------------------
/supported_adapters.env:
--------------------------------------------------------------------------------
1 | SUPPORTED_ADAPTERS=postgres
2 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | skipsdist = True
 3 | envlist = lint_all, testenv
 4 | 
 5 | [testenv]
 6 | passenv =
 7 |     # postgres env vars
 8 |     POSTGRES_HOST
 9 |     POSTGRES_USER
10 |     DBT_ENV_SECRET_POSTGRES_PASS
11 |     POSTGRES_PORT
12 |     POSTGRES_DATABASE
13 |     POSTGRES_SCHEMA
14 | 
15 | # Postgres integration tests for centralized dbt testing
16 | # run dbt commands directly, assumes dbt is already installed in environment
17 | [testenv:dbt_integration_postgres]
18 | changedir = integration_tests
19 | allowlist_externals = 
20 |     dbt
21 | skip_install = true
22 | commands =
23 |     dbt --version
24 |     dbt debug --target postgres
25 | 


--------------------------------------------------------------------------------