├── .gitignore ├── LICENSE ├── README.md ├── analysis └── .gitkeep ├── data └── .gitkeep ├── dbt_modules └── dbt_utils │ ├── .circleci │ └── config.yml │ ├── .github │ ├── CODEOWNERS │ ├── issue_template │ │ ├── bug_report.md │ │ └── feature_request.md │ └── pull_request_template.md │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── dbt_project.yml │ ├── docker-compose.yml │ ├── etc │ └── dbt-logo.png │ ├── integration_tests │ ├── .env │ │ ├── bigquery.env │ │ ├── postgres.env │ │ ├── redshift.env │ │ └── snowflake.env │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── ci │ │ └── sample.profiles.yml │ ├── data │ │ ├── .gitkeep │ │ ├── cross_db │ │ │ ├── data_concat.csv │ │ │ ├── data_date_trunc.csv │ │ │ ├── data_dateadd.csv │ │ │ ├── data_datediff.csv │ │ │ ├── data_hash.csv │ │ │ ├── data_last_day.csv │ │ │ ├── data_length.csv │ │ │ ├── data_position.csv │ │ │ ├── data_replace.csv │ │ │ ├── data_right.csv │ │ │ ├── data_safe_cast.csv │ │ │ ├── data_split_part.csv │ │ │ └── data_width_bucket.csv │ │ ├── datetime │ │ │ └── data_date_spine.csv │ │ ├── etc │ │ │ └── data_people.csv │ │ ├── materializations │ │ │ └── data_insert_by_period.csv │ │ ├── schema_tests │ │ │ ├── data_test_at_least_one.csv │ │ │ ├── data_test_equal_rowcount.csv │ │ │ ├── data_test_expression_is_true.csv │ │ │ ├── data_test_mutually_exclusive_ranges_no_gaps.csv │ │ │ ├── data_test_mutually_exclusive_ranges_with_gaps.csv │ │ │ ├── data_test_not_constant.csv │ │ │ ├── data_test_relationships_where_table_1.csv │ │ │ ├── data_test_relationships_where_table_2.csv │ │ │ └── data_unique_combination_of_columns.csv │ │ ├── sql │ │ │ ├── data_events_20180101.csv │ │ │ ├── data_events_20180102.csv │ │ │ ├── data_generate_series.csv │ │ │ ├── data_get_column_values.csv │ │ │ ├── data_get_query_results_as_dict.csv │ │ │ ├── data_nullcheck_table.csv │ │ │ ├── data_pivot.csv │ │ │ ├── data_pivot_expected.csv │ │ │ ├── data_safe_add.csv │ │ │ ├── data_star.csv │ │ │ ├── data_star_expected.csv │ │ │ ├── data_surrogate_key.csv │ │ │ ├── data_union_expected.csv │ │ │ ├── data_union_table_1.csv │ │ │ ├── data_union_table_2.csv │ │ │ ├── data_unpivot.csv │ │ │ ├── data_unpivot_expected.csv │ │ │ └── data_unpivot_original_api_expected.csv │ │ └── web │ │ │ ├── data_url_host.csv │ │ │ ├── data_url_path.csv │ │ │ └── data_urls.csv │ ├── dbt_project.yml │ ├── macros │ │ ├── .gitkeep │ │ └── tests.sql │ ├── models │ │ ├── cross_db_utils │ │ │ ├── schema.yml │ │ │ ├── test_concat.sql │ │ │ ├── test_current_timestamp.sql │ │ │ ├── test_current_timestamp_in_utc.sql │ │ │ ├── test_date_trunc.sql │ │ │ ├── test_dateadd.sql │ │ │ ├── test_datediff.sql │ │ │ ├── test_hash.sql │ │ │ ├── test_last_day.sql │ │ │ ├── test_length.sql │ │ │ ├── test_position.sql │ │ │ ├── test_replace.sql │ │ │ ├── test_right.sql │ │ │ ├── test_safe_cast.sql │ │ │ ├── test_split_part.sql │ │ │ └── test_width_bucket.sql │ │ ├── datetime │ │ │ ├── schema.yml │ │ │ └── test_date_spine.sql │ │ ├── materializations │ │ │ ├── expected_insert_by_period.sql │ │ │ ├── schema.yml │ │ │ └── test_insert_by_period.sql │ │ ├── schema_tests │ │ │ ├── schema.yml │ │ │ ├── test_equal_column_subset.sql │ │ │ ├── test_equal_rowcount.sql │ │ │ └── test_recency.sql │ │ ├── sql │ │ │ ├── schema.yml │ │ │ ├── test_generate_series.sql │ │ │ ├── test_get_column_values.sql │ │ │ ├── test_get_relations_by_prefix_and_union.sql │ │ │ ├── test_get_tables_by_prefix_and_union.sql │ │ │ ├── test_groupby.sql │ │ │ ├── test_nullcheck_table.sql │ │ │ ├── test_pivot.sql │ │ │ ├── test_safe_add.sql │ │ │ 
├── test_star.sql │ │ │ ├── test_surrogate_key.sql │ │ │ ├── test_union.sql │ │ │ ├── test_union_base.sql │ │ │ ├── test_unpivot.sql │ │ │ └── test_unpivot_original_api.sql │ │ └── web │ │ │ ├── schema.yml │ │ │ ├── test_url_host.sql │ │ │ ├── test_url_path.sql │ │ │ └── test_urls.sql │ ├── packages.yml │ └── tests │ │ ├── assert_get_query_results_as_dict_objects_equal.sql │ │ └── logger │ │ ├── assert_pretty_output_msg_is_string.sql │ │ └── assert_pretty_time_is_string.sql │ ├── macros │ ├── cross_db_utils │ │ ├── _is_relation.sql │ │ ├── concat.sql │ │ ├── current_timestamp.sql │ │ ├── datatypes.sql │ │ ├── date_trunc.sql │ │ ├── dateadd.sql │ │ ├── datediff.sql │ │ ├── except.sql │ │ ├── hash.sql │ │ ├── identifier.sql │ │ ├── intersect.sql │ │ ├── last_day.sql │ │ ├── length.sql │ │ ├── literal.sql │ │ ├── position.sql │ │ ├── replace.sql │ │ ├── right.sql │ │ ├── safe_cast.sql │ │ ├── split_part.sql │ │ └── width_bucket.sql │ ├── datetime │ │ └── date_spine.sql │ ├── geo │ │ └── haversine_distance.sql │ ├── logger │ │ ├── log_info.sql │ │ ├── pretty_log_format.sql │ │ └── pretty_time.sql │ ├── materializations │ │ └── insert_by_period_materialization.sql │ ├── schema_tests │ │ ├── at_least_one.sql │ │ ├── cardinality_equality.sql │ │ ├── equal_rowcount.sql │ │ ├── equality.sql │ │ ├── expression_is_true.sql │ │ ├── mutually_exclusive_ranges.sql │ │ ├── not_constant.sql │ │ ├── recency.sql │ │ ├── relationships_where.sql │ │ └── unique_combination_of_columns.sql │ ├── sql │ │ ├── generate_series.sql │ │ ├── get_column_values.sql │ │ ├── get_query_results_as_dict.sql │ │ ├── get_relations_by_prefix.sql │ │ ├── get_tables_by_prefix_sql.sql │ │ ├── groupby.sql │ │ ├── nullcheck.sql │ │ ├── nullcheck_table.sql │ │ ├── pivot.sql │ │ ├── safe_add.sql │ │ ├── star.sql │ │ ├── surrogate_key.sql │ │ ├── union.sql │ │ └── unpivot.sql │ └── web │ │ ├── get_url_host.sql │ │ ├── get_url_parameter.sql │ │ └── get_url_path.sql │ └── run_test.sh ├── dbt_project.yml ├── macros ├── .gitkeep └── get_column_values.sql ├── models ├── base │ ├── dedup_events.sql │ ├── pageviews_proc.sql │ └── unique_events_and_parameters.sql ├── math │ └── user_page_paths.sql └── schema.yml ├── packages.yml ├── snapshots └── .gitkeep └── tests └── .gitkeep /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .DS_Store 3 | target/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Coding is for Losers 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ga4-bigquery-starter 2 | A starter package of dbt models for the GA4 BigQuery stream 3 | 4 | https://www.loom.com/share/a8a3651629bd4d80aa9ea861b3d9c29e 5 | 6 | ### This is a #measure community project 7 | If you have ideas for views / reports to add to the project, please submit them as issues. 8 | 9 | Contributors are welcome - if you're able to build something in dbt that you think people would enjoy, please submit it as a PR. 10 | 11 | ### Using the starter project 12 | 13 | After installing [dbt](https://getdbt.com), try running the following commands: 14 | - dbt run 15 | - dbt test 16 | 17 | This project can also be run via [dbt cloud](https://cloud.getdbt.com), which provides free individual developer accounts. 18 | 19 | ### dbt Resources: 20 | - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) 21 | - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers 22 | - Join the [chat](http://slack.getdbt.com/) on Slack for live discussions and support 23 | - Find [dbt events](https://events.getdbt.com) near you 24 | - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices 25 | -------------------------------------------------------------------------------- /analysis/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-is-for-losers/ga4-bigquery-starter/4e38153440ac30b86b9b1a0a31c1f6893e1dbbb5/analysis/.gitkeep -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-is-for-losers/ga4-bigquery-starter/4e38153440ac30b86b9b1a0a31c1f6893e1dbbb5/data/.gitkeep -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/.circleci/config.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | jobs: 5 | build: 6 | docker: 7 | - image: circleci/python:3.6.3-stretch 8 | - image: circleci/postgres:9.6.5-alpine-ram 9 | 10 | steps: 11 | - checkout 12 | 13 | - run: 14 | name: setup_creds 15 | command: | 16 | echo $GCLOUD_SERVICE_KEY | base64 --decode --ignore-garbage > ${HOME}/gcloud-service-key.json 17 | 18 | - restore_cache: 19 | key: deps1-{{ .Branch }} 20 | 21 | - run: 22 | name: "Run Tests - Postgres" 23 | environment: 24 | CI_DBT_HOST: localhost 25 | CI_DBT_USER: root 26 | CI_DBT_PASS: '' 27 | CI_DBT_PORT: 5432 28 | CI_DBT_DBNAME: circle_test 29 | command: ./run_test.sh postgres 30 | 31 | - run: 32 | name: "Run Tests - Redshift" 33 | command: ./run_test.sh redshift 34 | 35 | - run: 36 | name: "Run Tests - Snowflake" 37 | command: ./run_test.sh snowflake 38 | 39 | - run: 40 | name: "Run Tests - BigQuery" 41 | environment: 42 | GCLOUD_SERVICE_KEY_PATH: "/home/circleci/gcloud-service-key.json" 43 | command: ./run_test.sh bigquery 44 | 45 | - save_cache: 46 | key: deps1-{{ .Branch }} 47 | paths:
48 | - "venv" 49 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @clrcrl 2 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/.github/issue_template/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report a bug or an issue you've found with this package 4 | title: '' 5 | labels: bug, triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Describe the bug 11 | 14 | 15 | ### Steps to reproduce 16 | 19 | 20 | ### Expected results 21 | 24 | 25 | ### Actual results 26 | 29 | 30 | ### Screenshots and log output 31 | 34 | 35 | ### System information 36 | **The contents of your `packages.yml` file:** 37 | 38 | **Which database are you using dbt with?** 39 | - [ ] postgres 40 | - [ ] redshift 41 | - [ ] bigquery 42 | - [ ] snowflake 43 | - [ ] other (specify: ____________) 44 | 45 | 46 | **The output of `dbt --version`:** 47 | ``` 48 | 49 | ``` 50 | 51 | **The operating system you're using:** 52 | 53 | **The output of `python --version`:** 54 | 55 | ### Additional context 56 | 59 | 60 | ### Are you interested in contributing the fix? 61 | 64 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/.github/issue_template/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this package 4 | title: '' 5 | labels: enhancement, triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Describe the feature 11 | A clear and concise description of what you want to happen. 12 | 13 | ### Describe alternatives you've considered 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | ### Additional context 17 | Is this feature database-specific? Which database(s) is/are relevant? Please include any other relevant context here. 18 | 19 | ### Who will this benefit? 20 | What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly. 21 | 22 | ### Are you interested in contributing this feature? 23 | 26 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Description & motivation 2 | 5 | 6 | ## Checklist 7 | - [ ] I have verified that these changes work locally 8 | - [ ] I have updated the README.md (if applicable) 9 | - [ ] I have added tests & descriptions to my models (and macros if applicable) 10 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | venv/ -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/README.md: -------------------------------------------------------------------------------- 1 | This [dbt](https://github.com/fishtown-analytics/dbt) package contains macros that can be (re)used across dbt projects. 2 | 3 | ## Installation Instructions 4 | Check [dbt Hub](https://hub.getdbt.com/fishtown-analytics/dbt_utils/latest/) for the latest installation instructions, or [read the docs](https://docs.getdbt.com/docs/package-management) for more information on installing packages. 5 | 6 | ---- 7 | 8 | ## Macros 9 | ### Cross-database 10 | While these macros are cross database, they do not support all databases. 11 | These macros are provided to make date calculations easier and are not a core part of dbt. 12 | Most date macros are not supported on postgres. 13 | 14 | #### current_timestamp ([source](macros/cross_db_utils/current_timestamp.sql)) 15 | This macro returns the current timestamp. 16 | 17 | Usage: 18 | ``` 19 | {{ dbt_utils.current_timestamp() }} 20 | ``` 21 | 22 | #### dateadd ([source](macros/cross_db_utils/dateadd.sql)) 23 | This macro adds a time/day interval to the supplied date/timestamp. Note: The `datepart` argument is database-specific. 24 | 25 | Usage: 26 | ``` 27 | {{ dbt_utils.dateadd(datepart='day', interval=1, from_date_or_timestamp="'2017-01-01'") }} 28 | ``` 29 | 30 | #### datediff ([source](macros/cross_db_utils/datediff.sql)) 31 | This macro calculates the difference between two dates. 32 | 33 | Usage: 34 | ``` 35 | {{ dbt_utils.datediff("'2018-01-01'", "'2018-01-20'", 'day') }} 36 | ``` 37 | 38 | 39 | #### split_part ([source](macros/cross_db_utils/split_part.sql)) 40 | This macro splits a string of text using the supplied delimiter and returns the supplied part number (1-indexed). 41 | 42 | Usage: 43 | ``` 44 | {{ dbt_utils.split_part(string_text='1,2,3', delimiter_text=',', part_number=1) }} 45 | ``` 46 | 47 | #### date_trunc ([source](macros/cross_db_utils/date_trunc.sql)) 48 | Truncates a date or timestamp to the specified datepart. Note: The `datepart` argument is database-specific. 49 | 50 | Usage: 51 | ``` 52 | {{ dbt_utils.date_trunc(datepart, date) }} 53 | ``` 54 | 55 | #### last_day ([source](macros/cross_db_utils/last_day.sql)) 56 | Gets the last day for a given date and datepart. 
Notes: 57 | 58 | - The `datepart` argument is database-specific. 59 | - This macro currently only supports dateparts of `month` and `quarter`. 60 | 61 | Usage: 62 | ``` 63 | {{ dbt_utils.last_day(date, datepart) }} 64 | ``` 65 | 66 | #### width_bucket ([source](macros/cross_db_utils/width_bucket.sql)) 67 | This macro is modeled after the `width_bucket` function natively available in Snowflake. 68 | 69 | From the original Snowflake [documentation](https://docs.snowflake.net/manuals/sql-reference/functions/width_bucket.html): 70 | 71 | Constructs equi-width histograms, in which the histogram range is divided into intervals of identical size, and returns the bucket number into which the value of an expression falls, after it has been evaluated. The function returns an integer value or null (if any input is null). 72 | Notes: 73 | 74 | - `expr` 75 | The expression for which the histogram is created. This expression must evaluate to a numeric value or to a value that can be implicitly converted to a numeric value. 76 | 77 | - `min_value` and `max_value` 78 | The low and high end points of the acceptable range for the expression. The end points must also evaluate to numeric values and not be equal. 79 | 80 | - `num_buckets` 81 | The desired number of buckets; must be a positive integer value. A value from the expression is assigned to each bucket, and the function then returns the corresponding bucket number. 82 | 83 | When an expression falls outside the range, the function returns: 84 | 85 | `0` if the expression is less than min_value. 86 | 87 | `num_buckets + 1` if the expression is greater than or equal to max_value. 88 | 89 | 90 | Usage: 91 | ``` 92 | {{ dbt_utils.width_bucket(expr, min_value, max_value, num_buckets) }} 93 | ``` 94 | 95 | --- 96 | ### Date/Time 97 | #### date_spine ([source](macros/datetime/date_spine.sql)) 98 | This macro returns the sql required to build a date spine. 99 | 100 | Usage: 101 | ``` 102 | {{ dbt_utils.date_spine( 103 | datepart="minute", 104 | start_date="to_date('01/01/2016', 'mm/dd/yyyy')", 105 | end_date="dateadd(week, 1, current_date)" 106 | ) 107 | }} 108 | ``` 109 | --- 110 | ### Geo 111 | #### haversine_distance ([source](macros/geo/haversine_distance.sql)) 112 | This macro calculates the [haversine distance](http://daynebatten.com/2015/09/latitude-longitude-distance-sql/) between a pair of x/y coordinates. 113 | 114 | Usage: 115 | ``` 116 | {{ dbt_utils.haversine_distance(lat1=<lat1>,lon1=<lon1>,lat2=<lat2>,lon2=<lon2>) }} 117 | ``` 118 | --- 119 | ### Schema Tests 120 | #### equal_rowcount ([source](macros/schema_tests/equal_rowcount.sql)) 121 | This schema test asserts that two relations have the same number of rows. 122 | 123 | Usage: 124 | ```yaml 125 | version: 2 126 | 127 | models: 128 | - name: model_name 129 | tests: 130 | - dbt_utils.equal_rowcount: 131 | compare_model: ref('other_table_name') 132 | 133 | ``` 134 | 135 | #### equality ([source](macros/schema_tests/equality.sql)) 136 | This schema test asserts the equality of two relations. Optionally specify a subset of columns to compare. 137 | 138 | Usage: 139 | ```yaml 140 | version: 2 141 | 142 | models: 143 | - name: model_name 144 | tests: 145 | - dbt_utils.equality: 146 | compare_model: ref('other_table_name') 147 | compare_columns: 148 | - first_column 149 | - second_column 150 | 151 | ``` 152 | 153 | #### expression_is_true ([source](macros/schema_tests/expression_is_true.sql)) 154 | This schema test asserts that a valid sql expression is true for all records.
This is useful when checking integrity across columns, for example, that a total is equal to the sum of its parts, or that at least one column is true. 155 | 156 | Usage: 157 | ```yaml 158 | version: 2 159 | 160 | models: 161 | - name: model_name 162 | tests: 163 | - dbt_utils.expression_is_true: 164 | expression: "col_a + col_b = total" 165 | 166 | ``` 167 | 168 | The macro accepts an optional parameter `condition` that allows for asserting 169 | the `expression` on a subset of all records. 170 | 171 | Usage: 172 | ```yaml 173 | version: 2 174 | 175 | models: 176 | - name: model_name 177 | tests: 178 | - dbt_utils.expression_is_true: 179 | expression: "col_a + col_b = total" 180 | condition: "created_at > '2018-12-31'" 181 | 182 | ``` 183 | 184 | 185 | #### recency ([source](macros/schema_tests/recency.sql)) 186 | This schema test asserts that there is data in the referenced model at least as recent as the defined interval prior to the current timestamp. 187 | 188 | Usage: 189 | ```yaml 190 | version: 2 191 | 192 | models: 193 | - name: model_name 194 | tests: 195 | - dbt_utils.recency: 196 | datepart: day 197 | field: created_at 198 | interval: 1 199 | ``` 200 | 201 | #### at_least_one ([source](macros/schema_tests/at_least_one.sql)) 202 | This schema test asserts that a column has at least one value. 203 | 204 | Usage: 205 | ```yaml 206 | version: 2 207 | 208 | models: 209 | - name: model_name 210 | columns: 211 | - name: col_name 212 | tests: 213 | - dbt_utils.at_least_one 214 | 215 | 216 | ``` 217 | 218 | #### not_constant ([source](macros/schema_tests/not_constant.sql)) 219 | This schema test asserts that a column does not have the same value in all rows. 220 | 221 | Usage: 222 | ```yaml 223 | version: 2 224 | 225 | models: 226 | - name: model_name 227 | columns: 228 | - name: column_name 229 | tests: 230 | - dbt_utils.not_constant 231 | 232 | ``` 233 | 234 | #### cardinality_equality ([source](macros/schema_tests/cardinality_equality.sql)) 235 | This schema test asserts that values in a given column have exactly the same cardinality as values from a different column in a different model. 236 | 237 | Usage: 238 | ```yaml 239 | version: 2 240 | 241 | models: 242 | - name: model_name 243 | columns: 244 | - name: from_column 245 | tests: 246 | - dbt_utils.cardinality_equality: 247 | field: other_column_name 248 | to: ref('other_model_name') 249 | 250 | ``` 251 | 252 | #### relationships_where ([source](macros/schema_tests/relationships_where.sql)) 253 | This test validates the referential integrity between two relations (same as the core relationships schema test) with an added predicate to filter out some rows from the test. This is useful to exclude records such as test entities, rows created in the last X minutes/hours to account for temporary gaps due to ETL limitations, etc. 254 | 255 | Usage: 256 | ```yaml 257 | version: 2 258 | 259 | models: 260 | - name: model_name 261 | columns: 262 | - name: id 263 | tests: 264 | - dbt_utils.relationships_where: 265 | to: ref('other_model_name') 266 | field: client_id 267 | from_condition: id <> '4ca448b8-24bf-4b88-96c6-b1609499c38b' 268 | 269 | ``` 270 | 271 | #### mutually_exclusive_ranges ([source](macros/schema_tests/mutually_exclusive_ranges.sql)) 272 | This test confirms that for a given lower_bound_column and upper_bound_column, 273 | the ranges between the lower and upper bounds do not overlap with the ranges 274 | of another row.
275 | 276 | **Usage:** 277 | ```yaml 278 | version: 2 279 | 280 | models: 281 | # test that age ranges do not overlap 282 | - name: age_brackets 283 | tests: 284 | - dbt_utils.mutually_exclusive_ranges: 285 | lower_bound_column: min_age 286 | upper_bound_column: max_age 287 | gaps: not_allowed 288 | 289 | # test that each customer can only have one subscription at a time 290 | - name: subscriptions 291 | tests: 292 | - dbt_utils.mutually_exclusive_ranges: 293 | lower_bound_column: started_at 294 | upper_bound_column: ended_at 295 | partition_by: customer_id 296 | gaps: required 297 | ``` 298 | **Args:** 299 | * `lower_bound_column` (required): The name of the column that represents the 300 | lower value of the range. Must be not null. 301 | * `upper_bound_column` (required): The name of the column that represents the 302 | upper value of the range. Must be not null. 303 | * `partition_by` (optional): If a subset of records should be mutually exclusive 304 | (e.g. all periods for a single subscription_id are mutually exclusive), use this 305 | argument to indicate which column to partition by. `default=none` 306 | * `gaps` (optional): Whether gaps are allowed between ranges. 307 | `default='allowed', one_of=['not_allowed', 'allowed', 'required']` 308 | 309 | **Note:** Both `lower_bound_column` and `upper_bound_column` should be not null. 310 | If this is not the case in your data source, consider passing a coalesce function 311 | to the `lower_` and `upper_bound_column` arguments, like so: 312 | ```yaml 313 | version: 2 314 | 315 | models: 316 | - name: subscriptions 317 | tests: 318 | - dbt_utils.mutually_exclusive_ranges: 319 | lower_bound_column: coalesce(started_at, '1900-01-01') 320 | upper_bound_column: coalesce(ended_at, '2099-12-31') 321 | partition_by: customer_id 322 | gaps: allowed 323 | ``` 324 | 325 | **Understanding the `gaps` parameter:** 326 | Here are examples for each allowed value of the `gaps` parameter. 327 | * `gaps:not_allowed`: The upper bound of one record must be the lower bound of 328 | the next record. 329 | 330 | | lower_bound | upper_bound | 331 | |-------------|-------------| 332 | | 0 | 1 | 333 | | 1 | 2 | 334 | | 2 | 3 | 335 | 336 | * `gaps:allowed` (default): There may be a gap between the upper bound of one 337 | record and the lower bound of the next record. 338 | 339 | | lower_bound | upper_bound | 340 | |-------------|-------------| 341 | | 0 | 1 | 342 | | 2 | 3 | 343 | | 3 | 4 | 344 | 345 | * `gaps:required`: There must be a gap between the upper bound of one record and 346 | the lower bound of the next record (common for date ranges). 347 | 348 | | lower_bound | upper_bound | 349 | |-------------|-------------| 350 | | 0 | 1 | 351 | | 2 | 3 | 352 | | 4 | 5 | 353 | 354 | #### unique_combination_of_columns ([source](macros/schema_tests/unique_combination_of_columns.sql)) 355 | This test confirms that the combination of columns is unique. For example, the 356 | combination of month and product is unique, though neither column is unique 357 | in isolation. 358 | 359 | We generally recommend testing this uniqueness condition by either: 360 | * generating a [surrogate_key](#surrogate_key-source) for your model and testing 361 | the uniqueness of said key, OR 362 | * passing the `unique` test a coalesce of the columns (as discussed [here](https://docs.getdbt.com/docs/building-a-dbt-project/testing-and-documentation/testing/#testing-expressions)).
363 | 364 | However, these approaches can become non-performant on large data sets, in which 365 | case we recommend using this test instead. 366 | 367 | **Usage:** 368 | ```yaml 369 | - name: revenue_by_product_by_month 370 | tests: 371 | - dbt_utils.unique_combination_of_columns: 372 | combination_of_columns: 373 | - month 374 | - product 375 | ``` 376 | 377 | --- 378 | ### SQL helpers 379 | #### get_query_results_as_dict ([source](macros/sql/get_query_results_as_dict.sql)) 380 | This macro returns a dictionary from a sql query, so that you don't need to interact with the Agate library to operate on the result. 381 | 382 | Usage: 383 | ``` 384 | -- Returns a dictionary of the `cities` table rows where the state is California 385 | {% set california_cities = dbt_utils.get_query_results_as_dict("select * from " ~ ref('cities') ~ " where state = 'CA' and city is not null") %} 386 | select 387 | city, 388 | {% for city in california_cities %} 389 | sum(case when city = {{ city }} then 1 else 0 end) as users_in_{{ city }}, 390 | {% endfor %} 391 | count(*) as total 392 | from {{ ref('users') }} 393 | 394 | group by 1 395 | ``` 396 | 397 | #### get_column_values ([source](macros/sql/get_column_values.sql)) 398 | This macro returns the unique values for a column in a given [relation](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation). 399 | It takes an optional `default` argument for compiling when the relation does not already exist. 400 | 401 | Usage: 402 | ``` 403 | -- Returns a list of the top 50 states in the `users` table 404 | {% set states = dbt_utils.get_column_values(table=ref('users'), column='state', max_records=50, default=[]) %} 405 | 406 | {% for state in states %} 407 | ... 408 | {% endfor %} 409 | 410 | ... 411 | ``` 412 | #### get_relations_by_prefix 413 | > This replaces the `get_tables_by_prefix` macro. Note that the `get_tables_by_prefix` macro will 414 | be deprecated in a future release of this package. 415 | 416 | Returns a list of [Relations](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation) 417 | that match a given prefix, with an optional exclusion pattern. It's particularly 418 | handy paired with `union_relations`. 419 | **Usage:** 420 | ``` 421 | -- Returns a list of relations that match schema.prefix% 422 | {% set relations = dbt_utils.get_relations_by_prefix('my_schema', 'my_prefix') %} 423 | 424 | -- Returns a list of relations as above, excluding any that end in `deprecated` 425 | {% set relations = dbt_utils.get_relations_by_prefix('my_schema', 'my_prefix', '%deprecated') %} 426 | 427 | -- Example using the union_relations macro 428 | {% set event_relations = dbt_utils.get_relations_by_prefix('events', 'event_') %} 429 | {{ dbt_utils.union_relations(relations = event_relations) }} 430 | ``` 431 | 432 | **Args:** 433 | * `schema` (required): The schema to inspect for relations. 434 | * `prefix` (required): The prefix of the table/view (case insensitive). 435 | * `exclude` (optional): Exclude any relations that match this pattern. 436 | * `database` (optional, default = `target.database`): The database to inspect 437 | for relations.
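As a fuller sketch, both steps can live in a single model that discovers every matching relation at compile time and unions them with `union_relations` (documented below). The `events` schema, `event_` prefix, and model name here are hypothetical:

```
-- models/stg_all_events.sql (hypothetical model)
-- Collect every relation in the `events` schema whose name starts with
-- `event_`, skipping any whose name ends in `deprecated`.
{% set event_relations = dbt_utils.get_relations_by_prefix('events', 'event_', '%deprecated') %}

-- Union them; the generated `_dbt_source_relation` column records which
-- relation each row came from.
{{ dbt_utils.union_relations(relations=event_relations) }}
```

Because the relation list is resolved when the model is compiled, a new `event_` table appearing in the schema is picked up on the next run without any change to the model.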
438 | 439 | #### group_by ([source](macros/sql/groupby.sql)) 440 | This macro builds a group by statement for fields 1...N. 441 | 442 | Usage: 443 | ``` 444 | {{ dbt_utils.group_by(n=3) }} --> group by 1,2,3 445 | ``` 446 | 447 | #### star ([source](macros/sql/star.sql)) 448 | This macro generates a list of all fields that exist in the `from` relation, excluding any fields listed in the `except` argument. The construction is identical to `select * from {{ref('my_model')}}`, replacing star (`*`) with the star macro. This macro also has an optional `relation_alias` argument that will prefix all generated fields with an alias. 449 | 450 | Usage: 451 | ``` 452 | select 453 | {{ dbt_utils.star(from=ref('my_model'), except=["exclude_field_1", "exclude_field_2"]) }} 454 | from {{ref('my_model')}} 455 | ``` 456 | 457 | #### union_relations ([source](macros/sql/union.sql)) 458 | > This replaces the `union_tables` macro. Note that the `union_tables` macro will 459 | be deprecated in a future release of this package. 460 | 461 | This macro unions together an array of [Relations](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation), 462 | even when columns have differing orders in each Relation, and/or some columns are 463 | missing from some relations. Any columns exclusive to a subset of these 464 | relations will be filled with `null` where not present. A new column 465 | (`_dbt_source_relation`) is also added to indicate the source for each record. 466 | 467 | **Usage:** 468 | ``` 469 | {{ dbt_utils.union_relations( 470 | relations=[ref('my_model'), source('my_source', 'my_table')], 471 | exclude=["_loaded_at"] 472 | ) }} 473 | ``` 474 | **Args:** 475 | * `relations` (required): An array of [Relations](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation). 476 | * `exclude` (optional): A list of column names that should be excluded from 477 | the final query. 478 | * `include` (optional): A list of column names that should be included in the 479 | final query. Note the `include` and `exclude` parameters are mutually exclusive. 480 | * `column_override` (optional): A dictionary of explicit column type overrides, 481 | e.g. `{"some_field": "varchar(100)"}`. 482 | * `source_column_name` (optional, `default="_dbt_source_relation"`): The name of 483 | the column that records the source of this row. 484 | 485 | #### generate_series ([source](macros/sql/generate_series.sql)) 486 | This macro implements a cross-database mechanism to generate an arbitrarily long list of numbers. Specify the maximum number you'd like in your list and it will create a 1-indexed SQL result set. 487 | 488 | Usage: 489 | ``` 490 | {{ dbt_utils.generate_series(upper_bound=1000) }} 491 | ``` 492 | 493 | #### surrogate_key ([source](macros/sql/surrogate_key.sql)) 494 | Implements a cross-database way to generate a hashed surrogate key using the fields specified. 495 | 496 | Usage: 497 | ``` 498 | {{ dbt_utils.surrogate_key(['field_a', 'field_b'[,...]]) }} 499 | ``` 500 | 501 | #### safe_add ([source](macros/sql/safe_add.sql)) 502 | Implements a cross-database way to sum the nullable fields specified. 503 | 504 | Usage: 505 | ``` 506 | {{ dbt_utils.safe_add('field_a', 'field_b'[,...]) }} 507 | ``` 508 | 509 | #### pivot ([source](macros/sql/pivot.sql)) 510 | This macro pivots values from rows to columns.
511 | 512 | Usage: 513 | ``` 514 | {{ dbt_utils.pivot(<column>, <list of values>) }} 515 | ``` 516 | 517 | Example: 518 | 519 | Input: orders 520 | 521 | | size | color | 522 | |------|-------| 523 | | S | red | 524 | | S | blue | 525 | | S | red | 526 | | M | red | 527 | 528 | select 529 | size, 530 | {{ dbt_utils.pivot( 531 | 'color', 532 | dbt_utils.get_column_values(ref('orders'), 'color') 533 | ) }} 534 | from {{ ref('orders') }} 535 | group by size 536 | 537 | Output: 538 | 539 | | size | red | blue | 540 | |------|-----|------| 541 | | S | 2 | 1 | 542 | | M | 1 | 0 | 543 | 544 | Arguments: 545 | 546 | - column: Column name, required 547 | - values: List of row values to turn into columns, required 548 | - alias: Whether to create column aliases, default is True 549 | - agg: SQL aggregation function, default is sum 550 | - cmp: SQL value comparison, default is = 551 | - prefix: Column alias prefix, default is blank 552 | - suffix: Column alias postfix, default is blank 553 | - then_value: Value to use if comparison succeeds, default is 1 554 | - else_value: Value to use if comparison fails, default is 0 555 | - quote_identifiers: Whether to surround column aliases with double quotes, default is true 556 | 557 | #### unpivot ([source](macros/sql/unpivot.sql)) 558 | This macro "un-pivots" a table from wide format to long format. Functionality is similar to the pandas [melt](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.melt.html) function. 559 | 560 | Usage: 561 | ``` 562 | {{ dbt_utils.unpivot( 563 | relation=ref('table_name'), 564 | cast_to='datatype', 565 | exclude=[], 566 | remove=[], 567 | field_name=<field_name>, 568 | value_name=<value_name> 569 | ) }} 570 | ``` 571 | 572 | **Example:** 573 | 574 | Input: orders 575 | 576 | | date | size | color | status | 577 | |------------|------|-------|------------| 578 | | 2017-01-01 | S | red | complete | 579 | | 2017-03-01 | S | red | processing | 580 | 581 | {{ dbt_utils.unpivot(ref('orders'), cast_to='varchar', exclude=['date','status']) }} 582 | 583 | Output: 584 | 585 | | date | status | field_name | value | 586 | |------------|------------|------------|-------| 587 | | 2017-01-01 | complete | size | S | 588 | | 2017-01-01 | complete | color | red | 589 | | 2017-03-01 | processing | size | S | 590 | | 2017-03-01 | processing | color | red | 591 | 592 | **Args**: 593 | - `relation`: The [Relation](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation) to unpivot. 594 | - `cast_to`: The data type to cast the unpivoted values to, default is varchar 595 | - `exclude`: A list of columns to exclude from the unpivot operation but keep in the resulting table. 596 | - `remove`: A list of columns to remove from the resulting table. 597 | - `field_name`: Column name in the resulting table that holds the original column names. 598 | - `value_name`: Column name in the resulting table that holds the values. 599 | 600 | --- 601 | ### Web 602 | #### get_url_parameter ([source](macros/web/get_url_parameter.sql)) 603 | This macro extracts a url parameter from a column containing a url. 604 | 605 | Usage: 606 | ``` 607 | {{ dbt_utils.get_url_parameter(field='page_url', url_parameter='utm_source') }} 608 | ``` 609 | 610 | #### get_url_host ([source](macros/web/get_url_host.sql)) 611 | This macro extracts a hostname from a column containing a url. 612 | 613 | Usage: 614 | ``` 615 | {{ dbt_utils.get_url_host(field='page_url') }} 616 | ``` 617 | 618 | #### get_url_path ([source](macros/web/get_url_path.sql)) 619 | This macro extracts a page path from a column containing a url.
620 | 621 | Usage: 622 | ``` 623 | {{ dbt_utils.get_url_path(field='page_url') }} 624 | ``` 625 | 626 | --- 627 | ### Logger 628 | #### pretty_time ([source](macros/logger/pretty_time.sql)) 629 | This macro returns a string of the current timestamp, optionally taking a datestring format. 630 | ```sql 631 | {#- This will return a string like '14:50:34' -#} 632 | {{ dbt_utils.pretty_time() }} 633 | 634 | {#- This will return a string like '2019-05-02 14:50:34' -#} 635 | {{ dbt_utils.pretty_time(format='%Y-%m-%d %H:%M:%S') }} 636 | ``` 637 | 638 | #### pretty_log_format ([source](macros/logger/pretty_log_format.sql)) 639 | This macro formats the input in a way that will print nicely to the command line when you `log` it. 640 | ```sql 641 | {#- This will return a string like: 642 | "11:07:31 + my pretty message" 643 | -#} 644 | 645 | {{ dbt_utils.pretty_log_format("my pretty message") }} 646 | ``` 647 | #### log_info ([source](macros/logger/log_info.sql)) 648 | This macro logs a formatted message (with a timestamp) to the command line. 649 | ```sql 650 | {{ dbt_utils.log_info("my pretty message") }} 651 | ``` 652 | 653 | ``` 654 | 11:07:28 | 1 of 1 START table model analytics.fct_orders........................ [RUN] 655 | 11:07:31 + my pretty message 656 | ``` 657 | 658 | ### Materializations 659 | #### insert_by_period ([source](macros/materializations/insert_by_period_materialization.sql)) 660 | `insert_by_period` allows dbt to insert records into a table one period (e.g. day, week) at a time. 661 | 662 | This materialization is appropriate for event data that can be processed in discrete periods. It is similar in concept to the built-in incremental materialization, but has the added benefit of building the model in chunks even during a full refresh, so it is particularly useful for models where the initial run can be problematic. 663 | 664 | Should a run of a model using this materialization be interrupted, a subsequent run will continue building the target table from where it was interrupted (provided the `--full-refresh` flag is omitted). 665 | 666 | Progress is logged in the command line for easy monitoring. 667 | 668 | Usage: 669 | ```sql 670 | {{ 671 | config( 672 | materialized = "insert_by_period", 673 | period = "day", 674 | timestamp_field = "created_at", 675 | start_date = "2018-01-01", 676 | stop_date = "2018-06-01") 677 | }} 678 | 679 | with events as ( 680 | 681 | select * 682 | from {{ ref('events') }} 683 | where __PERIOD_FILTER__ -- This will be replaced with a filter in the materialization code 684 | 685 | ) 686 | 687 | ....complex aggregates here.... 688 | 689 | ``` 690 | Configuration values: 691 | * `period`: period to break the model into, must be a valid [datepart](https://docs.aws.amazon.com/redshift/latest/dg/r_Dateparts_for_datetime_functions.html) (default='Week') 692 | * `timestamp_field`: the column name of the timestamp field that will be used to break the model into smaller queries 693 | * `start_date`: literal date or timestamp - generally choose a date that is earlier than the start of your data 694 | * `stop_date`: literal date or timestamp (default=current_timestamp) 695 | 696 | Caveats: 697 | * This materialization is compatible with dbt 0.10.1. 698 | * This materialization has been written for Redshift. 699 | * This materialization can only be used for a model where records are not expected to change after they are created. 700 | * Any model post-hooks that use `{{ this }}` will fail using this materialization.
For example: 701 | ```yaml 702 | models: 703 | project-name: 704 | post-hook: "grant select on {{ this }} to db_reader" 705 | ``` 706 | A useful workaround is to change the above post-hook to: 707 | ```yaml 708 | post-hook: "grant select on {{ this.schema }}.{{ this.name }} to db_reader" 709 | ``` 710 | 711 | ---- 712 | 713 | ### Contributing 714 | 715 | We welcome contributions to this repo! To contribute a new feature or a fix, please open a Pull Request with 1) your changes, 2) updated documentation for the `README.md` file, and 3) a working integration test. See [this page](integration_tests/README.md) for more information. 716 | 717 | ---- 718 | 719 | ### Getting started with dbt 720 | 721 | - [What is dbt]? 722 | - Read the [dbt viewpoint] 723 | - [Installation] 724 | - Join the [chat][slack-url] on Slack for live questions and support. 725 | 726 | 727 | ## Code of Conduct 728 | 729 | Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [PyPA Code of Conduct]. 730 | 731 | 732 | 733 | [PyPA Code of Conduct]: https://www.pypa.io/en/latest/code-of-conduct/ 734 | [slack-url]: http://ac-slackin.herokuapp.com/ 735 | [Installation]: https://dbt.readme.io/docs/installation 736 | [What is dbt]: https://dbt.readme.io/docs/overview 737 | [dbt viewpoint]: https://dbt.readme.io/docs/viewpoint 738 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'dbt_utils' 2 | version: '0.1.0' 3 | 4 | require-dbt-version: ">=0.17.0" 5 | config-version: 2 6 | 7 | target-path: "target" 8 | clean-targets: ["target", "dbt_modules"] 9 | macro-paths: ["macros"] 10 | log-path: "logs" 11 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | 4 | dbt: 5 | image: circleci/python:3.6.3-stretch 6 | depends_on: 7 | - ${TARGET} 8 | env_file: "./integration_tests/.env/${TARGET}.env" 9 | entrypoint: "/repo/run_test.sh ${TARGET} ${MODELS} ${SEEDS}" 10 | working_dir: /repo 11 | volumes: 12 | - ".:/repo" 13 | 14 | postgres: 15 | image: circleci/postgres:9.6.5-alpine-ram 16 | ports: 17 | - "5432:5432" 18 | 19 | # dummy container, since snowflake is a managed service 20 | snowflake: 21 | image: circleci/python:3.6.3-stretch 22 | entrypoint: "/bin/true" 23 | 24 | # dummy container, since bigquery is a managed service 25 | bigquery: 26 | image: circleci/python:3.6.3-stretch 27 | entrypoint: "/bin/true" 28 | 29 | # dummy container, since redshift is a managed service 30 | redshift: 31 | image: circleci/python:3.6.3-stretch 32 | entrypoint: "/bin/true" 33 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/etc/dbt-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-is-for-losers/ga4-bigquery-starter/4e38153440ac30b86b9b1a0a31c1f6893e1dbbb5/dbt_modules/dbt_utils/etc/dbt-logo.png -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/.env/bigquery.env: -------------------------------------------------------------------------------- 1 | GCLOUD_SERVICE_KEY_PATH= 
-------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/.env/postgres.env: -------------------------------------------------------------------------------- 1 | CI_DBT_HOST=postgres 2 | CI_DBT_USER=root 3 | CI_DBT_PASS='' 4 | CI_DBT_PORT=5432 5 | CI_DBT_DBNAME=circle_test -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/.env/redshift.env: -------------------------------------------------------------------------------- 1 | CI_REDSHIFT_DBT_HOST= 2 | CI_REDSHIFT_DBT_USER= 3 | CI_REDSHIFT_DBT_PASS= 4 | CI_REDSHIFT_DBT_DBNAME= -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/.env/snowflake.env: -------------------------------------------------------------------------------- 1 | CI_SNOWFLAKE_DBT_ACCOUNT= 2 | CI_SNOWFLAKE_DBT_USER= 3 | CI_SNOWFLAKE_DBT_PASS= 4 | CI_SNOWFLAKE_DBT_ROLE= 5 | CI_SNOWFLAKE_DBT_DATABASE= 6 | CI_SNOWFLAKE_DBT_WAREHOUSE= -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/Makefile: -------------------------------------------------------------------------------- 1 | TARGETS = postgres redshift snowflake bigquery 2 | .PHONY : test test-all $(TARGETS) 3 | 4 | test: export TARGET = $(target) 5 | test: export MODELS = $(models) 6 | test: export SEEDS = $(seeds) 7 | test: 8 | docker-compose -f ../docker-compose.yml up dbt 9 | 10 | $(TARGETS): 11 | $(MAKE) test target=$@ 12 | 13 | test-all: $(TARGETS) 14 | echo "Tested all targets" 15 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/README.md: -------------------------------------------------------------------------------- 1 | ### Run the integration tests 2 | 3 | To run the integration tests on your local machine, the same way they are run in CI (using CircleCI), do the following: 4 | 5 | Assuming you are in the `integration_tests` folder, 6 | 7 | ```bash 8 | make test target=[postgres|redshift|...] [models=...] [seeds=...] 9 | ``` 10 | 11 | or, more specifically: 12 | 13 | ```bash 14 | make test target=postgres models=sql.test_star seeds=sql.data_star 15 | ``` 16 | 17 | or, to test against all targets: 18 | 19 | ```bash 20 | make test-all [models=...] [seeds=...] 21 | ``` 22 | 23 | Specifying `models=` and `seeds=` is optional; however, _if_ you specify `seeds`, you have to specify `models` too. 24 | 25 | Where possible, targets are run in Docker containers (this works for Postgres and, in the future, Spark, for example). For managed services like Snowflake, BigQuery and Redshift this is not possible, so your own configuration for these services has to be provided in the appropriate env files in `integration_tests/.env/[TARGET].env`. 26 | 27 | ### Creating a new integration test 28 | 29 | This directory contains an example dbt project which tests the macros in the `dbt-utils` package. An integration test typically involves adding 1) a new seed file, 2) a new model file, and 3) a schema test. 30 | 31 | For an example integration test, check out the tests for the `get_url_parameter` macro: 32 | 33 | 1.
[Macro definition](https://github.com/fishtown-analytics/dbt-utils/blob/master/macros/web/get_url_parameter.sql) 34 | 2. [Seed file with fake data](https://github.com/fishtown-analytics/dbt-utils/blob/master/integration_tests/data/web/data_urls.csv) 35 | 3. [Model to test the macro](https://github.com/fishtown-analytics/dbt-utils/blob/master/integration_tests/models/web/test_urls.sql) 36 | 4. [A schema test to assert the macro works as expected](https://github.com/fishtown-analytics/dbt-utils/blob/master/integration_tests/models/web/schema.yml#L2) 37 | 38 | 39 | Once you've added all of these files, you should be able to run: 40 | ``` 41 | $ dbt deps 42 | $ dbt seed 43 | $ dbt run --model {your_model_name} 44 | $ dbt test --model {your_model_name} 45 | ``` 46 | 47 | If the tests all pass, then you're good to go! All tests will be run automatically when you create a PR against this repo. -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/ci/sample.profiles.yml: -------------------------------------------------------------------------------- 1 | 2 | # HEY! This file is used in the dbt-utils integration tests with CircleCI. 3 | # You should __NEVER__ check credentials into version control. Thanks for reading :) 4 | 5 | config: 6 | send_anonymous_usage_stats: False 7 | use_colors: True 8 | 9 | integration_tests: 10 | target: postgres 11 | outputs: 12 | postgres: 13 | type: postgres 14 | host: "{{ env_var('CI_DBT_HOST') }}" 15 | user: "{{ env_var('CI_DBT_USER') }}" 16 | pass: "{{ env_var('CI_DBT_PASS') }}" 17 | port: "{{ env_var('CI_DBT_PORT') }}" 18 | dbname: "{{ env_var('CI_DBT_DBNAME') }}" 19 | schema: dbt_utils_integration_tests_postgres 20 | threads: 1 21 | 22 | redshift: 23 | type: redshift 24 | host: "{{ env_var('CI_REDSHIFT_DBT_HOST') }}" 25 | user: "{{ env_var('CI_REDSHIFT_DBT_USER') }}" 26 | pass: "{{ env_var('CI_REDSHIFT_DBT_PASS') }}" 27 | dbname: "{{ env_var('CI_REDSHIFT_DBT_DBNAME') }}" 28 | port: 5439 29 | schema: dbt_utils_integration_tests_redshift 30 | threads: 1 31 | 32 | bigquery: 33 | type: bigquery 34 | method: service-account 35 | keyfile: "{{ env_var('GCLOUD_SERVICE_KEY_PATH') }}" 36 | project: 'dbt-integration-tests' 37 | schema: dbt_utils_integration_tests_bigquery 38 | threads: 1 39 | 40 | snowflake: 41 | type: snowflake 42 | account: "{{ env_var('CI_SNOWFLAKE_DBT_ACCOUNT') }}" 43 | user: "{{ env_var('CI_SNOWFLAKE_DBT_USER') }}" 44 | password: "{{ env_var('CI_SNOWFLAKE_DBT_PASS') }}" 45 | role: "{{ env_var('CI_SNOWFLAKE_DBT_ROLE') }}" 46 | database: "{{ env_var('CI_SNOWFLAKE_DBT_DATABASE') }}" 47 | warehouse: "{{ env_var('CI_SNOWFLAKE_DBT_WAREHOUSE') }}" 48 | schema: dbt_utils_integration_tests_snowflake 49 | threads: 1 -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-is-for-losers/ga4-bigquery-starter/4e38153440ac30b86b9b1a0a31c1f6893e1dbbb5/dbt_modules/dbt_utils/integration_tests/data/.gitkeep -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/cross_db/data_concat.csv: -------------------------------------------------------------------------------- 1 | input_1,input_2,output 2 | a,b,ab 3 | a,,a 4 | ,b,b 5 | ,, 6 | --------------------------------------------------------------------------------
/dbt_modules/dbt_utils/integration_tests/data/cross_db/data_date_trunc.csv: -------------------------------------------------------------------------------- 1 | updated_at,day,month 2 | 2018-01-05 12:00:00,2018-01-05,2018-01-01 3 | ,, 4 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/cross_db/data_dateadd.csv: -------------------------------------------------------------------------------- 1 | from_time,interval_length,datepart,result 2 | 2018-01-01 01:00:00,1,day,2018-01-02 01:00:00 3 | 2018-01-01 01:00:00,1,month,2018-02-01 01:00:00 4 | 2018-01-01 01:00:00,1,year,2019-01-01 01:00:00 5 | 2018-01-01 01:00:00,1,hour,2018-01-01 02:00:00 6 | ,1,day, 7 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/cross_db/data_datediff.csv: -------------------------------------------------------------------------------- 1 | first_date,second_date,datepart,result 2 | 2018-01-01 01:00:00,2018-01-02 01:00:00,day,1 3 | 2018-01-01 01:00:00,2018-02-01 01:00:00,month,1 4 | 2018-01-01 01:00:00,2019-01-01 01:00:00,year,1 5 | 2018-01-01 01:00:00,2018-01-01 02:00:00,hour,1 6 | 2018-01-01 01:00:00,2018-01-01 02:01:00,minute,61 7 | 2018-01-01 01:00:00,2018-01-01 02:00:01,second,3601 8 | 2019-12-31 00:00:00,2019-12-27 00:00:00,week,-1 9 | 2019-12-31 00:00:00,2019-12-30 00:00:00,week,0 10 | 2019-12-31 00:00:00,2020-01-02 00:00:00,week,0 11 | 2019-12-31 00:00:00,2020-01-06 02:00:00,week,1 12 | ,2018-01-01 02:00:00,hour, 13 | 2018-01-01 02:00:00,,hour, 14 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/cross_db/data_hash.csv: -------------------------------------------------------------------------------- 1 | input_1,output 2 | ab,187ef4436122d1cc2f40dc2b92f0eba0 3 | a,0cc175b9c0f1b6a831c399e269772661 4 | 1,c4ca4238a0b923820dcc509a6f75849b 5 | ,d41d8cd98f00b204e9800998ecf8427e 6 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/cross_db/data_last_day.csv: -------------------------------------------------------------------------------- 1 | date_day,date_part,result 2 | 2018-01-02,month,2018-01-31 3 | 2018-01-02,year,2018-12-31 4 | ,month, 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/cross_db/data_length.csv: -------------------------------------------------------------------------------- 1 | expression,output 2 | abcdef,6 3 | fishtown,8 4 | december,8 5 | www.google.com/path,19 -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/cross_db/data_position.csv: -------------------------------------------------------------------------------- 1 | substring_text,string_text,result 2 | def,abcdef,4 3 | land,earth,0 4 | town,fishtown,5 5 | ember,december,4 -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/cross_db/data_replace.csv: -------------------------------------------------------------------------------- 1 | string_text,search_chars,replace_chars,result 2 | a,a,b,b 3 | http://google.com,http://,"",google.com -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/cross_db/data_right.csv: 
-------------------------------------------------------------------------------- 1 | string_text,length_expression,output 2 | abcdef,3,def 3 | fishtown,4,town 4 | december,5,ember 5 | december,0, -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/cross_db/data_safe_cast.csv: -------------------------------------------------------------------------------- 1 | field,output 2 | abc,abc 3 | 123,123 4 | , 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/cross_db/data_split_part.csv: -------------------------------------------------------------------------------- 1 | parts,split_on,result_1,result_2,result_3 2 | a|b|c,|,a,b,c 3 | 1|2|3,|,1,2,3 4 | ,|,,, 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/cross_db/data_width_bucket.csv: -------------------------------------------------------------------------------- 1 | date_col,amount,num_buckets,min_value,max_value,bucket 2 | 2012-08-01,190000.00,4,200000.0,600000.0,0 3 | 2013-08-01,290000.00,4,200000.0,600000.0,1 4 | 2014-02-01,320000.00,4,200000.0,600000.0,2 5 | 2015-04-01,399999.99,4,200000.0,600000.0,2 6 | 2016-04-01,400000.00,4,200000.0,600000.0,3 7 | 2017-04-01,470000.00,4,200000.0,600000.0,3 8 | 2018-04-01,510000.00,4,200000.0,600000.0,4 9 | 2019-04-01,610000.00,4,200000.0,600000.0,5 10 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/datetime/data_date_spine.csv: -------------------------------------------------------------------------------- 1 | date_day 2 | 2018-01-01 3 | 2018-01-02 4 | 2018-01-03 5 | 2018-01-04 6 | 2018-01-05 7 | 2018-01-06 8 | 2018-01-07 9 | 2018-01-08 10 | 2018-01-09 -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/etc/data_people.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,ip_address,created_at,is_active 2 | 1,Dame,Cluley,dcluley0@nih.gov,155.86.204.241,2017-02-07 09:48:26,false 3 | 2,Guy,Wittering,gwittering1@reddit.com,221.174.176.36,2017-08-08 00:37:53,false 4 | 3,Klement,Bucke,kbucke2@dedecms.com,167.94.85.199,2016-09-05 23:43:19,true 5 | 4,Roselia,Dallander,rdallander3@adobe.com,135.10.21.248,2016-08-11 00:00:11,false 6 | 5,Arly,Terzza,aterzza4@va.gov,219.66.192.10,2017-03-23 22:11:42,true 7 | 6,Arron,Siehard,asiehard5@ibm.com,116.211.108.88,2017-07-07 23:11:50,true 8 | 7,Debera,Petrazzi,dpetrazzi6@addthis.com,18.167.49.108,2017-11-12 04:34:50,false 9 | 8,Timi,Agget,tagget7@home.pl,170.171.78.217,2016-03-14 02:04:33,true 10 | 9,Ines,Brixey,ibrixey8@biblegateway.com,251.141.4.42,2017-10-01 16:41:21,false 11 | 10,Karlen,Eggleton,keggleton9@amazon.co.jp,100.179.149.224,2016-04-15 10:05:00,true 12 | 11,Hamish,Winfield,hwinfielda@squarespace.com,5.34.205.16,2017-12-29 22:44:52,true 13 | 12,Stanton,Tiron,stironb@rambler.ru,171.5.190.125,2017-01-20 23:31:15,true 14 | 13,Tyne,Elner,telnerc@jiathis.com,165.155.112.184,2017-06-12 23:42:54,false 15 | 14,Lita,Kitley,lkitleyd@gmpg.org,138.131.8.94,2018-01-25 15:03:51,false 16 | 15,Alan,Morsley,amorsleye@dell.com,5.81.121.91,2016-03-18 19:37:49,true 17 | 16,Erinn,Stokoe,estokoef@walmart.com,244.57.254.248,2017-02-23 22:51:09,true 18 | 17,Dela,Oxley,doxleyg@state.gov,163.86.24.94,2017-04-12 20:19:20,true 19 | 
18,Daryle,Reeve,dreeveh@1und1.de,175.30.172.20,2017-07-09 20:46:10,false 20 | 19,Micah,Smitham,msmithami@techcrunch.com,164.75.157.186,2016-02-25 16:17:57,true 21 | 20,Bernice,Van der Velde,bvanderveldej@i2i.jp,141.99.132.98,2017-07-28 23:31:24,false 22 | 21,Odo,Janacek,ojanacekk@redcross.org,50.195.72.49,2017-05-01 05:59:30,false 23 | 22,Lyndsey,Exter,lexterl@scribd.com,244.5.43.160,2017-02-13 11:32:04,false 24 | 23,Correy,Brash,cbrashm@loc.gov,233.67.52.95,2018-02-27 05:26:29,false 25 | 24,Lyle,Josilevich,ljosilevichn@rambler.ru,99.16.127.176,2016-08-06 03:37:03,false 26 | 25,Skip,Castiello,scastielloo@rambler.ru,118.174.3.50,2016-06-07 23:32:19,true 27 | 26,Philbert,Daltry,pdaltryp@tamu.edu,181.93.127.23,2016-08-16 12:52:52,true 28 | 27,Addie,Sikora,asikoraq@theatlantic.com,120.33.67.44,2016-09-01 12:45:37,true 29 | 28,Sibyl,Songist,ssongistr@noaa.gov,151.85.172.142,2016-02-11 01:14:50,false 30 | 29,Eyde,Dankersley,edankersleys@illinois.edu,147.170.154.132,2017-08-09 18:14:00,false 31 | 30,Dion,Pessler,dpesslert@reverbnation.com,51.92.202.203,2017-01-30 02:05:47,true 32 | 31,Rodd,Huntly,rhuntlyu@google.ru,82.198.158.0,2016-04-22 06:44:15,false 33 | 32,Inness,Cartmer,icartmerv@tripod.com,44.147.127.200,2017-03-11 12:03:56,false 34 | 33,Blakeley,Figgins,bfigginsw@ebay.co.uk,116.54.91.30,2016-05-28 14:25:49,true 35 | 34,Yancey,Leeburne,yleeburnex@people.com.cn,8.44.104.205,2016-08-09 03:15:02,false 36 | 35,Gustavus,Kemp,gkempy@sourceforge.net,101.126.34.176,2018-02-02 12:15:57,true 37 | 36,Annabela,Ardron,aardronz@slideshare.net,135.255.20.212,2017-10-29 03:13:03,true 38 | 37,Allister,Janota,ajanota10@yahoo.com,41.139.90.112,2016-09-19 04:21:50,true 39 | 38,Yoko,McBryde,ymcbryde11@weather.com,124.17.222.132,2016-08-21 14:32:04,false 40 | 39,Aprilette,Colebeck,acolebeck12@elegantthemes.com,14.62.14.45,2017-04-04 04:47:31,true 41 | 40,Oralia,Marklew,omarklew13@cnet.com,108.161.10.231,2017-12-29 23:15:15,true 42 | 41,Vi,Bryde,vbryde14@harvard.edu,20.91.132.215,2017-12-01 21:02:36,false 43 | 42,Koren,Emmanueli,kemmanueli15@fotki.com,151.86.146.63,2016-11-10 22:36:05,true 44 | 43,Corrie,Pendry,cpendry16@technorati.com,78.110.104.252,2017-11-22 07:57:23,true 45 | 44,Berton,Jakovijevic,bjakovijevic17@themeforest.net,243.201.191.244,2017-12-22 20:30:37,false 46 | 45,Ahmad,Lawerence,alawerence18@bluehost.com,234.146.69.92,2017-07-07 17:37:17,true 47 | 46,Walther,Mardee,wmardee19@sciencedirect.com,86.10.226.173,2016-06-23 09:20:51,false 48 | 47,Raynor,Reignolds,rreignolds1a@github.com,192.159.109.53,2016-04-19 13:32:00,false 49 | 48,Dom,Brodhead,dbrodhead1b@ed.gov,13.193.83.80,2016-09-24 03:16:43,false 50 | 49,Patton,Marrett,pmarrett1c@sourceforge.net,73.142.143.198,2016-06-02 19:20:48,true 51 | 50,Murielle,Reina,mreina1d@washington.edu,88.67.241.169,2017-10-01 01:56:52,true 52 | 51,Markos,Zylberdik,mzylberdik1e@ask.com,169.62.233.37,2017-03-23 19:40:19,true 53 | 52,Dorisa,Gosalvez,dgosalvez1f@mit.edu,10.111.156.111,2016-02-24 12:37:30,true 54 | 53,Amata,Moar,amoar1g@tinypic.com,214.241.229.183,2016-05-22 05:04:06,true 55 | 54,Graehme,Finnigan,gfinnigan1h@trellian.com,229.14.230.4,2016-12-27 00:49:18,true 56 | 55,Tanya,Sheers,tsheers1i@house.gov,43.212.37.134,2018-02-04 05:17:30,true 57 | 56,Germain,Beavers,gbeavers1j@hexun.com,91.219.240.74,2017-01-26 23:03:39,false 58 | 57,Emmye,Cerie,ecerie1k@independent.co.uk,58.183.233.79,2017-04-30 14:13:31,true 59 | 58,Reese,Glaisner,rglaisner1l@dropbox.com,63.181.9.68,2016-07-29 05:49:41,true 60 | 
59,Christie,Phlippsen,cphlippsen1m@ucoz.ru,236.91.248.168,2017-07-07 12:37:10,false 61 | 60,Anthia,Tolland,atolland1n@hibu.com,124.60.13.101,2016-02-06 14:38:37,true 62 | 61,Annamarie,Pipworth,apipworth1o@ftc.gov,53.219.191.107,2017-06-13 08:29:04,true 63 | 62,Price,O'Gready,pogready1p@theatlantic.com,131.188.180.57,2016-09-28 08:44:38,false 64 | 63,Sergei,Cicero,scicero1q@telegraph.co.uk,100.97.16.84,2017-10-02 15:58:45,false 65 | 64,Dolorita,Lilion,dlilion1r@vimeo.com,150.43.252.51,2017-09-06 12:39:46,true 66 | 65,Perrine,Peetermann,ppeetermann1s@fema.gov,93.27.202.229,2017-07-08 08:49:37,false 67 | 66,Frieda,Gemelli,fgemelli1t@altervista.org,20.21.177.102,2016-04-18 05:58:59,false 68 | 67,Webster,Tully,wtully1u@nba.com,61.55.62.136,2016-02-18 11:01:23,true 69 | 68,Clara,Dadd,cdadd1v@rakuten.co.jp,67.84.203.36,2017-06-10 22:20:50,false 70 | 69,Gardener,Clarkin,gclarkin1w@bbc.co.uk,211.175.17.92,2017-11-27 23:33:42,true 71 | 70,Doll,Celez,dcelez1x@imdb.com,65.124.34.165,2017-01-03 06:40:44,true 72 | 71,Willy,Remnant,wremnant1y@nasa.gov,183.190.219.35,2017-05-27 11:05:47,false 73 | 72,Felicle,Scoterbosh,fscoterbosh1z@macromedia.com,12.103.23.230,2017-05-04 05:22:27,true 74 | 73,Egan,Ryland,eryland20@t.co,227.35.15.147,2016-06-09 11:50:39,true 75 | 74,Donny,Clissold,dclissold21@yellowpages.com,210.51.117.212,2016-03-08 22:48:18,true 76 | 75,Gwyneth,Brash,gbrash22@vistaprint.com,30.243.157.153,2016-01-23 17:11:17,true 77 | 76,Mervin,Woolis,mwoolis23@elegantthemes.com,52.171.162.135,2017-06-17 15:36:58,false 78 | 77,Alicea,Mewton,amewton24@com.com,236.53.172.152,2017-12-21 10:35:45,true 79 | 78,Whittaker,Biaggiotti,wbiaggiotti25@patch.com,85.219.77.207,2017-12-27 09:25:13,true 80 | 79,Teddie,Matevushev,tmatevushev26@nsw.gov.au,121.24.14.214,2017-12-09 16:30:35,false 81 | 80,Mal,Mc Gee,mmcgee27@eventbrite.com,85.138.92.81,2016-01-14 03:02:43,true 82 | 81,Teressa,Lourenco,tlourenco28@zdnet.com,33.2.78.199,2016-03-17 02:29:47,false 83 | 82,Willabella,Danelutti,wdanelutti29@ted.com,221.78.224.255,2016-03-06 14:34:53,true 84 | 83,Samaria,Hessle,shessle2a@surveymonkey.com,216.8.59.131,2017-03-30 11:02:45,true 85 | 84,Ruperto,Staite,rstaite2b@wikispaces.com,79.47.189.125,2017-08-23 22:09:19,true 86 | 85,Ashlee,Scotsbrook,ascotsbrook2c@trellian.com,91.104.127.195,2017-10-02 15:01:49,false 87 | 86,Godfry,Lawson,glawson2d@seesaa.net,241.54.44.84,2016-04-03 04:42:19,false 88 | 87,Rose,Lathleiffure,rlathleiffure2e@instagram.com,21.172.211.218,2016-05-11 04:37:44,true 89 | 88,Ricky,Helwig,rhelwig2f@sciencedirect.com,130.213.100.214,2017-12-02 11:58:19,true 90 | 89,Hersh,Castleman,hcastleman2g@mediafire.com,196.170.63.20,2016-11-06 15:18:34,false 91 | 90,Upton,Midghall,umidghall2h@wordpress.org,29.108.156.94,2017-03-24 03:48:22,true 92 | 91,Devi,Lowmass,dlowmass2i@parallels.com,243.189.157.74,2016-07-31 13:35:43,true 93 | 92,Cherry,Goldstein,cgoldstein2j@delicious.com,21.78.25.159,2016-06-02 22:19:13,true 94 | 93,Alfy,Yakubovics,ayakubovics2k@bigcartel.com,29.28.179.184,2016-10-13 08:03:28,true 95 | 94,Ambrosi,Martinyuk,amartinyuk2l@163.com,1.42.244.146,2016-01-24 23:02:40,false 96 | 95,Daniel,Duly,dduly2m@engadget.com,74.32.138.66,2017-03-26 09:02:19,false 97 | 96,Hazlett,Oakton,hoakton2n@booking.com,248.196.158.127,2016-11-01 10:55:45,true 98 | 97,Vivienne,Millery,vmillery2o@nyu.edu,99.21.99.255,2016-04-19 15:25:08,true 99 | 98,Glynda,Kupper,gkupper2p@yahoo.co.jp,89.13.224.249,2016-04-05 07:01:28,false 100 | 99,Pavlov,MacDwyer,pmacdwyer2q@netvibes.com,147.162.14.191,2018-02-10 06:57:45,true 101 | 
100,Fonzie,Filip,ffilip2r@tripadvisor.com,244.178.118.180,2016-11-18 00:09:42,false 102 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/materializations/data_insert_by_period.csv: -------------------------------------------------------------------------------- 1 | id,created_at 2 | 1,2017-12-02 3 | 2,2018-01-02 4 | 3,2018-02-02 5 | 4,2018-03-02 6 | 5,2018-04-02 7 | 6,2018-05-02 8 | 7,2018-06-02 9 | 8,2018-07-02 10 | 9,2018-08-02 11 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/schema_tests/data_test_at_least_one.csv: -------------------------------------------------------------------------------- 1 | field 2 | a 3 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/schema_tests/data_test_equal_rowcount.csv: -------------------------------------------------------------------------------- 1 | field 2 | 1 3 | 1 4 | 2 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/schema_tests/data_test_expression_is_true.csv: -------------------------------------------------------------------------------- 1 | col_a,col_b 2 | 0,1 3 | 1,0 4 | 0.5,0.5 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/schema_tests/data_test_mutually_exclusive_ranges_no_gaps.csv: -------------------------------------------------------------------------------- 1 | lower_bound,upper_bound 2 | 0,1 3 | 1,2 4 | 2,4 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/schema_tests/data_test_mutually_exclusive_ranges_with_gaps.csv: -------------------------------------------------------------------------------- 1 | subscription_id,valid_from,valid_to 2 | 1,2019-01-01,2019-02-01 3 | 1,2019-03-03,2019-04-01 4 | 2,2019-05-06,2019-07-02 5 | 2,2019-07-03, 6 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/schema_tests/data_test_not_constant.csv: -------------------------------------------------------------------------------- 1 | field 2 | 1 3 | 1 4 | 2 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/schema_tests/data_test_relationships_where_table_1.csv: -------------------------------------------------------------------------------- 1 | id 2 | 1 3 | 2 4 | 3 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/schema_tests/data_test_relationships_where_table_2.csv: -------------------------------------------------------------------------------- 1 | id 2 | 1 3 | 2 4 | 4 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/schema_tests/data_unique_combination_of_columns.csv: -------------------------------------------------------------------------------- 1 | month,product,revenue 2 | 2019-01-01,jaffle,500 3 | 2019-01-01,lamington,100 4 | 2019-01-01,pavlova,600 5 | 2019-02-01,jaffle,300 6 | 2019-02-01,lamington,300 7 | 2019-02-01,pavlova,400 8 | -------------------------------------------------------------------------------- 
/dbt_modules/dbt_utils/integration_tests/data/sql/data_events_20180101.csv: -------------------------------------------------------------------------------- 1 | user_id,event 2 | 1,play 3 | 2,pause 4 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_events_20180102.csv: -------------------------------------------------------------------------------- 1 | user_id,event 2 | 3,play 3 | 4,pause 4 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_generate_series.csv: -------------------------------------------------------------------------------- 1 | generated_number 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | 10 12 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_get_column_values.csv: -------------------------------------------------------------------------------- 1 | field 2 | a 3 | b 4 | c 5 | d 6 | e 7 | f 8 | g 9 | g 10 | g 11 | g 12 | g 13 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_get_query_results_as_dict.csv: -------------------------------------------------------------------------------- 1 | col_1,col_2,col_3 2 | 1,a,True 3 | 2,b,False 4 | 3,c, 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_nullcheck_table.csv: -------------------------------------------------------------------------------- 1 | field_1,field_2,field_3 2 | a,'',1 3 | '',b,2 4 | '','',3 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_pivot.csv: -------------------------------------------------------------------------------- 1 | size,color 2 | S,red 3 | S,blue 4 | M,red 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_pivot_expected.csv: -------------------------------------------------------------------------------- 1 | size,red,blue 2 | S,1,1 3 | M,1,0 4 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_safe_add.csv: -------------------------------------------------------------------------------- 1 | field_1,field_2,field_3,expected 2 | 1,2,3,6 3 | 1,,3,4 4 | ,,2,2 5 | ,,,0 6 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_star.csv: -------------------------------------------------------------------------------- 1 | field_1,field_2,field_3 2 | a,b,c 3 | d,e,f 4 | g,h,i 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_star_expected.csv: -------------------------------------------------------------------------------- 1 | field_1,field_2 2 | a,b 3 | d,e 4 | g,h 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_surrogate_key.csv: -------------------------------------------------------------------------------- 1 | field_1,field_2,field_3,expected 2 | a,b,c,7b193b3d33184464106f41ddf733783b 3 | a,,c,c5fd1b92380c6222ab0ef67839208624 4 
| ,,c,267743defab4558f1940311b66274e26 5 | ,,,cfab1ba8c67c7c838db98d666f02a132 6 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_union_expected.csv: -------------------------------------------------------------------------------- 1 | id,name,favorite_color,favorite_number 2 | 1,"drew",,pi 3 | 2,"bob",,e 4 | 3,"alice",,4 5 | 1,,"green",7 6 | 2,,"pink",13 7 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_union_table_1.csv: -------------------------------------------------------------------------------- 1 | id,name,favorite_number 2 | 1,drew,pi 3 | 2,bob,e 4 | 3,alice,4 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_union_table_2.csv: -------------------------------------------------------------------------------- 1 | id,favorite_color,favorite_number 2 | 1,green,7 3 | 2,pink,13 4 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_unpivot.csv: -------------------------------------------------------------------------------- 1 | customer_id,created_at,status,segment,name 2 | 123,2017-01-01,active,tier 1,name 1 3 | 234,2017-02-01,active,tier 3,name 3 4 | 567,2017-03-01,churned,tier 2,name 2 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_unpivot_expected.csv: -------------------------------------------------------------------------------- 1 | customer_id,created_at,prop,val 2 | 123,"2017-01-01","segment","tier 1" 3 | 123,"2017-01-01","status","active" 4 | 123,"2017-01-01","name","name 1" 5 | 234,"2017-02-01","segment","tier 3" 6 | 234,"2017-02-01","status","active" 7 | 234,"2017-02-01","name","name 3" 8 | 567,"2017-03-01","status","churned" 9 | 567,"2017-03-01","name","name 2" 10 | 567,"2017-03-01","segment","tier 2" 11 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/sql/data_unpivot_original_api_expected.csv: -------------------------------------------------------------------------------- 1 | customer_id,created_at,field_name,value 2 | 123,2017-01-01,status,active 3 | 123,2017-01-01,segment,tier 1 4 | 234,2017-02-01,status,active 5 | 234,2017-02-01,segment,tier 3 6 | 567,2017-03-01,status,churned 7 | 567,2017-03-01,segment,tier 2 8 | 123,2017-01-01,name,name 1 9 | 234,2017-02-01,name,name 3 10 | 567,2017-03-01,name,name 2 -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/web/data_url_host.csv: -------------------------------------------------------------------------------- 1 | original_url,parsed_url 2 | www.google.co.uk?utm_source=google&utm_medium=cpc&utm_campaign=spring-summer,www.google.co.uk 3 | http://witanddelight.com/2018/01/tips-tricks-how-run-half-marathon-first-time/,witanddelight.com 4 | https://www.nytimes.com/2018/01/01/blog,www.nytimes.com -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/web/data_url_path.csv: -------------------------------------------------------------------------------- 1 | original_url,parsed_path 2 | www.google.co.uk?utm_source=google&utm_medium=cpc&utm_campaign=spring-summer, 3 | 
http://witanddelight.com/2018/01/tips-tricks-how-run-half-marathon-first-time/,2018/01/tips-tricks-how-run-half-marathon-first-time/ 4 | https://www.nytimes.com/2018/01/01/blog,2018/01/01/blog 5 | http://witanddelight.com/2018/01/tips-tricks-how-run-half-marathon-first-time/?utm_source=google&utm_medium=cpc&utm_campaign=spring-summer,2018/01/tips-tricks-how-run-half-marathon-first-time/ -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/data/web/data_urls.csv: -------------------------------------------------------------------------------- 1 | url,medium,source 2 | http://drewbanin.com/milky?utm_medium=organic,organic, 3 | http://drewbanin.com/milky?utm_medium=organic&utm_source=github,organic,github 4 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | name: 'dbt_utils_integration_tests' 3 | version: '1.0' 4 | 5 | profile: 'integration_tests' 6 | 7 | # require-dbt-version: inherit this from dbt-utils 8 | 9 | config-version: 2 10 | 11 | source-paths: ["models"] 12 | analysis-paths: ["analysis"] 13 | test-paths: ["tests"] 14 | data-paths: ["data"] 15 | macro-paths: ["macros"] 16 | 17 | target-path: "target" # directory which will store compiled SQL files 18 | clean-targets: # directories to be removed by `dbt clean` 19 | - "target" 20 | - "dbt_modules" 21 | 22 | seeds: 23 | 24 | +quote_columns: false 25 | dbt_utils_integration_tests: 26 | 27 | cross_db: 28 | data_date_trunc: 29 | +column_types: 30 | updated_at: timestamp 31 | day: date 32 | month: date 33 | 34 | data_dateadd: 35 | +column_types: 36 | from_time: timestamp 37 | result: timestamp 38 | 39 | data_datediff: 40 | +column_types: 41 | first_date: timestamp 42 | second_date: timestamp 43 | 44 | data_width_bucket: 45 | +column_types: 46 | num_buckets: integer 47 | min_value: float 48 | max_value: float 49 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-is-for-losers/ga4-bigquery-starter/4e38153440ac30b86b9b1a0a31c1f6893e1dbbb5/dbt_modules/dbt_utils/integration_tests/macros/.gitkeep -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/macros/tests.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro test_assert_equal(model, actual, expected) %} 3 | 4 | select count(*) from {{ model }} where {{ actual }} != {{ expected }} 5 | 6 | {% endmacro %} 7 | 8 | 9 | {% macro test_not_empty_string(model, arg) %} 10 | 11 | {% set column_name = kwargs.get('column_name', kwargs.get('arg')) %} 12 | 13 | select count(*) from {{ model }} where {{ column_name }} = '' 14 | 15 | {% endmacro %} 16 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: test_concat 5 | tests: 6 | - assert_equal: 7 | actual: actual 8 | expected: expected 9 | 10 | - name: test_current_timestamp 11 | tests: 12 | - assert_equal: 13 | actual: actual 14 | expected: expected 15 | 16 | - 
name: test_date_trunc 17 | tests: 18 | - assert_equal: 19 | actual: actual 20 | expected: expected 21 | 22 | - name: test_dateadd 23 | tests: 24 | - assert_equal: 25 | actual: actual 26 | expected: expected 27 | 28 | - name: test_datediff 29 | tests: 30 | - assert_equal: 31 | actual: actual 32 | expected: expected 33 | 34 | - name: test_hash 35 | tests: 36 | - assert_equal: 37 | actual: actual 38 | expected: expected 39 | 40 | - name: test_last_day 41 | tests: 42 | - assert_equal: 43 | actual: actual 44 | expected: expected 45 | 46 | - name: test_length 47 | tests: 48 | - assert_equal: 49 | actual: actual 50 | expected: expected 51 | 52 | - name: test_safe_cast 53 | tests: 54 | - assert_equal: 55 | actual: actual 56 | expected: expected 57 | 58 | - name: test_split_part 59 | tests: 60 | - assert_equal: 61 | actual: actual 62 | expected: expected 63 | 64 | - name: test_replace 65 | tests: 66 | - assert_equal: 67 | actual: actual 68 | expected: expected 69 | 70 | - name: test_right 71 | tests: 72 | - assert_equal: 73 | actual: actual 74 | expected: expected 75 | 76 | - name: test_position 77 | tests: 78 | - assert_equal: 79 | actual: actual 80 | expected: expected 81 | 82 | - name: test_width_bucket 83 | tests: 84 | - assert_equal: 85 | actual: actual 86 | expected: expected -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_concat.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_concat') }} 5 | 6 | ) 7 | 8 | select 9 | {{ dbt_utils.concat(['input_1', 'input_2']) }} as actual, 10 | output as expected 11 | 12 | from data 13 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_current_timestamp.sql: -------------------------------------------------------------------------------- 1 | 2 | -- how can we test this better? 3 | select 4 | {{ dbt_utils.current_timestamp() }} as actual, 5 | {{ dbt_utils.current_timestamp() }} as expected 6 | 7 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_current_timestamp_in_utc.sql: -------------------------------------------------------------------------------- 1 | 2 | -- how can we test this better? 
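-- note: `actual` and `expected` below both call the same macro, so this mainly verifies that current_timestamp_in_utc() compiles and runs on each adapter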
3 | select 4 | {{ dbt_utils.current_timestamp_in_utc() }} as actual, 5 | {{ dbt_utils.current_timestamp_in_utc() }} as expected -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_date_trunc.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_date_trunc') }} 5 | 6 | ) 7 | 8 | select 9 | cast({{dbt_utils.date_trunc('day', 'updated_at') }} as date) as actual, 10 | day as expected 11 | 12 | from data 13 | 14 | union all 15 | 16 | select 17 | cast({{ dbt_utils.date_trunc('month', 'updated_at') }} as date) as actual, 18 | month as expected 19 | 20 | from data 21 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_dateadd.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_dateadd') }} 5 | 6 | ) 7 | 8 | select 9 | case 10 | when datepart = 'hour' then cast({{ dbt_utils.dateadd('hour', 'interval_length', 'from_time') }} as {{dbt_utils.type_timestamp()}}) 11 | when datepart = 'day' then cast({{ dbt_utils.dateadd('day', 'interval_length', 'from_time') }} as {{dbt_utils.type_timestamp()}}) 12 | when datepart = 'month' then cast({{ dbt_utils.dateadd('month', 'interval_length', 'from_time') }} as {{dbt_utils.type_timestamp()}}) 13 | when datepart = 'year' then cast({{ dbt_utils.dateadd('year', 'interval_length', 'from_time') }} as {{dbt_utils.type_timestamp()}}) 14 | else null 15 | end as actual, 16 | result as expected 17 | 18 | from data 19 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_datediff.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_datediff') }} 5 | 6 | ) 7 | 8 | select 9 | 10 | case 11 | when datepart = 'second' then {{ dbt_utils.datediff('first_date', 'second_date', 'second') }} 12 | when datepart = 'minute' then {{ dbt_utils.datediff('first_date', 'second_date', 'minute') }} 13 | when datepart = 'hour' then {{ dbt_utils.datediff('first_date', 'second_date', 'hour') }} 14 | when datepart = 'day' then {{ dbt_utils.datediff('first_date', 'second_date', 'day') }} 15 | when datepart = 'week' then {{ dbt_utils.datediff('first_date', 'second_date', 'week') }} 16 | when datepart = 'month' then {{ dbt_utils.datediff('first_date', 'second_date', 'month') }} 17 | when datepart = 'year' then {{ dbt_utils.datediff('first_date', 'second_date', 'year') }} 18 | else null 19 | end as actual, 20 | result as expected 21 | 22 | from data 23 | 24 | -- Also test correct casting of literal values. 
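-- each literal pair below straddles the 1999 -> 2000 boundary, so the expected difference is exactly 1 for every datepart; passing string literals rather than columns exercises each adapter's casting of literals to timestamps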
25 | 26 | union all select {{ dbt_utils.datediff("'1999-12-31 23:59:59.999999'", "'2000-01-01 00:00:00.000000'", "microsecond") }} as actual, 1 as expected 27 | union all select {{ dbt_utils.datediff("'1999-12-31 23:59:59.999999'", "'2000-01-01 00:00:00.000000'", "millisecond") }} as actual, 1 as expected 28 | union all select {{ dbt_utils.datediff("'1999-12-31 23:59:59.999999'", "'2000-01-01 00:00:00.000000'", "second") }} as actual, 1 as expected 29 | union all select {{ dbt_utils.datediff("'1999-12-31 23:59:59.999999'", "'2000-01-01 00:00:00.000000'", "minute") }} as actual, 1 as expected 30 | union all select {{ dbt_utils.datediff("'1999-12-31 23:59:59.999999'", "'2000-01-01 00:00:00.000000'", "hour") }} as actual, 1 as expected 31 | union all select {{ dbt_utils.datediff("'1999-12-31 23:59:59.999999'", "'2000-01-01 00:00:00.000000'", "day") }} as actual, 1 as expected 32 | union all select {{ dbt_utils.datediff("'1999-12-31 23:59:59.999999'", "'2000-01-03 00:00:00.000000'", "week") }} as actual, 1 as expected 33 | union all select {{ dbt_utils.datediff("'1999-12-31 23:59:59.999999'", "'2000-01-01 00:00:00.000000'", "month") }} as actual, 1 as expected 34 | union all select {{ dbt_utils.datediff("'1999-12-31 23:59:59.999999'", "'2000-01-01 00:00:00.000000'", "quarter") }} as actual, 1 as expected 35 | union all select {{ dbt_utils.datediff("'1999-12-31 23:59:59.999999'", "'2000-01-01 00:00:00.000000'", "year") }} as actual, 1 as expected 36 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_hash.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_hash') }} 5 | 6 | ) 7 | 8 | select 9 | {{ dbt_utils.hash('input_1') }} as actual, 10 | output as expected 11 | 12 | from data 13 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_last_day.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_last_day') }} 5 | 6 | ) 7 | 8 | select 9 | case 10 | when date_part = 'month' then {{ dbt_utils.last_day('date_day', 'month') }} 11 | when date_part = 'year' then {{ dbt_utils.last_day('date_day', 'year') }} 12 | else null 13 | end as actual, 14 | result as expected 15 | 16 | from data 17 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_length.sql: -------------------------------------------------------------------------------- 1 | with data as ( 2 | 3 | select * from {{ ref('data_length') }} 4 | 5 | ) 6 | 7 | select 8 | 9 | {{ dbt_utils.length('expression') }} as actual, 10 | output as expected 11 | 12 | from data -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_position.sql: -------------------------------------------------------------------------------- 1 | with data as ( 2 | 3 | select * from {{ ref('data_position') }} 4 | 5 | ) 6 | 7 | select 8 | 9 | {{ dbt_utils.position('substring_text', 'string_text') }} as actual, 10 | result as expected 11 | 12 | from data -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_replace.sql: 
-------------------------------------------------------------------------------- 1 | with data as ( 2 | 3 | select 4 | 5 | *, 6 | coalesce(search_chars, '') as old_chars, 7 | coalesce(replace_chars, '') as new_chars 8 | 9 | from {{ ref('data_replace') }} 10 | 11 | ) 12 | 13 | select 14 | 15 | {{ dbt_utils.replace('string_text', 'old_chars', 'new_chars') }} as actual, 16 | result as expected 17 | 18 | from data 19 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_right.sql: -------------------------------------------------------------------------------- 1 | with data as ( 2 | 3 | select * from {{ ref('data_right') }} 4 | 5 | ) 6 | 7 | select 8 | 9 | {{ dbt_utils.right('string_text', 'length_expression') }} as actual, 10 | coalesce(output, '') as expected 11 | 12 | from data -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_safe_cast.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_safe_cast') }} 5 | 6 | ) 7 | 8 | select 9 | {{ dbt_utils.safe_cast('field', dbt_utils.type_string()) }} as actual, 10 | output as expected 11 | 12 | from data 13 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_split_part.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_split_part') }} 5 | 6 | ) 7 | 8 | select 9 | {{ dbt_utils.split_part('parts', 'split_on', 1) }} as actual, 10 | result_1 as expected 11 | 12 | from data 13 | 14 | union all 15 | 16 | select 17 | {{ dbt_utils.split_part('parts', 'split_on', 2) }} as actual, 18 | result_2 as expected 19 | 20 | from data 21 | 22 | union all 23 | 24 | select 25 | {{ dbt_utils.split_part('parts', 'split_on', 3) }} as actual, 26 | result_3 as expected 27 | 28 | from data 29 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/cross_db_utils/test_width_bucket.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_width_bucket') }} 5 | 6 | ) 7 | 8 | select 9 | {{ dbt_utils.width_bucket('amount', 'min_value', 'max_value', 'num_buckets') }} as actual, 10 | bucket as expected 11 | 12 | from data 13 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/datetime/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: test_date_spine 5 | tests: 6 | - dbt_utils.equality: 7 | compare_model: ref('data_date_spine') 8 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/datetime/test_date_spine.sql: -------------------------------------------------------------------------------- 1 | 2 | -- snowflake doesn't like this as a view because the `generate_series` 3 | -- call creates a CTE called `unioned`, as does the `equality` schema test. 4 | -- Ideally, Snowflake would be smart enough to know that these CTE names are 5 | -- different, as they live in different relations. 
TODO: use a less common cte name 6 | 7 | {{ config(materialized='table') }} 8 | 9 | with date_spine as ( 10 | 11 | {% if target.type == 'postgres' %} 12 | {{ log("WARNING: Not testing - datediff macro is unsupported on Postgres", info=True) }} 13 | select * from {{ ref('data_date_spine') }} 14 | 15 | {% elif target.type == 'bigquery' %} 16 | select cast(date_day as date) as date_day 17 | from ({{ dbt_utils.date_spine("day", "'2018-01-01'", "'2018-01-10'") }}) 18 | 19 | {% else %} 20 | {{ dbt_utils.date_spine("day", "'2018-01-01'", "'2018-01-10'") }} 21 | {% endif %} 22 | 23 | ) 24 | 25 | select date_day 26 | from date_spine 27 | 28 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/materializations/expected_insert_by_period.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | enabled=(target.type == 'redshift') 5 | ) 6 | }} 7 | 8 | select * 9 | from {{ ref('data_insert_by_period') }} 10 | where id in (2, 3, 4, 5, 6) 11 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/materializations/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: test_insert_by_period 5 | tests: 6 | - dbt_utils.equality: 7 | compare_model: ref('expected_insert_by_period') 8 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/materializations/test_insert_by_period.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'insert_by_period', 4 | period = 'month', 5 | timestamp_field = 'created_at', 6 | start_date = '2018-01-01', 7 | stop_date = '2018-06-01', 8 | enabled=(target.type == 'redshift') 9 | ) 10 | }} 11 | 12 | with events as ( 13 | select * 14 | from {{ ref('data_insert_by_period') }} 15 | where __PERIOD_FILTER__ 16 | ) 17 | 18 | select * from events 19 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/schema_tests/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: data_test_not_constant 5 | columns: 6 | - name: field 7 | tests: 8 | - dbt_utils.not_constant 9 | 10 | - name: data_test_at_least_one 11 | columns: 12 | - name: field 13 | tests: 14 | - dbt_utils.at_least_one 15 | 16 | - name: data_test_expression_is_true 17 | tests: 18 | - dbt_utils.expression_is_true: 19 | expression: col_a + col_b = 1 20 | - dbt_utils.expression_is_true: 21 | expression: col_a = 0.5 22 | condition: col_b = 0.5 23 | 24 | - name: test_recency 25 | tests: 26 | - dbt_utils.recency: 27 | datepart: day 28 | field: today 29 | interval: 1 30 | 31 | - name: test_equal_rowcount 32 | tests: 33 | - dbt_utils.equal_rowcount: 34 | compare_model: ref('test_equal_rowcount') 35 | 36 | - name: test_equal_column_subset 37 | tests: 38 | - dbt_utils.equality: 39 | compare_model: ref('data_people') 40 | compare_columns: 41 | - first_name 42 | - last_name 43 | - email 44 | 45 | - name: data_people 46 | columns: 47 | - name: is_active 48 | tests: 49 | - dbt_utils.cardinality_equality: 50 | field: is_active 51 | to: ref('data_people') 52 | 53 | - name: data_test_relationships_where_table_2 54 | columns: 55 | - name: id 56 | 
tests: 57 | - dbt_utils.relationships_where: 58 | from: id 59 | to: ref('data_test_relationships_where_table_1') 60 | field: id 61 | from_condition: id <> 4 62 | 63 | - name: data_test_mutually_exclusive_ranges_no_gaps 64 | tests: 65 | - dbt_utils.mutually_exclusive_ranges: 66 | lower_bound_column: lower_bound 67 | upper_bound_column: upper_bound 68 | gaps: not_allowed 69 | 70 | - name: data_test_mutually_exclusive_ranges_with_gaps 71 | tests: 72 | - dbt_utils.mutually_exclusive_ranges: 73 | lower_bound_column: valid_from 74 | upper_bound_column: coalesce(valid_to, '2099-01-01') 75 | partition_by: subscription_id 76 | gaps: allowed 77 | 78 | - dbt_utils.mutually_exclusive_ranges: 79 | lower_bound_column: valid_from 80 | upper_bound_column: coalesce(valid_to, '2099-01-01') 81 | partition_by: subscription_id 82 | gaps: required 83 | 84 | - name: data_unique_combination_of_columns 85 | tests: 86 | - dbt_utils.unique_combination_of_columns: 87 | combination_of_columns: 88 | - month 89 | - product 90 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/schema_tests/test_equal_column_subset.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='ephemeral') }} 2 | 3 | select 4 | 5 | first_name, 6 | last_name, 7 | email 8 | 9 | from {{ ref('data_people') }} 10 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/schema_tests/test_equal_rowcount.sql: -------------------------------------------------------------------------------- 1 | with data as ( 2 | 3 | select * from {{ ref('data_test_equal_rowcount') }} 4 | 5 | ) 6 | 7 | select 8 | field 9 | from data -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/schema_tests/test_recency.sql: -------------------------------------------------------------------------------- 1 | 2 | {% if target.type == 'postgres' %} 3 | 4 | select 5 | {{ dbt_utils.date_trunc('day', dbt_utils.current_timestamp()) }} as today 6 | 7 | {% else %} 8 | 9 | select 10 | cast({{ dbt_utils.date_trunc('day', dbt_utils.current_timestamp()) }} as datetime) as today 11 | 12 | {% endif %} 13 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: test_generate_series 5 | tests: 6 | - dbt_utils.equality: 7 | arg: ref('data_generate_series') 8 | 9 | - name: test_get_column_values 10 | columns: 11 | - name: count_a 12 | tests: 13 | - accepted_values: 14 | values: 15 | - '1' 16 | 17 | - name: count_b 18 | tests: 19 | - accepted_values: 20 | values: 21 | - '1' 22 | 23 | - name: count_c 24 | tests: 25 | - accepted_values: 26 | values: 27 | - '1' 28 | 29 | - name: count_d 30 | tests: 31 | - accepted_values: 32 | values: 33 | - '1' 34 | 35 | - name: count_e 36 | tests: 37 | - accepted_values: 38 | values: 39 | - '1' 40 | 41 | - name: count_f 42 | tests: 43 | - accepted_values: 44 | values: 45 | - '1' 46 | 47 | - name: count_g 48 | tests: 49 | - accepted_values: 50 | values: 51 | - '5' 52 | 53 | - name: test_get_tables_by_prefix_and_union 54 | columns: 55 | - name: event 56 | tests: 57 | - not_null 58 | - name: user_id 59 | tests: 60 | - dbt_utils.at_least_one 61 | - not_null 62 | - unique 63 | 64 | 
- name: test_nullcheck_table 65 | columns: 66 | - name: field_1 67 | tests: 68 | - not_empty_string 69 | 70 | - name: field_2 71 | tests: 72 | - not_empty_string 73 | 74 | - name: field_3 75 | tests: 76 | - not_empty_string 77 | 78 | - name: test_safe_add 79 | tests: 80 | - assert_equal: 81 | actual: actual 82 | expected: expected 83 | 84 | - name: test_pivot 85 | tests: 86 | - dbt_utils.equality: 87 | compare_model: ref('data_pivot_expected') 88 | 89 | - name: test_unpivot_original_api 90 | tests: 91 | - dbt_utils.equality: 92 | compare_model: ref('data_unpivot_original_api_expected') 93 | 94 | - name: test_unpivot 95 | tests: 96 | - dbt_utils.equality: 97 | compare_model: ref('data_unpivot_expected') 98 | 99 | - name: test_star 100 | tests: 101 | - dbt_utils.equality: 102 | compare_model: ref('data_star_expected') 103 | 104 | - name: test_surrogate_key 105 | tests: 106 | - assert_equal: 107 | actual: actual_arguments 108 | expected: expected 109 | - assert_equal: 110 | actual: actual_list 111 | expected: expected 112 | 113 | - name: test_union 114 | tests: 115 | - dbt_utils.equality: 116 | compare_model: ref('data_union_expected') 117 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_generate_series.sql: -------------------------------------------------------------------------------- 1 | 2 | -- snowflake doesn't like this as a view because the `generate_series` 3 | -- call creates a CTE called `unioned`, as does the `equality` schema test. 4 | -- Ideally, Snowflake would be smart enough to know that these CTE names are 5 | -- different, as they live in different relations. TODO: use a less common cte name 6 | 7 | {{ config(materialized='table') }} 8 | 9 | with data as ( 10 | 11 | {{ dbt_utils.generate_series(10) }} 12 | 13 | ) 14 | 15 | select generated_number from data 16 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_get_column_values.sql: -------------------------------------------------------------------------------- 1 | 2 | {% set columns = dbt_utils.get_column_values(ref('data_get_column_values'), 'field', default = []) %} 3 | 4 | 5 | {% if target.type == 'snowflake' %} 6 | 7 | select 8 | {% for column in columns -%} 9 | 10 | sum(case when field = '{{ column }}' then 1 else 0 end) as count_{{ column }} 11 | {%- if not loop.last %},{% endif -%} 12 | 13 | {%- endfor %} 14 | 15 | from {{ ref('data_get_column_values') }} 16 | 17 | {% else %} 18 | 19 | select 20 | {% for column in columns -%} 21 | 22 | {{dbt_utils.safe_cast("sum(case when field = '" ~ column ~ "' then 1 else 0 end)", dbt_utils.type_string()) }} as count_{{ column }} 23 | {%- if not loop.last %},{% endif -%} 24 | 25 | {%- endfor %} 26 | 27 | from {{ ref('data_get_column_values') }} 28 | 29 | {% endif %} 30 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_get_relations_by_prefix_and_union.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized = 'table') }} 2 | 3 | {% set relations = dbt_utils.get_relations_by_prefix(target.schema, 'data_events_') %} 4 | {{ dbt_utils.union_relations(relations) }} 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_get_tables_by_prefix_and_union.sql: 
-------------------------------------------------------------------------------- 1 | {{config( materialized = 'table')}} 2 | 3 | {% set tables = dbt_utils.get_tables_by_prefix(target.schema, 'data_events_') %} 4 | {{ dbt_utils.union_tables(tables) }} 5 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_groupby.sql: -------------------------------------------------------------------------------- 1 | with test_data as ( 2 | 3 | select 4 | 5 | {{ dbt_utils.safe_cast("'a'", dbt_utils.type_string() )}} as column_1, 6 | {{ dbt_utils.safe_cast("'b'", dbt_utils.type_string() )}} as column_2 7 | 8 | ), 9 | 10 | grouped as ( 11 | 12 | select 13 | *, 14 | count(*) as total 15 | 16 | from test_data 17 | {{ dbt_utils.group_by(2) }} 18 | 19 | ) 20 | 21 | select * from grouped 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_nullcheck_table.sql: -------------------------------------------------------------------------------- 1 | {{ config( materialized = "table" ) }} 2 | 3 | -- TO DO: remove if-statement 4 | 5 | {% set tbl = ref('data_nullcheck_table') %} 6 | 7 | 8 | with nulled as ( 9 | 10 | {{ dbt_utils.nullcheck_table(tbl) }} 11 | 12 | ) 13 | 14 | {% if target.type == 'snowflake' %} 15 | 16 | select 17 | field_1::varchar as field_1, 18 | field_2::varchar as field_2, 19 | field_3::varchar as field_3 20 | 21 | from nulled 22 | 23 | {% else %} 24 | 25 | select 26 | 27 | {{ dbt_utils.safe_cast('field_1', 28 | dbt_utils.type_string() 29 | )}} as field_1, 30 | 31 | {{ dbt_utils.safe_cast('field_2', 32 | dbt_utils.type_string() 33 | )}} as field_2, 34 | 35 | {{ dbt_utils.safe_cast('field_3', 36 | dbt_utils.type_string() 37 | )}} as field_3 38 | 39 | from nulled 40 | 41 | {% endif %} 42 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_pivot.sql: -------------------------------------------------------------------------------- 1 | 2 | -- TODO: How do we make this work nicely on Snowflake too? 3 | 4 | {% if target.type == 'snowflake' %} 5 | {% set column_values = ['RED', 'BLUE'] %} 6 | {% set cmp = 'ilike' %} 7 | {% else %} 8 | {% set column_values = ['red', 'blue'] %} 9 | {% set cmp = '=' %} 10 | {% endif %} 11 | 12 | select 13 | size, 14 | {{ dbt_utils.pivot('color', column_values, cmp=cmp) }} 15 | 16 | from {{ ref('data_pivot') }} 17 | group by size 18 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_safe_add.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_safe_add') }} 5 | 6 | ) 7 | 8 | select 9 | {{ dbt_utils.safe_add('field_1', 'field_2', 'field_3') }} as actual, 10 | expected 11 | 12 | from data 13 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_star.sql: -------------------------------------------------------------------------------- 1 | 2 | -- TODO : Should the star macro use a case-insensitive comparison for the `except` field on Snowflake? 
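-- for now the excluded name must match the adapter's stored casing: Snowflake folds unquoted identifiers to uppercase, hence 'FIELD_3' there and 'field_3' elsewhere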
3 | 4 | {% set exclude_field = 'FIELD_3' if target.type == 'snowflake' else 'field_3' %} 5 | 6 | 7 | with data as ( 8 | 9 | select 10 | {{ dbt_utils.star(from=ref('data_star'), except=[exclude_field]) }} 11 | 12 | from {{ ref('data_star') }} 13 | 14 | ) 15 | 16 | select * from data 17 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_surrogate_key.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_surrogate_key') }} 5 | 6 | ) 7 | 8 | select 9 | {{ dbt_utils.surrogate_key('field_1', 'field_2', 'field_3') }} as actual_arguments, 10 | {{ dbt_utils.surrogate_key(['field_1', 'field_2', 'field_3']) }} as actual_list, 11 | expected 12 | 13 | from data 14 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_union.sql: -------------------------------------------------------------------------------- 1 | 2 | select 3 | id, 4 | name, 5 | favorite_color 6 | 7 | from {{ ref('test_union_base') }} 8 | 9 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_union_base.sql: -------------------------------------------------------------------------------- 1 | 2 | {{ dbt_utils.union_relations([ 3 | ref('data_union_table_1'), 4 | ref('data_union_table_2')] 5 | ) }} 6 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_unpivot.sql: -------------------------------------------------------------------------------- 1 | 2 | -- snowflake messes with these tests pretty badly since the 3 | -- output of the macro considers the casing of the source 4 | -- table columns. Using some hacks here to get this to work, 5 | -- but we should consider lowercasing the unpivot macro output 6 | -- at some point in the future for consistency 7 | 8 | {% if target.name == 'snowflake' %} 9 | {% set exclude = ['CUSTOMER_ID', 'CREATED_AT'] %} 10 | {% else %} 11 | {% set exclude = ['customer_id', 'created_at'] %} 12 | {% endif %} 13 | 14 | 15 | select 16 | customer_id, 17 | created_at, 18 | case 19 | when '{{ target.name }}' = 'snowflake' then lower(prop) 20 | else prop 21 | end as prop, 22 | val 23 | 24 | from ( 25 | {{ dbt_utils.unpivot( 26 | relation=ref('data_unpivot'), 27 | cast_to=dbt_utils.type_string(), 28 | exclude=exclude, 29 | remove='name', 30 | field_name='prop', 31 | value_name='val' 32 | ) }} 33 | ) as sbq 34 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/sql/test_unpivot_original_api.sql: -------------------------------------------------------------------------------- 1 | 2 | -- unpivot() was enhanced with 3 new parameters 3 | -- This test targets the original API. 4 | 5 | -- snowflake messes with these tests pretty badly since the 6 | -- output of the macro considers the casing of the source 7 | -- table columns. 
Using some hacks here to get this to work, 8 | -- but we should consider lowercasing the unpivot macro output 9 | -- at some point in the future for consistency 10 | 11 | {% if target.name == 'snowflake' %} 12 | {% set exclude = ['CUSTOMER_ID', 'CREATED_AT'] %} 13 | {% else %} 14 | {% set exclude = ['customer_id', 'created_at'] %} 15 | {% endif %} 16 | 17 | select 18 | customer_id, 19 | created_at, 20 | case 21 | when '{{ target.name }}' = 'snowflake' then lower(FIELD_NAME) 22 | else field_name 23 | end as field_name, 24 | value 25 | 26 | from ( 27 | {{ dbt_utils.unpivot( 28 | table=ref('data_unpivot'), 29 | cast_to=dbt_utils.type_string(), 30 | exclude=exclude 31 | ) }} 32 | ) as sbq 33 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/web/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: test_urls 5 | tests: 6 | - assert_equal: 7 | actual: actual 8 | expected: expected 9 | 10 | - name: test_url_host 11 | tests: 12 | - assert_equal: 13 | actual: actual 14 | expected: expected 15 | 16 | - name: test_url_path 17 | tests: 18 | - assert_equal: 19 | actual: actual 20 | expected: expected -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/web/test_url_host.sql: -------------------------------------------------------------------------------- 1 | with data as ( 2 | 3 | select * from {{ref('data_url_host')}} 4 | 5 | ) 6 | 7 | select 8 | 9 | {{ dbt_utils.get_url_host('original_url') }} as actual, 10 | parsed_url as expected 11 | 12 | from data -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/web/test_url_path.sql: -------------------------------------------------------------------------------- 1 | with data as ( 2 | 3 | select * from {{ref('data_url_path')}} 4 | 5 | ) 6 | 7 | select 8 | 9 | coalesce({{ dbt_utils.get_url_path('original_url') }}, '') as actual, 10 | coalesce(parsed_path, '') as expected 11 | 12 | from data -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/models/web/test_urls.sql: -------------------------------------------------------------------------------- 1 | 2 | with data as ( 3 | 4 | select * from {{ ref('data_urls') }} 5 | 6 | ) 7 | 8 | select 9 | {{ dbt_utils.get_url_parameter('url', 'utm_medium') }} as actual, 10 | medium as expected 11 | 12 | from data 13 | 14 | union all 15 | 16 | select 17 | {{ dbt_utils.get_url_parameter('url', 'utm_source') }} as actual, 18 | source as expected 19 | 20 | from data 21 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/packages.yml: -------------------------------------------------------------------------------- 1 | 2 | packages: 3 | - local: ../ 4 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/tests/assert_get_query_results_as_dict_objects_equal.sql: -------------------------------------------------------------------------------- 1 | -- depends_on: {{ ref('data_get_query_results_as_dict') }} 2 | 3 | {% set expected_dictionary={ 4 | 'col_1': [1, 2, 3], 5 | 'col_2': ['a', 'b', 'c'], 6 | 'col_3': [True, False, none] 7 | } %} 8 | 9 | {#- Handle snowflake casing silliness -#} 10 | {% if target.type == 
'snowflake' %} 11 | {% set expected_dictionary={ 12 | 'COL_1': [1, 2, 3], 13 | 'COL_2': ['a', 'b', 'c'], 14 | 'COL_3': [True, False, none] 15 | } %} 16 | {% endif %} 17 | 18 | 19 | {% set actual_dictionary=dbt_utils.get_query_results_as_dict( 20 | "select * from " ~ ref('data_get_query_results_as_dict') 21 | ) %} 22 | {#- 23 | For reasons that remain unclear, Jinja won't return True for actual_dictionary == expected_dictionary. 24 | Instead, we'll manually check that the values of these dictionaries are equivalent. 25 | -#} 26 | 27 | {% set ns = namespace( 28 | pass=True, 29 | err_msg = "" 30 | ) %} 31 | {% if execute %} 32 | {#- Check that the dictionaries have the same keys -#} 33 | {% set expected_keys=expected_dictionary.keys() | list | sort %} 34 | {% set actual_keys=actual_dictionary.keys() | list | sort %} 35 | 36 | {% if expected_keys != actual_keys %} 37 | {% set ns.pass=False %} 38 | {% set ns.err_msg %} 39 | The two dictionaries have different keys: 40 | expected_dictionary has keys: {{ expected_keys }} 41 | actual_dictionary has keys: {{ actual_keys }} 42 | {% endset %} 43 | 44 | {% else %} 45 | 46 | {% for key, value in expected_dictionary.items() %} 47 | {% set expected_length=expected_dictionary[key] | length %} 48 | {% set actual_length=actual_dictionary[key] | length %} 49 | 50 | {% if expected_length != actual_length %} 51 | {% set ns.pass=False %} 52 | {% set ns.err_msg %} 53 | The {{ key }} column has different lengths: 54 | expected_dictionary[{{ key }}] has length {{ expected_length }} 55 | actual_dictionary[{{ key }}] has length {{ actual_length }} 56 | {% endset %} 57 | 58 | {% else %} 59 | 60 | {% for i in range(value | length) %} 61 | {% set expected_value=expected_dictionary[key][i] %} 62 | {% set actual_value=actual_dictionary[key][i] %} 63 | {% if expected_value != actual_value %} 64 | {% set ns.pass=False %} 65 | {% set ns.err_msg %} 66 | The {{ key }} column has differing values: 67 | expected_dictionary[{{ key }}][{{ i }}] == {{ expected_value }} 68 | actual_dictionary[{{ key }}][{{ i }}] == {{ actual_value }} 69 | {% endset %} 70 | 71 | {% endif %} 72 | {% endfor %} 73 | {% endif %} 74 | 75 | {% endfor %} 76 | 77 | {% endif %} 78 | 79 | {{ log(ns.err_msg, info=True) }} 80 | select 1 {% if ns.pass %} limit 0 {% endif %} 81 | {% endif %} 82 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/tests/logger/assert_pretty_output_msg_is_string.sql: -------------------------------------------------------------------------------- 1 | {% if dbt_utils.pretty_log_format() is string %} 2 | {# Return 0 rows for the test to pass #} 3 | select 1 limit 0 4 | {% else %} 5 | {# Return >0 rows for the test to fail #} 6 | select 1 7 | {% endif %} 8 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/integration_tests/tests/logger/assert_pretty_time_is_string.sql: -------------------------------------------------------------------------------- 1 | {% if dbt_utils.pretty_time() is string %} 2 | {# Return 0 rows for the test to pass #} 3 | select 1 limit 0 4 | {% else %} 5 | {# Return >0 rows for the test to fail #} 6 | select 1 7 | {% endif %} 8 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/_is_relation.sql: -------------------------------------------------------------------------------- 1 | {% macro _is_relation(obj, macro) %} 2 | {%- if not (obj is mapping and 
obj.get('metadata', {}).get('type', '').endswith('Relation')) -%} 3 | {%- do exceptions.raise_compiler_error("Macro " ~ macro ~ " expected a Relation but received the value: " ~ obj) -%} 4 | {%- endif -%} 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/concat.sql: -------------------------------------------------------------------------------- 1 | {% macro concat(fields) -%} 2 | {{ adapter_macro('dbt_utils.concat', fields) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__concat(fields) -%} 7 | concat({{ fields|join(', ') }}) 8 | {%- endmacro %} 9 | 10 | 11 | {% macro alternative_concat(fields) %} 12 | {{ fields|join(' || ') }} 13 | {% endmacro %} 14 | 15 | 16 | {% macro redshift__concat(fields) %} 17 | {{dbt_utils.alternative_concat(fields)}} 18 | {% endmacro %} 19 | 20 | 21 | {% macro snowflake__concat(fields) %} 22 | {{dbt_utils.alternative_concat(fields)}} 23 | {% endmacro %} 24 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/current_timestamp.sql: -------------------------------------------------------------------------------- 1 | {% macro current_timestamp() -%} 2 | {{ adapter_macro('dbt_utils.current_timestamp') }} 3 | {%- endmacro %} 4 | 5 | {% macro default__current_timestamp() %} 6 | current_timestamp::{{dbt_utils.type_timestamp()}} 7 | {% endmacro %} 8 | 9 | {% macro redshift__current_timestamp() %} 10 | getdate() 11 | {% endmacro %} 12 | 13 | {% macro bigquery__current_timestamp() %} 14 | current_timestamp 15 | {% endmacro %} 16 | 17 | 18 | 19 | {% macro current_timestamp_in_utc() -%} 20 | {{ adapter_macro('dbt_utils.current_timestamp_in_utc') }} 21 | {%- endmacro %} 22 | 23 | {% macro default__current_timestamp_in_utc() %} 24 | {{dbt_utils.current_timestamp()}} 25 | {% endmacro %} 26 | 27 | {% macro snowflake__current_timestamp_in_utc() %} 28 | convert_timezone('UTC', {{dbt_utils.current_timestamp()}})::{{dbt_utils.type_timestamp()}} 29 | {% endmacro %} 30 | 31 | {% macro postgres__current_timestamp_in_utc() %} 32 | (current_timestamp at time zone 'utc')::{{dbt_utils.type_timestamp()}} 33 | {% endmacro %} 34 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/datatypes.sql: -------------------------------------------------------------------------------- 1 | {# string ------------------------------------------------- #} 2 | 3 | {%- macro type_string() -%} 4 | {{ adapter_macro('dbt_utils.type_string') }} 5 | {%- endmacro -%} 6 | 7 | {% macro default__type_string() %} 8 | string 9 | {% endmacro %} 10 | 11 | {%- macro redshift__type_string() -%} 12 | varchar 13 | {%- endmacro -%} 14 | 15 | {% macro postgres__type_string() %} 16 | varchar 17 | {% endmacro %} 18 | 19 | {% macro snowflake__type_string() %} 20 | varchar 21 | {% endmacro %} 22 | 23 | 24 | 25 | {# timestamp ------------------------------------------------- #} 26 | 27 | {%- macro type_timestamp() -%} 28 | {{ adapter_macro('dbt_utils.type_timestamp') }} 29 | {%- endmacro -%} 30 | 31 | {% macro default__type_timestamp() %} 32 | timestamp 33 | {% endmacro %} 34 | 35 | {% macro snowflake__type_timestamp() %} 36 | timestamp_ntz 37 | {% endmacro %} 38 | 39 | 40 | {# float ------------------------------------------------- #} 41 | 42 | {%- macro type_float() -%} 43 | {{ adapter_macro('dbt_utils.type_float') }} 44 | {%- endmacro -%} 45 | 46 | {% macro default__type_float() 
%} 47 | float 48 | {% endmacro %} 49 | 50 | {% macro bigquery__type_float() %} 51 | float64 52 | {% endmacro %} 53 | 54 | {# numeric ------------------------------------------------ #} 55 | 56 | {%- macro type_numeric() -%} 57 | {{ adapter_macro('dbt_utils.type_numeric') }} 58 | {%- endmacro -%} 59 | 60 | {% macro default__type_numeric() %} 61 | numeric(28, 6) 62 | {% endmacro %} 63 | 64 | {% macro bigquery__type_numeric() %} 65 | numeric 66 | {% endmacro %} 67 | 68 | 69 | {# bigint ------------------------------------------------- #} 70 | 71 | {%- macro type_bigint() -%} 72 | {{ adapter_macro('dbt_utils.type_bigint') }} 73 | {%- endmacro -%} 74 | 75 | {% macro default__type_bigint() %} 76 | bigint 77 | {% endmacro %} 78 | 79 | {% macro bigquery__type_bigint() %} 80 | int64 81 | {% endmacro %} 82 | 83 | {# int ------------------------------------------------- #} 84 | 85 | {%- macro type_int() -%} 86 | {{ adapter_macro('dbt_utils.type_int') }} 87 | {%- endmacro -%} 88 | 89 | {% macro default__type_int() %} 90 | int 91 | {% endmacro %} 92 | 93 | {% macro bigquery__type_int() %} 94 | int64 95 | {% endmacro %} 96 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/date_trunc.sql: -------------------------------------------------------------------------------- 1 | {% macro date_trunc(datepart, date) -%} 2 | {{ adapter_macro('dbt_utils.date_trunc', datepart, date) }} 3 | {%- endmacro %} 4 | 5 | {% macro default__date_trunc(datepart, date) %} 6 | date_trunc('{{datepart}}', {{date}}) 7 | {% endmacro %} 8 | 9 | {% macro bigquery__date_trunc(datepart, date) %} 10 | timestamp_trunc( 11 | cast({{date}} as timestamp), 12 | {{datepart}} 13 | ) 14 | 15 | {% endmacro %} 16 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/dateadd.sql: -------------------------------------------------------------------------------- 1 | {% macro dateadd(datepart, interval, from_date_or_timestamp) %} 2 | {{ adapter_macro('dbt_utils.dateadd', datepart, interval, from_date_or_timestamp) }} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__dateadd(datepart, interval, from_date_or_timestamp) %} 7 | 8 | dateadd( 9 | {{ datepart }}, 10 | {{ interval }}, 11 | {{ from_date_or_timestamp }} 12 | ) 13 | 14 | {% endmacro %} 15 | 16 | 17 | {% macro bigquery__dateadd(datepart, interval, from_date_or_timestamp) %} 18 | 19 | datetime_add( 20 | cast( {{ from_date_or_timestamp }} as datetime), 21 | interval {{ interval }} {{ datepart }} 22 | ) 23 | 24 | {% endmacro %} 25 | 26 | 27 | {% macro postgres__dateadd(datepart, interval, from_date_or_timestamp) %} 28 | 29 | {{ from_date_or_timestamp }} + ((interval '1 {{ datepart }}') * ({{ interval }})) 30 | 31 | {% endmacro %} 32 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/datediff.sql: -------------------------------------------------------------------------------- 1 | {% macro datediff(first_date, second_date, datepart) %} 2 | {{ adapter_macro('dbt_utils.datediff', first_date, second_date, datepart) }} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__datediff(first_date, second_date, datepart) %} 7 | 8 | datediff( 9 | {{ datepart }}, 10 | {{ first_date }}, 11 | {{ second_date }} 12 | ) 13 | 14 | {% endmacro %} 15 | 16 | 17 | {% macro bigquery__datediff(first_date, second_date, datepart) %} 18 | 19 | datetime_diff( 20 | cast({{second_date}} as datetime), 21 | 
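-- editor's note: BigQuery's datetime_diff(a, b, part) computes a minus b, so the second (later) date is passed first to match datediff(first_date, second_date) semantics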
cast({{first_date}} as datetime), 22 | {{datepart}} 23 | ) 24 | 25 | {% endmacro %} 26 | 27 | 28 | {% macro postgres__datediff(first_date, second_date, datepart) %} 29 | 30 | {% if datepart == 'year' %} 31 | (date_part('year', ({{second_date}})::date) - date_part('year', ({{first_date}})::date)) 32 | {% elif datepart == 'quarter' %} 33 | ({{ dbt_utils.datediff(first_date, second_date, 'year') }} * 4 + date_part('quarter', ({{second_date}})::date) - date_part('quarter', ({{first_date}})::date)) 34 | {% elif datepart == 'month' %} 35 | ({{ dbt_utils.datediff(first_date, second_date, 'year') }} * 12 + date_part('month', ({{second_date}})::date) - date_part('month', ({{first_date}})::date)) 36 | {% elif datepart == 'day' %} 37 | (({{second_date}})::date - ({{first_date}})::date) 38 | {% elif datepart == 'week' %} 39 | ({{ dbt_utils.datediff(first_date, second_date, 'day') }} / 7 + case 40 | when date_part('dow', ({{first_date}})::timestamp) <= date_part('dow', ({{second_date}})::timestamp) then 41 | case when {{first_date}} <= {{second_date}} then 0 else -1 end 42 | else 43 | case when {{first_date}} <= {{second_date}} then 1 else 0 end 44 | end) 45 | {% elif datepart == 'hour' %} 46 | ({{ dbt_utils.datediff(first_date, second_date, 'day') }} * 24 + date_part('hour', ({{second_date}})::timestamp) - date_part('hour', ({{first_date}})::timestamp)) 47 | {% elif datepart == 'minute' %} 48 | ({{ dbt_utils.datediff(first_date, second_date, 'hour') }} * 60 + date_part('minute', ({{second_date}})::timestamp) - date_part('minute', ({{first_date}})::timestamp)) 49 | {% elif datepart == 'second' %} 50 | ({{ dbt_utils.datediff(first_date, second_date, 'minute') }} * 60 + floor(date_part('second', ({{second_date}})::timestamp)) - floor(date_part('second', ({{first_date}})::timestamp))) 51 | {% elif datepart == 'millisecond' %} 52 | ({{ dbt_utils.datediff(first_date, second_date, 'minute') }} * 60000 + floor(date_part('millisecond', ({{second_date}})::timestamp)) - floor(date_part('millisecond', ({{first_date}})::timestamp))) 53 | {% elif datepart == 'microsecond' %} 54 | ({{ dbt_utils.datediff(first_date, second_date, 'minute') }} * 60000000 + floor(date_part('microsecond', ({{second_date}})::timestamp)) - floor(date_part('microsecond', ({{first_date}})::timestamp))) 55 | {% else %} 56 | {{ exceptions.raise_compiler_error("Unsupported datepart for macro datediff in postgres: {!r}".format(datepart)) }} 57 | {% endif %} 58 | 59 | {% endmacro %} 60 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/except.sql: -------------------------------------------------------------------------------- 1 | {% macro except() %} 2 | {{ adapter_macro('dbt_utils.except') }} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__except() %} 7 | 8 | except 9 | 10 | {% endmacro %} 11 | 12 | {% macro bigquery__except() %} 13 | 14 | except distinct 15 | 16 | {% endmacro %} -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/hash.sql: -------------------------------------------------------------------------------- 1 | {% macro hash(field) -%} 2 | {{ adapter_macro('dbt_utils.hash', field) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__hash(field) -%} 7 | md5(cast({{field}} as {{dbt_utils.type_string()}})) 8 | {%- endmacro %} 9 | 10 | 11 | {% macro bigquery__hash(field) -%} 12 | to_hex({{dbt_utils.default__hash(field)}}) 13 | {%- endmacro %} 14 | 
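{# Usage sketch (editor's note): dbt_utils.hash('email_address') compiles to md5(cast(email_address as varchar)) on adapters where type_string() is varchar; on BigQuery the md5 is additionally wrapped in to_hex(), since BigQuery's md5() returns bytes rather than a hex string. #}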
-------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/identifier.sql: -------------------------------------------------------------------------------- 1 | {% macro identifier(value) %} 2 | {% do exceptions.warn("Warning: the `identifier` macro is no longer supported and will be deprecated in a future release of dbt-utils. Use `adapter.quote` instead") %} 3 | {{ adapter_macro('dbt_utils.identifier', value) }} 4 | {% endmacro %} 5 | 6 | {% macro default__identifier(value) -%} 7 | "{{ value }}" 8 | {%- endmacro %} 9 | 10 | {% macro bigquery__identifier(value) -%} 11 | `{{ value }}` 12 | {%- endmacro %} -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/intersect.sql: -------------------------------------------------------------------------------- 1 | {% macro intersect() %} 2 | {{ adapter_macro('dbt_utils.intersect') }} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__intersect() %} 7 | 8 | intersect 9 | 10 | {% endmacro %} 11 | 12 | {% macro bigquery__intersect() %} 13 | 14 | intersect distinct 15 | 16 | {% endmacro %} 17 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/last_day.sql: -------------------------------------------------------------------------------- 1 | /* 2 | This function has been tested with dateparts of month and quarters. Further 3 | testing is required to validate that it will work on other dateparts. 4 | */ 5 | 6 | {% macro last_day(date, datepart) %} 7 | {{ adapter_macro('dbt_utils.last_day', date, datepart) }} 8 | {% endmacro %} 9 | 10 | 11 | {%- macro default_last_day(date, datepart) -%} 12 | cast( 13 | {{dbt_utils.dateadd('day', '-1', 14 | dbt_utils.dateadd(datepart, '1', dbt_utils.date_trunc(datepart, date)) 15 | )}} 16 | as date) 17 | {%- endmacro -%} 18 | 19 | 20 | {% macro default__last_day(date, datepart) -%} 21 | {{dbt_utils.default_last_day(date, datepart)}} 22 | {%- endmacro %} 23 | 24 | 25 | {% macro postgres__last_day(date, datepart) -%} 26 | 27 | {%- if datepart == 'quarter' -%} 28 | {{ exceptions.raise_compiler_error( 29 | "dbt_utils.last_day is not supported for datepart 'quarter' on this adapter") }} 30 | {%- else -%} 31 | {{dbt_utils.default_last_day(date, datepart)}} 32 | {%- endif -%} 33 | 34 | {%- endmacro %} 35 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/length.sql: -------------------------------------------------------------------------------- 1 | {% macro length(expression) -%} 2 | {{ adapter_macro('dbt_utils.length', expression) }} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__length(expression) %} 7 | 8 | length( 9 | {{ expression }} 10 | ) 11 | 12 | {%- endmacro -%} 13 | 14 | 15 | {% macro redshift__length(expression) %} 16 | 17 | len( 18 | {{ expression }} 19 | ) 20 | 21 | {%- endmacro -%} -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/literal.sql: -------------------------------------------------------------------------------- 1 | 2 | {%- macro string_literal(value) -%} 3 | {{ adapter_macro('dbt_utils.string_literal', value) }} 4 | {%- endmacro -%} 5 | 6 | {% macro default__string_literal(value) -%} 7 | '{{ value }}' 8 | {%- endmacro %} 9 | -------------------------------------------------------------------------------- 
/dbt_modules/dbt_utils/macros/cross_db_utils/position.sql: -------------------------------------------------------------------------------- 1 | {% macro position(substring_text, string_text) -%} 2 | {{ adapter_macro('dbt_utils.position', substring_text, string_text) }} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__position(substring_text, string_text) %} 7 | 8 | position( 9 | {{ substring_text }} in {{ string_text }} 10 | ) 11 | 12 | {%- endmacro -%} 13 | 14 | {% macro bigquery__position(substring_text, string_text) %} 15 | 16 | strpos( 17 | {{ string_text }}, 18 | {{ substring_text }} 19 | 20 | ) 21 | 22 | {%- endmacro -%} 23 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/replace.sql: -------------------------------------------------------------------------------- 1 | {% macro replace(field, old_chars, new_chars) -%} 2 | {{ adapter_macro('dbt_utils.replace', field, old_chars, new_chars) }} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__replace(field, old_chars, new_chars) %} 7 | 8 | replace( 9 | {{ field }}, 10 | {{ old_chars }}, 11 | {{ new_chars }} 12 | ) 13 | 14 | 15 | {% endmacro %} -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/right.sql: -------------------------------------------------------------------------------- 1 | {% macro right(string_text, length_expression) -%} 2 | {{ adapter_macro('dbt_utils.right', string_text, length_expression) }} 3 | {% endmacro %} 4 | 5 | {% macro default__right(string_text, length_expression) %} 6 | 7 | right( 8 | {{ string_text }}, 9 | {{ length_expression }} 10 | ) 11 | 12 | {%- endmacro -%} 13 | 14 | {% macro bigquery__right(string_text, length_expression) %} 15 | 16 | case when {{ length_expression }} = 0 17 | then '' 18 | else 19 | substr( 20 | {{ string_text }}, 21 | -1 * ({{ length_expression }}) 22 | ) 23 | end 24 | 25 | {%- endmacro -%} 26 | 27 | {% macro snowflake__right(string_text, length_expression) %} 28 | 29 | case when {{ length_expression }} = 0 30 | then '' 31 | else 32 | right( 33 | {{ string_text }}, 34 | {{ length_expression }} 35 | ) 36 | end 37 | 38 | {%- endmacro -%} -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/safe_cast.sql: -------------------------------------------------------------------------------- 1 | {% macro safe_cast(field, type) %} 2 | {{ adapter_macro('dbt_utils.safe_cast', field, type) }} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__safe_cast(field, type) %} 7 | {# most databases don't support this function yet 8 | so we just need to use cast #} 9 | cast({{field}} as {{type}}) 10 | {% endmacro %} 11 | 12 | 13 | {% macro snowflake__safe_cast(field, type) %} 14 | try_cast({{field}} as {{type}}) 15 | {% endmacro %} 16 | 17 | 18 | {% macro bigquery__safe_cast(field, type) %} 19 | safe_cast({{field}} as {{type}}) 20 | {% endmacro %} 21 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/split_part.sql: -------------------------------------------------------------------------------- 1 | {% macro split_part(string_text, delimiter_text, part_number) %} 2 | {{ adapter_macro('dbt_utils.split_part', string_text, delimiter_text, part_number) }} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__split_part(string_text, delimiter_text, part_number) %} 7 | 8 | split_part( 9 | {{ string_text }}, 10 | {{ 
delimiter_text }}, 11 | {{ part_number }} 12 | ) 13 | 14 | {% endmacro %} 15 | 16 | 17 | {% macro bigquery__split_part(string_text, delimiter_text, part_number) %} 18 | 19 | split( 20 | {{ string_text }}, 21 | {{ delimiter_text }} 22 | )[safe_offset({{ part_number - 1 }})] 23 | 24 | {% endmacro %} 25 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/cross_db_utils/width_bucket.sql: -------------------------------------------------------------------------------- 1 | {% macro width_bucket(expr, min_value, max_value, num_buckets) %} 2 | {{ adapter_macro('dbt_utils.width_bucket', expr, min_value, max_value, num_buckets) }} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__width_bucket(expr, min_value, max_value, num_buckets) -%} 7 | 8 | {% set bin_size -%} 9 | (( {{ max_value }} - {{ min_value }} ) / {{ num_buckets }} ) 10 | {%- endset %} 11 | ( 12 | -- to break ties when the amount is exactly at the bucket edge 13 | case 14 | when 15 | mod( 16 | {{ dbt_utils.safe_cast(expr, dbt_utils.type_numeric() ) }}, 17 | {{ dbt_utils.safe_cast(bin_size, dbt_utils.type_numeric() ) }} 18 | ) = 0 19 | then 1 20 | else 0 21 | end 22 | ) + 23 | -- Anything over max_value goes into the N+1 bucket 24 | least( 25 | ceil( 26 | ({{ expr }} - {{ min_value }})/{{ bin_size }} 27 | ), 28 | {{ num_buckets }} + 1 29 | ) 30 | {%- endmacro %} 31 | 32 | {% macro redshift__width_bucket(expr, min_value, max_value, num_buckets) -%} 33 | 34 | {% set bin_size -%} 35 | (( {{ max_value }} - {{ min_value }} ) / {{ num_buckets }} ) 36 | {%- endset %} 37 | ( 38 | -- to break ties when the amount is exactly at the bucket edge 39 | case 40 | when 41 | {{ dbt_utils.safe_cast(expr, dbt_utils.type_numeric() ) }} % 42 | {{ dbt_utils.safe_cast(bin_size, dbt_utils.type_numeric() ) }} 43 | = 0 44 | then 1 45 | else 0 46 | end 47 | ) + 48 | -- Anything over max_value goes into the N+1 bucket 49 | least( 50 | ceil( 51 | ({{ expr }} - {{ min_value }})/{{ bin_size }} 52 | ), 53 | {{ num_buckets }} + 1 54 | ) 55 | {%- endmacro %} 56 | 57 | {% macro snowflake__width_bucket(expr, min_value, max_value, num_buckets) %} 58 | width_bucket({{ expr }}, {{ min_value }}, {{ max_value }}, {{ num_buckets }} ) 59 | {% endmacro %} 60 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/datetime/date_spine.sql: -------------------------------------------------------------------------------- 1 | {% macro get_intervals_between(start_date, end_date, datepart) -%} 2 | 3 | {%- call statement('get_intervals_between', fetch_result=True) %} 4 | 5 | select {{dbt_utils.datediff(start_date, end_date, datepart)}} 6 | 7 | {%- endcall -%} 8 | 9 | {%- set value_list = load_result('get_intervals_between') -%} 10 | 11 | {%- if value_list and value_list['data'] -%} 12 | {%- set values = value_list['data'] | map(attribute=0) | list %} 13 | {{ return(values[0]) }} 14 | {%- else -%} 15 | {{ return(1) }} 16 | {%- endif -%} 17 | 18 | {%- endmacro %} 19 | 20 | 21 | 22 | 23 | {% macro date_spine(datepart, start_date, end_date) %} 24 | 25 | /* 26 | call as follows: 27 | 28 | date_spine( 29 | "day", 30 | "to_date('01/01/2016', 'mm/dd/yyyy')", 31 | "dateadd(week, 1, current_date)" 32 | ) 33 | 34 | */ 35 | 36 | with rawdata as ( 37 | 38 | {{dbt_utils.generate_series( 39 | dbt_utils.get_intervals_between(start_date, end_date, datepart) 40 | )}} 41 | 42 | ), 43 | 44 | all_periods as ( 45 | 46 | select ( 47 | {{ 48 | dbt_utils.dateadd( 49 | datepart, 50 | "row_number() over (order by
1) - 1", 51 | start_date 52 | ) 53 | }} 54 | ) as date_{{datepart}} 55 | from rawdata 56 | 57 | ), 58 | 59 | filtered as ( 60 | 61 | select * 62 | from all_periods 63 | where date_{{datepart}} <= {{ end_date }} 64 | 65 | ) 66 | 67 | select * from filtered 68 | 69 | {% endmacro %} 70 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/geo/haversine_distance.sql: -------------------------------------------------------------------------------- 1 | {# 2 | This calculates the distance between two sets of latitude and longitude. 3 | The formula is from the following blog post: 4 | http://daynebatten.com/2015/09/latitude-longitude-distance-sql/ 5 | 6 | The arguments should be float type. 7 | #} 8 | 9 | {% macro haversine_distance(lat1,lon1,lat2,lon2) -%} 10 | 11 | 2 * 3961 * asin(sqrt((sin(radians(({{lat2}} - {{lat1}}) / 2))) ^ 2 + 12 | cos(radians({{lat1}})) * cos(radians({{lat2}})) * 13 | (sin(radians(({{lon2}} - {{lon1}}) / 2))) ^ 2)) 14 | 15 | {%- endmacro %} 16 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/logger/log_info.sql: -------------------------------------------------------------------------------- 1 | {% macro log_info(message) %} 2 | 3 | {{ log(dbt_utils.pretty_log_format(message), info=True) }} 4 | 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/logger/pretty_log_format.sql: -------------------------------------------------------------------------------- 1 | {% macro pretty_log_format(message) %} 2 | 3 | {{ return( dbt_utils.pretty_time() ~ ' + ' ~ message) }} 4 | 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/logger/pretty_time.sql: -------------------------------------------------------------------------------- 1 | {% macro pretty_time(format='%H:%M:%S') %} 2 | 3 | {{ return(modules.datetime.datetime.now().strftime(format)) }} 4 | 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/materializations/insert_by_period_materialization.sql: -------------------------------------------------------------------------------- 1 | {% macro get_period_boundaries(target_schema, target_table, timestamp_field, start_date, stop_date, period) -%} 2 | 3 | {% call statement('period_boundaries', fetch_result=True) -%} 4 | with data as ( 5 | select 6 | coalesce(max("{{timestamp_field}}"), '{{start_date}}')::timestamp as start_timestamp, 7 | coalesce( 8 | {{dbt_utils.dateadd('millisecond', 9 | -1, 10 | "nullif('" ~ stop_date ~ "','')::timestamp")}}, 11 | {{dbt_utils.current_timestamp()}} 12 | ) as stop_timestamp 13 | from "{{target_schema}}"."{{target_table}}" 14 | ) 15 | 16 | select 17 | start_timestamp, 18 | stop_timestamp, 19 | {{dbt_utils.datediff('start_timestamp', 20 | 'stop_timestamp', 21 | period)}} + 1 as num_periods 22 | from data 23 | {%- endcall %} 24 | 25 | {%- endmacro %} 26 | 27 | {% macro get_period_sql(target_cols_csv, sql, timestamp_field, period, start_timestamp, stop_timestamp, offset) -%} 28 | 29 | {%- set period_filter -%} 30 | ("{{timestamp_field}}" > '{{start_timestamp}}'::timestamp + interval '{{offset}} {{period}}' and 31 | "{{timestamp_field}}" <= '{{start_timestamp}}'::timestamp + interval '{{offset}} {{period}}' + interval '1 {{period}}' and 32 | "{{timestamp_field}}" < 
'{{stop_timestamp}}'::timestamp) 33 | {%- endset -%} 34 | 35 | {%- set filtered_sql = sql | replace("__PERIOD_FILTER__", period_filter) -%} 36 | 37 | select 38 | {{target_cols_csv}} 39 | from ( 40 | {{filtered_sql}} 41 | ) 42 | 43 | {%- endmacro %} 44 | 45 | {% materialization insert_by_period, default -%} 46 | {%- set timestamp_field = config.require('timestamp_field') -%} 47 | {%- set start_date = config.require('start_date') -%} 48 | {%- set stop_date = config.get('stop_date') or '' -%} 49 | {%- set period = config.get('period') or 'week' -%} 50 | 51 | {%- if sql.find('__PERIOD_FILTER__') == -1 -%} 52 | {%- set error_message -%} 53 | Model '{{ model.unique_id }}' does not include the required string '__PERIOD_FILTER__' in its sql 54 | {%- endset -%} 55 | {{ exceptions.raise_compiler_error(error_message) }} 56 | {%- endif -%} 57 | 58 | {%- set identifier = model['name'] -%} 59 | 60 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} 61 | {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, type='table') -%} 62 | 63 | {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} 64 | {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} 65 | 66 | {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} 67 | {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%} 68 | 69 | {%- set should_truncate = (non_destructive_mode and full_refresh_mode and exists_as_table) -%} 70 | {%- set should_drop = (not should_truncate and (full_refresh_mode or exists_not_as_table)) -%} 71 | {%- set force_create = (flags.FULL_REFRESH and not flags.NON_DESTRUCTIVE) -%} 72 | 73 | -- setup 74 | {% if old_relation is none -%} 75 | -- noop 76 | {%- elif should_truncate -%} 77 | {{adapter.truncate_relation(old_relation)}} 78 | {%- elif should_drop -%} 79 | {{adapter.drop_relation(old_relation)}} 80 | {%- set old_relation = none -%} 81 | {%- endif %} 82 | 83 | {{run_hooks(pre_hooks, inside_transaction=False)}} 84 | 85 | -- `begin` happens here, so `commit` after it to finish the transaction 86 | {{run_hooks(pre_hooks, inside_transaction=True)}} 87 | {% call statement() -%} 88 | begin; -- make extra sure we've closed out the transaction 89 | commit; 90 | {%- endcall %} 91 | 92 | -- build model 93 | {% if force_create or old_relation is none -%} 94 | {# Create an empty target table -#} 95 | {% call statement('main') -%} 96 | {%- set empty_sql = sql | replace("__PERIOD_FILTER__", 'false') -%} 97 | {{create_table_as(False, target_relation, empty_sql)}}; 98 | {%- endcall %} 99 | {%- endif %} 100 | 101 | {% set _ = dbt_utils.get_period_boundaries(schema, 102 | identifier, 103 | timestamp_field, 104 | start_date, 105 | stop_date, 106 | period) %} 107 | {%- set start_timestamp = load_result('period_boundaries')['data'][0][0] | string -%} 108 | {%- set stop_timestamp = load_result('period_boundaries')['data'][0][1] | string -%} 109 | {%- set num_periods = load_result('period_boundaries')['data'][0][2] | int -%} 110 | 111 | {% set target_columns = adapter.get_columns_in_relation(target_relation) %} 112 | {%- set target_cols_csv = target_columns | map(attribute='quoted') | join(', ') -%} 113 | {%- set loop_vars = {'sum_rows_inserted': 0} -%} 114 | 115 | -- commit each period as a separate transaction 116 | {% for i in range(num_periods) -%} 117 | {%- set msg = "Running for " ~ period ~ " " ~ (i + 1) ~ " of " ~ (num_periods) -%} 118 | {{ dbt_utils.log_info(msg) }} 119 | 120
| {%- set tmp_identifier = model['name'] ~ '__dbt_incremental_period' ~ i ~ '_tmp' -%} 121 | {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, 122 | schema=schema, type='table') -%} 123 | {% call statement() -%} 124 | {% set tmp_table_sql = dbt_utils.get_period_sql(target_cols_csv, 125 | sql, 126 | timestamp_field, 127 | period, 128 | start_timestamp, 129 | stop_timestamp, 130 | i) %} 131 | {{dbt.create_table_as(True, tmp_relation, tmp_table_sql)}} 132 | {%- endcall %} 133 | 134 | {{adapter.expand_target_column_types(from_relation=tmp_relation, 135 | to_relation=target_relation)}} 136 | {%- set name = 'main-' ~ i -%} 137 | {% call statement(name, fetch_result=True) -%} 138 | insert into {{target_relation}} ({{target_cols_csv}}) 139 | ( 140 | select 141 | {{target_cols_csv}} 142 | from {{tmp_relation.include(schema=False)}} 143 | ); 144 | {%- endcall %} 145 | {%- set rows_inserted = (load_result('main-' ~ i)['status'].split(" "))[2] | int -%} 146 | {%- set sum_rows_inserted = loop_vars['sum_rows_inserted'] + rows_inserted -%} 147 | {%- if loop_vars.update({'sum_rows_inserted': sum_rows_inserted}) %} {% endif -%} 148 | 149 | {%- set msg = "Ran for " ~ period ~ " " ~ (i + 1) ~ " of " ~ (num_periods) ~ "; " ~ rows_inserted ~ " records inserted" -%} 150 | {{ dbt_utils.log_info(msg) }} 151 | 152 | {%- endfor %} 153 | 154 | {% call statement() -%} 155 | begin; 156 | {%- endcall %} 157 | 158 | {{run_hooks(post_hooks, inside_transaction=True)}} 159 | 160 | {% call statement() -%} 161 | commit; 162 | {%- endcall %} 163 | 164 | {{run_hooks(post_hooks, inside_transaction=False)}} 165 | 166 | {%- set status_string = "INSERT " ~ loop_vars['sum_rows_inserted'] -%} 167 | 168 | {% call noop_statement(name='main', status=status_string) -%} 169 | -- no-op 170 | {%- endcall %} 171 | 172 | -- Return the relations created in this materialization 173 | {{ return({'relations': [target_relation]}) }} 174 | 175 | {%- endmaterialization %} 176 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/schema_tests/at_least_one.sql: -------------------------------------------------------------------------------- 1 | {% macro test_at_least_one(model) %} 2 | 3 | {% set column_name = kwargs.get('column_name', kwargs.get('arg')) %} 4 | 5 | select count(*) 6 | from ( 7 | select 8 | 9 | count({{ column_name }}) 10 | 11 | from {{ model }} 12 | 13 | having count({{ column_name }}) = 0 14 | 15 | ) validation_errors 16 | 17 | {% endmacro %} 18 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/schema_tests/cardinality_equality.sql: -------------------------------------------------------------------------------- 1 | {% macro test_cardinality_equality(model, to, field) %} 2 | 3 | {% set column_name = kwargs.get('column_name', kwargs.get('from')) %} 4 | 5 | 6 | with table_a as ( 7 | select 8 | {{ column_name }}, 9 | count(*) as num_rows 10 | from {{ model }} 11 | group by 1 12 | ), 13 | 14 | table_b as ( 15 | select 16 | {{ field }}, 17 | count(*) as num_rows 18 | from {{ to }} 19 | group by 1 20 | ), 21 | 22 | except_a as ( 23 | select * 24 | from table_a 25 | {{ dbt_utils.except() }} 26 | select * 27 | from table_b 28 | ), 29 | 30 | except_b as ( 31 | select * 32 | from table_b 33 | {{ dbt_utils.except() }} 34 | select * 35 | from table_a 36 | ), 37 | 38 | unioned as ( 39 | select * 40 | from except_a 41 | union all 42 | select * 43 | from except_b 44 | ) 45 | 46 | select count(*) 47 | from 
unioned 48 | 49 | {% endmacro %} 50 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/schema_tests/equal_rowcount.sql: -------------------------------------------------------------------------------- 1 | {% macro test_equal_rowcount(model) %} 2 | 3 | {% set compare_model = kwargs.get('compare_model', kwargs.get('arg')) %} 4 | 5 | {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} 6 | {%- if not execute -%} 7 | {{ return('') }} 8 | {% endif %} 9 | 10 | with a as ( 11 | 12 | select count(*) as count_a from {{ model }} 13 | 14 | ), 15 | b as ( 16 | 17 | select count(*) as count_b from {{ compare_model }} 18 | 19 | ), 20 | final as ( 21 | 22 | select abs( 23 | (select count_a from a) - 24 | (select count_b from b) 25 | ) 26 | as diff_count 27 | 28 | ) 29 | 30 | select diff_count from final 31 | 32 | {% endmacro %} -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/schema_tests/equality.sql: -------------------------------------------------------------------------------- 1 | {% macro test_equality(model) %} 2 | 3 | 4 | {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} 5 | {%- if not execute -%} 6 | {{ return('') }} 7 | {% endif %} 8 | 9 | -- setup 10 | {%- do dbt_utils._is_relation(model, 'test_equality') -%} 11 | {% set compare_model = kwargs.get('compare_model', kwargs.get('arg')) %} 12 | {% set compare_columns = kwargs.get('compare_columns', adapter.get_columns_in_relation(model) | map(attribute='quoted') ) %} 13 | {% set compare_cols_csv = compare_columns | join(', ') %} 14 | 15 | with a as ( 16 | 17 | select * from {{ model }} 18 | 19 | ), 20 | 21 | b as ( 22 | 23 | select * from {{ compare_model }} 24 | 25 | ), 26 | 27 | a_minus_b as ( 28 | 29 | select {{compare_cols_csv}} from a 30 | {{ dbt_utils.except() }} 31 | select {{compare_cols_csv}} from b 32 | 33 | ), 34 | 35 | b_minus_a as ( 36 | 37 | select {{compare_cols_csv}} from b 38 | {{ dbt_utils.except() }} 39 | select {{compare_cols_csv}} from a 40 | 41 | ), 42 | 43 | unioned as ( 44 | 45 | select * from a_minus_b 46 | union all 47 | select * from b_minus_a 48 | 49 | ), 50 | 51 | final as ( 52 | 53 | select (select count(*) from unioned) + 54 | (select abs( 55 | (select count(*) from a_minus_b) - 56 | (select count(*) from b_minus_a) 57 | )) 58 | as count 59 | 60 | ) 61 | 62 | select count from final 63 | 64 | {% endmacro %} 65 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/schema_tests/expression_is_true.sql: -------------------------------------------------------------------------------- 1 | {% macro test_expression_is_true(model, condition='true') %} 2 | 3 | {% set expression = kwargs.get('expression', kwargs.get('arg')) %} 4 | 5 | with meet_condition as ( 6 | 7 | select * from {{ model }} where {{ condition }} 8 | 9 | ), 10 | validation_errors as ( 11 | 12 | select 13 | * 14 | from meet_condition 15 | where not({{expression}}) 16 | 17 | ) 18 | 19 | select count(*) 20 | from validation_errors 21 | 22 | {% endmacro %} 23 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/schema_tests/mutually_exclusive_ranges.sql: -------------------------------------------------------------------------------- 1 | {% macro test_mutually_exclusive_ranges(model, lower_bound_column, 
upper_bound_column, partition_by=None, gaps='allowed') %} 2 | 3 | {% if gaps == 'not_allowed' %} 4 | {% set allow_gaps_operator='=' %} 5 | {% set allow_gaps_operator_in_words='equal_to' %} 6 | {% elif gaps == 'allowed' %} 7 | {% set allow_gaps_operator='<=' %} 8 | {% set allow_gaps_operator_in_words='less_than_or_equal_to' %} 9 | {% elif gaps == 'required' %} 10 | {% set allow_gaps_operator='<' %} 11 | {% set allow_gaps_operator_in_words='less_than' %} 12 | {% else %} 13 | {{ exceptions.raise_compiler_error( 14 | "`gaps` argument for mutually_exclusive_ranges test must be one of ['not_allowed', 'allowed', 'required']. Got: '" ~ gaps ~ "'." 15 | ) }} 16 | 17 | {% endif %} 18 | 19 | {% set partition_clause="partition by " ~ partition_by if partition_by else '' %} 20 | 21 | with window_functions as ( 22 | 23 | select 24 | {% if partition_by %} 25 | {{ partition_by }}, 26 | {% endif %} 27 | {{ lower_bound_column }} as lower_bound, 28 | {{ upper_bound_column }} as upper_bound, 29 | 30 | lead({{ lower_bound_column }}) over ( 31 | {{ partition_clause }} 32 | order by {{ lower_bound_column }} 33 | ) as next_lower_bound, 34 | 35 | row_number() over ( 36 | {{ partition_clause }} 37 | order by {{ lower_bound_column }} desc 38 | ) = 1 as is_last_record 39 | 40 | from {{ model }} 41 | 42 | ), 43 | 44 | calc as ( 45 | -- We want to return records where one of our assumptions fails, so we'll use 46 | -- the `not` function with `and` statements so we can write our assumptions more cleanly 47 | select 48 | *, 49 | 50 | -- For each record: lower_bound should be < upper_bound. 51 | -- Coalesce it to return an error on the null case (implicit assumption 52 | -- these columns are not_null) 53 | coalesce( 54 | lower_bound < upper_bound, 55 | false 56 | ) as lower_bound_less_than_upper_bound, 57 | 58 | -- For each record: upper_bound {{ allow_gaps_operator }} the next lower_bound. 59 | -- Coalesce it to handle null cases for the last record.
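-- (Editor's illustration: with gaps='allowed', a range ending at 10 followed by one starting at 12 passes, since 10 <= 12; with gaps='not_allowed' the same pair fails, because 10 = 12 is false.)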
60 | coalesce( 61 | upper_bound {{ allow_gaps_operator }} next_lower_bound, 62 | is_last_record, 63 | false 64 | ) as upper_bound_{{ allow_gaps_operator_in_words }}_next_lower_bound 65 | 66 | from window_functions 67 | 68 | ), 69 | 70 | validation_errors as ( 71 | 72 | select 73 | * 74 | from calc 75 | 76 | where not( 77 | -- THE FOLLOWING SHOULD BE TRUE -- 78 | lower_bound_less_than_upper_bound 79 | and upper_bound_{{ allow_gaps_operator_in_words }}_next_lower_bound 80 | ) 81 | ) 82 | 83 | select count(*) from validation_errors 84 | {% endmacro %} 85 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/schema_tests/not_constant.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro test_not_constant(model) %} 3 | 4 | {% set column_name = kwargs.get('column_name', kwargs.get('arg')) %} 5 | 6 | select count(*) 7 | 8 | from ( 9 | 10 | select 11 | count(distinct {{ column_name }}) 12 | 13 | from {{ model }} 14 | 15 | having count(distinct {{ column_name }}) = 1 16 | 17 | ) validation_errors 18 | 19 | 20 | {% endmacro %} 21 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/schema_tests/recency.sql: -------------------------------------------------------------------------------- 1 | {% macro test_recency(model, datepart, interval) %} 2 | 3 | {% set column_name = kwargs.get('column_name', kwargs.get('field')) %} 4 | 5 | select 6 | case when count(*) > 0 then 0 7 | else 1 8 | end as error_result 9 | from {{model}} 10 | where {{column_name}} >= 11 | {{dbt_utils.dateadd(datepart, interval * -1, dbt_utils.current_timestamp())}} 12 | 13 | {% endmacro %} 14 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/schema_tests/relationships_where.sql: -------------------------------------------------------------------------------- 1 | {% macro test_relationships_where(model, to, field) %} 2 | 3 | {% set column_name = kwargs.get('column_name', kwargs.get('from')) %} 4 | {% set from_condition = kwargs.get('from_condition', "true") %} 5 | {% set to_condition = kwargs.get('to_condition', "true") %} 6 | 7 | with left_table as ( 8 | 9 | select 10 | {{column_name}} as id 11 | 12 | from {{model}} 13 | 14 | where {{column_name}} is not null 15 | and {{from_condition}} 16 | 17 | ), 18 | 19 | right_table as ( 20 | 21 | select 22 | {{field}} as id 23 | 24 | from {{to}} 25 | 26 | where {{field}} is not null 27 | and {{to_condition}} 28 | 29 | ), 30 | 31 | exceptions as ( 32 | 33 | select 34 | left_table.id, 35 | right_table.id as right_id 36 | 37 | from left_table 38 | 39 | left join right_table 40 | on left_table.id = right_table.id 41 | 42 | where right_table.id is null 43 | 44 | ) 45 | 46 | select count(*) from exceptions 47 | 48 | {% endmacro %} 49 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/schema_tests/unique_combination_of_columns.sql: -------------------------------------------------------------------------------- 1 | {% macro test_unique_combination_of_columns(model) %} 2 | 3 | {%- set columns = kwargs.get('combination_of_columns', kwargs.get('arg')) %} 4 | 5 | {%- set columns_csv=columns | join(', ') %} 6 | 7 | with validation_errors as ( 8 | 9 | select 10 | {{ columns_csv }} 11 | from {{ model }} 12 | 13 | group by {{ columns_csv }} 14 | having count(*) > 1 15 | 16 | ) 17 | 18 | select count(*) 19 | from 
validation_errors 20 | 21 | 22 | {% endmacro %} 23 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/generate_series.sql: -------------------------------------------------------------------------------- 1 | {% macro get_powers_of_two(upper_bound) %} 2 | 3 | {% if upper_bound <= 0 %} 4 | {{ exceptions.raise_compiler_error("upper bound must be positive") }} 5 | {% endif %} 6 | 7 | {% for _ in range(1, 100) %} 8 | {% if upper_bound <= 2 ** loop.index %}{{ return(loop.index) }}{% endif %} 9 | {% endfor %} 10 | 11 | {% endmacro %} 12 | 13 | 14 | {% macro generate_series(upper_bound) %} 15 | 16 | {% set n = dbt_utils.get_powers_of_two(upper_bound) %} 17 | 18 | with p as ( 19 | select 0 as generated_number union all select 1 20 | ), unioned as ( 21 | 22 | select 23 | 24 | {% for i in range(n) %} 25 | p{{i}}.generated_number * pow(2, {{i}}) 26 | {% if not loop.last %} + {% endif %} 27 | {% endfor %} 28 | + 1 29 | as generated_number 30 | 31 | from 32 | 33 | {% for i in range(n) %} 34 | p as p{{i}} 35 | {% if not loop.last %} cross join {% endif %} 36 | {% endfor %} 37 | 38 | ) 39 | 40 | select * 41 | from unioned 42 | where generated_number <= {{upper_bound}} 43 | order by generated_number 44 | 45 | {% endmacro %} 46 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/get_column_values.sql: -------------------------------------------------------------------------------- 1 | {# 2 | This macro fetches the unique values for `column` in the table `table`. 3 | 4 | Arguments: 5 | table: A model `ref`, or a schema.table string for the table to query (Required) 6 | column: The column to query for unique values 7 | max_records: If provided, the maximum number of unique records to return (default: none) default: The value to return if the relation does not exist or no values are found (default: none) 8 | 9 | Returns: 10 | A list of distinct values for the specified column 11 | #} 12 | 13 | {% macro get_column_values(table, column, max_records=none, default=none) -%} 14 | 15 | {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} 16 | {%- if not execute -%} 17 | {{ return('') }} 18 | {% endif %} 19 | {#-- #} 20 | 21 | {%- set target_relation = adapter.get_relation(database=table.database, 22 | schema=table.schema, 23 | identifier=table.identifier) -%} 24 | 25 | {%- call statement('get_column_values', fetch_result=true) %} 26 | 27 | {%- if not target_relation and default is none -%} 28 | 29 | {{ exceptions.raise_compiler_error("In get_column_values(): relation " ~ table ~ " does not exist and no default value was provided.") }} 30 | 31 | {%- elif not target_relation and default is not none -%} 32 | 33 | {{ log("Relation " ~ table ~ " does not exist.
Returning the default value: " ~ default) }} 34 | 35 | {{ return(default) }} 36 | 37 | {%- else -%} 38 | 39 | select 40 | {{ column }} as value 41 | 42 | from {{ target_relation }} 43 | group by 1 44 | order by count(*) desc 45 | 46 | {% if max_records is not none %} 47 | limit {{ max_records }} 48 | {% endif %} 49 | 50 | {% endif %} 51 | 52 | {%- endcall -%} 53 | 54 | {%- set value_list = load_result('get_column_values') -%} 55 | 56 | {%- if value_list and value_list['data'] -%} 57 | {%- set values = value_list['data'] | map(attribute=0) | list %} 58 | {{ return(values) }} 59 | {%- else -%} 60 | {{ return(default) }} 61 | {%- endif -%} 62 | 63 | {%- endmacro %} 64 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/get_query_results_as_dict.sql: -------------------------------------------------------------------------------- 1 | {% macro get_query_results_as_dict(query) %} 2 | {# This macro returns a dictionary of the form {column_name: (tuple_of_results)} #} 3 | 4 | {%- call statement('get_query_results', fetch_result=True,auto_begin=false) -%} 5 | 6 | {{ query }} 7 | 8 | {%- endcall -%} 9 | 10 | {% set sql_results={} %} 11 | 12 | {%- if execute -%} 13 | {% set sql_results_table = load_result('get_query_results').table.columns %} 14 | {% for column_name, column in sql_results_table.items() %} 15 | {% do sql_results.update({column_name: column.values()}) %} 16 | {% endfor %} 17 | {%- endif -%} 18 | 19 | {{ return(sql_results) }} 20 | 21 | {% endmacro %} 22 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/get_relations_by_prefix.sql: -------------------------------------------------------------------------------- 1 | {% macro get_relations_by_prefix(schema, prefix, exclude='', database=target.database) %} 2 | 3 | {%- call statement('get_tables', fetch_result=True) %} 4 | 5 | {{ dbt_utils.get_tables_by_prefix_sql(schema, prefix, exclude, database) }} 6 | 7 | {%- endcall -%} 8 | 9 | {%- set table_list = load_result('get_tables') -%} 10 | 11 | {%- if table_list and table_list['table'] -%} 12 | {%- set tbl_relations = [] -%} 13 | {%- for row in table_list['table'] -%} 14 | {%- set tbl_relation = api.Relation.create(database, row.table_schema, row.table_name) -%} 15 | {%- do tbl_relations.append(tbl_relation) -%} 16 | {%- endfor -%} 17 | 18 | {{ return(tbl_relations) }} 19 | {%- else -%} 20 | {{ return([]) }} 21 | {%- endif -%} 22 | 23 | {% endmacro %} 24 | 25 | {% macro get_tables_by_prefix(schema, prefix, exclude='', database=target.database) %} 26 | 27 | {% do exceptions.warn("Warning: the `get_tables_by_prefix` macro is no longer supported and will be deprecated in a future release of dbt-utils. 
Use the `get_relations_by_prefix` macro instead") %} 28 | 29 | {{ return(dbt_utils.get_relations_by_prefix(schema, prefix, exclude, database)) }} 30 | 31 | {% endmacro %} 32 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/get_tables_by_prefix_sql.sql: -------------------------------------------------------------------------------- 1 | {% macro get_tables_by_prefix_sql(schema, prefix, exclude='', database=target.database) %} 2 | {{ adapter_macro('dbt_utils.get_tables_by_prefix_sql', schema, prefix, exclude, database) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_tables_by_prefix_sql(schema, prefix, exclude='', database=target.database) %} 6 | 7 | select distinct 8 | table_schema as "table_schema", table_name as "table_name" 9 | from {{database}}.information_schema.tables 10 | where table_schema ilike '{{ schema }}' 11 | and table_name ilike '{{ prefix }}%' 12 | and table_name not ilike '{{ exclude }}' 13 | 14 | {% endmacro %} 15 | 16 | 17 | {% macro bigquery__get_tables_by_prefix_sql(schema, prefix, exclude='', database=target.database) %} 18 | 19 | select distinct 20 | dataset_id as table_schema, table_id as table_name 21 | 22 | from {{adapter.quote(database)}}.{{schema}}.__TABLES_SUMMARY__ 23 | where dataset_id = '{{schema}}' 24 | and lower(table_id) like lower ('{{prefix}}%') 25 | and lower(table_id) not like lower ('{{exclude}}') 26 | 27 | {% endmacro %} 28 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/groupby.sql: -------------------------------------------------------------------------------- 1 | {%- macro group_by(n) -%} 2 | 3 | group by {% for i in range(1, n + 1) -%} 4 | {{ i }}{{ ',' if not loop.last }} 5 | {%- endfor -%} 6 | 7 | {%- endmacro -%} 8 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/nullcheck.sql: -------------------------------------------------------------------------------- 1 | {% macro nullcheck(cols) %} 2 | {%- for col in cols %} 3 | 4 | {% if col.is_string() -%} 5 | 6 | nullif({{col.name}},'') as {{col.name}} 7 | 8 | {%- else -%} 9 | 10 | {{col.name}} 11 | 12 | {%- endif -%} 13 | 14 | {%- if not loop.last -%} , {%- endif -%} 15 | 16 | {%- endfor -%} 17 | {% endmacro %} 18 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/nullcheck_table.sql: -------------------------------------------------------------------------------- 1 | {% macro nullcheck_table(relation) %} 2 | 3 | {%- do dbt_utils._is_relation(relation, 'nullcheck_table') -%} 4 | {% set cols = adapter.get_columns_in_relation(relation) %} 5 | 6 | select {{ dbt_utils.nullcheck(cols) }} 7 | from {{relation}} 8 | 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/pivot.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Pivot values from rows to columns. 
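Each entry in `values` becomes one aggregated case expression, by default sum(case when <column> = '<value>' then 1 else 0 end) (editor's summary of the macro below).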
3 | 4 | Example: 5 | 6 | Input: `public.test` 7 | 8 | | size | color | 9 | |------+-------| 10 | | S | red | 11 | | S | blue | 12 | | S | red | 13 | | M | red | 14 | 15 | select 16 | size, 17 | {{ dbt_utils.pivot('color', dbt_utils.get_column_values('public.test', 18 | 'color')) }} 19 | from public.test 20 | group by size 21 | 22 | Output: 23 | 24 | | size | red | blue | 25 | |------+-----+------| 26 | | S | 2 | 1 | 27 | | M | 1 | 0 | 28 | 29 | Arguments: 30 | column: Column name, required 31 | values: List of row values to turn into columns, required 32 | alias: Whether to create column aliases, default is True 33 | agg: SQL aggregation function, default is sum 34 | cmp: SQL value comparison, default is = 35 | prefix: Column alias prefix, default is blank 36 | suffix: Column alias postfix, default is blank 37 | then_value: Value to use if comparison succeeds, default is 1 38 | else_value: Value to use if comparison fails, default is 0 39 | quote_identifiers: Whether to surround column aliases with double quotes, default is true 40 | #} 41 | 42 | {% macro pivot(column, 43 | values, 44 | alias=True, 45 | agg='sum', 46 | cmp='=', 47 | prefix='', 48 | suffix='', 49 | then_value=1, 50 | else_value=0, 51 | quote_identifiers=True) %} 52 | {% for v in values %} 53 | {{ agg }}( 54 | case 55 | when {{ column }} {{ cmp }} '{{ v }}' 56 | then {{ then_value }} 57 | else {{ else_value }} 58 | end 59 | ) 60 | {% if alias %} 61 | {% if quote_identifiers %} 62 | as {{ adapter.quote(prefix ~ v ~ suffix) }} 63 | {% else %} 64 | as {{prefix ~ v ~ suffix }} 65 | {% endif %} 66 | {% endif %} 67 | {% if not loop.last %},{% endif %} 68 | {% endfor %} 69 | {% endmacro %} 70 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/safe_add.sql: -------------------------------------------------------------------------------- 1 | {%- macro safe_add() -%} 2 | 3 | {% set fields = [] %} 4 | 5 | {%- for field in varargs -%} 6 | 7 | {% do fields.append("coalesce(" ~ field ~ ", 0)") %} 8 | 9 | {%- endfor -%} 10 | 11 | {{ fields|join(' +\n ') }} 12 | 13 | {%- endmacro -%} 14 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/star.sql: -------------------------------------------------------------------------------- 1 | {% macro star(from, relation_alias=False, except=[]) -%} 2 | 3 | {%- do dbt_utils._is_relation(from, 'star') -%} 4 | 5 | {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. 
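During parsing `execute` is False, so the macro returns an empty string rather than asking the adapter for the columns of a relation that may not exist yet.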
#} 6 | {%- if not execute -%} 7 | {{ return('') }} 8 | {% endif %} 9 | 10 | {%- set include_cols = [] %} 11 | {%- set cols = adapter.get_columns_in_relation(from) -%} 12 | {%- for col in cols -%} 13 | 14 | {%- if col.column not in except -%} 15 | {% do include_cols.append(col.column) %} 16 | 17 | {%- endif %} 18 | {%- endfor %} 19 | 20 | {%- for col in include_cols %} 21 | 22 | {%- if relation_alias %}{{ relation_alias }}.{% else %}{%- endif -%}{{ adapter.quote(col)|trim }} 23 | {%- if not loop.last %},{{ '\n ' }}{% endif %} 24 | 25 | {%- endfor -%} 26 | {%- endmacro %} 27 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/surrogate_key.sql: -------------------------------------------------------------------------------- 1 | {%- macro surrogate_key(field_list) -%} 2 | 3 | 4 | {%- if varargs|length >= 1 %} 5 | 6 | {%- do exceptions.warn("Warning: the `surrogate_key` macro now takes a single list argument instead of multiple string arguments. Support for multiple string arguments will be deprecated in a future release of dbt-utils.") -%} 7 | 8 | {# first argument is not included in varargs, so add first element to field_list_xf #} 9 | {%- set field_list_xf = [field_list] -%} 10 | 11 | {%- for field in varargs %} 12 | {%- set _ = field_list_xf.append(field) -%} 13 | {%- endfor -%} 14 | 15 | {%- else -%} 16 | 17 | {# if using list, just set field_list_xf as field_list #} 18 | {%- set field_list_xf = field_list -%} 19 | 20 | {%- endif -%} 21 | 22 | 23 | {%- set fields = [] -%} 24 | 25 | {%- for field in field_list_xf -%} 26 | 27 | {%- set _ = fields.append( 28 | "coalesce(cast(" ~ field ~ " as " ~ dbt_utils.type_string() ~ "), '')" 29 | ) -%} 30 | 31 | {%- if not loop.last %} 32 | {%- set _ = fields.append("'-'") -%} 33 | {%- endif -%} 34 | 35 | {%- endfor -%} 36 | 37 | {{dbt_utils.hash(dbt_utils.concat(fields))}} 38 | 39 | {%- endmacro -%} 40 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/union.sql: -------------------------------------------------------------------------------- 1 | {%- macro union_relations(relations, column_override=none, include=[], exclude=[], source_column_name=none) -%} 2 | 3 | {%- if exclude and include -%} 4 | {{ exceptions.raise_compiler_error("Both an exclude and include list were provided to the `union` macro. Only one is allowed") }} 5 | {%- endif -%} 6 | 7 | {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. 
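As in `star`, the column introspection below only runs once `execute` is True.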
-#} 8 | {%- if not execute %} 9 | {{ return('') }} 10 | {% endif -%} 11 | 12 | {%- set column_override = column_override if column_override is not none else {} -%} 13 | {%- set source_column_name = source_column_name if source_column_name is not none else '_dbt_source_relation' -%} 14 | 15 | {%- set relation_columns = {} -%} 16 | {%- set column_superset = {} -%} 17 | 18 | {%- for relation in relations -%} 19 | 20 | {%- do relation_columns.update({relation: []}) -%} 21 | 22 | {%- do dbt_utils._is_relation(relation, 'union_relations') -%} 23 | {%- set cols = adapter.get_columns_in_relation(relation) -%} 24 | {%- for col in cols -%} 25 | 26 | {#- If an exclude list was provided and the column is in the list, do nothing -#} 27 | {%- if exclude and col.column in exclude -%} 28 | 29 | {#- If an include list was provided and the column is not in the list, do nothing -#} 30 | {%- elif include and col.column not in include -%} 31 | 32 | {#- Otherwise add the column to the column superset -#} 33 | {%- else -%} 34 | 35 | {#- update the list of columns in this relation -#} 36 | {%- do relation_columns[relation].append(col.column) -%} 37 | 38 | {%- if col.column in column_superset -%} 39 | 40 | {%- set stored = column_superset[col.column] -%} 41 | {%- if col.is_string() and stored.is_string() and col.string_size() > stored.string_size() -%} 42 | 43 | {%- do column_superset.update({col.column: col}) -%} 44 | 45 | {%- endif %} 46 | 47 | {%- else -%} 48 | 49 | {%- do column_superset.update({col.column: col}) -%} 50 | 51 | {%- endif -%} 52 | 53 | {%- endif -%} 54 | 55 | {%- endfor -%} 56 | {%- endfor -%} 57 | 58 | {%- set ordered_column_names = column_superset.keys() -%} 59 | 60 | {%- for relation in relations %} 61 | 62 | ( 63 | select 64 | 65 | cast({{ dbt_utils.string_literal(relation) }} as {{ dbt_utils.type_string() }}) as {{ source_column_name }}, 66 | {% for col_name in ordered_column_names -%} 67 | 68 | {%- set col = column_superset[col_name] %} 69 | {%- set col_type = column_override.get(col.column, col.data_type) %} 70 | {%- set col_name = adapter.quote(col_name) if col_name in relation_columns[relation] else 'null' %} 71 | cast({{ col_name }} as {{ col_type }}) as {{ col.quoted }} {% if not loop.last %},{% endif -%} 72 | 73 | {%- endfor %} 74 | 75 | from {{ relation }} 76 | ) 77 | 78 | {% if not loop.last -%} 79 | union all 80 | {% endif -%} 81 | 82 | {%- endfor -%} 83 | 84 | {%- endmacro -%} 85 | 86 | {%- macro union_tables(tables, column_override=none, include=[], exclude=[], source_column_name='_dbt_source_table') -%} 87 | 88 | {%- do exceptions.warn("Warning: the `union_tables` macro is no longer supported and will be deprecated in a future release of dbt-utils. Use the `union_relations` macro instead") -%} 89 | 90 | {{ return(dbt_utils.union_relations(tables, column_override, include, exclude, source_column_name)) }} 91 | 92 | {%- endmacro -%} 93 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/sql/unpivot.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Pivot values from columns to rows. Similar to pandas DataFrame melt() function. 3 | 4 | Example Usage: {{ unpivot(relation=ref('users'), cast_to='integer', exclude=['id','created_at']) }} 5 | 6 | Arguments: 7 | relation: Relation object, required. 8 | cast_to: The datatype to cast all unpivoted columns to. Default is varchar. 9 | exclude: A list of columns to keep but exclude from the unpivot operation. Default is none. 
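Excluded columns are passed through unchanged on every output row; compare `remove` below, which drops columns from the result entirely.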
10 | remove: A list of columns to remove from the resulting table. Default is none. 11 | field_name: Destination table column name for the source table column names. 12 | value_name: Destination table column name for the pivoted values 13 | #} 14 | 15 | {% macro unpivot(relation=none, cast_to='varchar', exclude=none, remove=none, field_name='field_name', value_name='value', table=none) -%} 16 | 17 | {% if table %} 18 | {% do exceptions.warn("Warning: the `unpivot` macro no longer accepts a `table` parameter. This parameter will be deprecated in a future release of dbt-utils. Use the `relation` parameter instead") %} 19 | {% endif %} 20 | 21 | {% if relation and table %} 22 | {{ exceptions.raise_compiler_error("Error: both the `relation` and `table` parameters were provided to `unpivot` macro. Choose one only (we recommend `relation`).") }} 23 | {% elif not relation and table %} 24 | {% set relation=table %} 25 | {% elif not relation and not table %} 26 | {{ exceptions.raise_compiler_error("Error: argument `relation` is required for `unpivot` macro.") }} 27 | {% endif %} 28 | 29 | {%- set exclude = exclude if exclude is not none else [] %} 30 | {%- set remove = remove if remove is not none else [] %} 31 | 32 | {%- set include_cols = [] %} 33 | 34 | {%- set table_columns = {} %} 35 | 36 | {%- do table_columns.update({relation: []}) %} 37 | 38 | {%- do dbt_utils._is_relation(relation, 'unpivot') -%} 39 | {%- set cols = adapter.get_columns_in_relation(relation) %} 40 | 41 | {%- for col in cols -%} 42 | {%- if col.column.lower() not in remove|map('lower') and col.column.lower() not in exclude|map('lower') -%} 43 | {% do include_cols.append(col) %} 44 | {%- endif %} 45 | {%- endfor %} 46 | 47 | 48 | {%- for col in include_cols -%} 49 | select 50 | {%- for exclude_col in exclude %} 51 | {{ exclude_col }}, 52 | {%- endfor %} 53 | 54 | cast('{{ col.column }}' as {{ dbt_utils.type_string() }}) as {{ field_name }}, 55 | cast({{ col.column }} as {{ cast_to }}) as {{ value_name }} 56 | 57 | from {{ relation }} 58 | 59 | {% if not loop.last -%} 60 | union all 61 | {% endif -%} 62 | {%- endfor -%} 63 | 64 | {%- endmacro %} 65 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/web/get_url_host.sql: -------------------------------------------------------------------------------- 1 | {% macro get_url_host(field) -%} 2 | 3 | {%- set parsed = 4 | dbt_utils.split_part( 5 | dbt_utils.split_part( 6 | dbt_utils.replace( 7 | dbt_utils.replace(field, "'http://'", "''" 8 | ), "'https://'", "''" 9 | ), "'/'", 1 10 | ), "'?'", 1 11 | ) 12 | 13 | -%} 14 | 15 | 16 | {{ dbt_utils.safe_cast( 17 | parsed, 18 | dbt_utils.type_string() 19 | )}} 20 | 21 | 22 | {%- endmacro %} -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/web/get_url_parameter.sql: -------------------------------------------------------------------------------- 1 | {% macro get_url_parameter(field, url_parameter) -%} 2 | 3 | {%- set formatted_url_parameter = "'" + url_parameter + "='" -%} 4 | 5 | {%- set split = dbt_utils.split_part(dbt_utils.split_part(field, formatted_url_parameter, 2), "'&'", 1) -%} 6 | 7 | nullif({{ split }},'') 8 | 9 | {%- endmacro %} 10 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/macros/web/get_url_path.sql: -------------------------------------------------------------------------------- 1 | {% macro get_url_path(field) -%} 2 | 3 | {%- set stripped_url 
= 4 | dbt_utils.replace( 5 | dbt_utils.replace(field, "'http://'", "''"), "'https://'", "''") 6 | -%} 7 | 8 | {%- set first_slash_pos -%} 9 | coalesce( 10 | nullif({{dbt_utils.position("'/'", stripped_url)}}, 0), 11 | {{dbt_utils.position("'?'", stripped_url)}} - 1 12 | ) 13 | {%- endset -%} 14 | 15 | {%- set parsed_path = 16 | dbt_utils.split_part( 17 | dbt_utils.right( 18 | stripped_url, 19 | dbt_utils.length(stripped_url) ~ "-" ~ first_slash_pos 20 | ), 21 | "'?'", 1 22 | ) 23 | -%} 24 | 25 | {{ dbt_utils.safe_cast( 26 | parsed_path, 27 | dbt_utils.type_string() 28 | )}} 29 | 30 | {%- endmacro %} 31 | -------------------------------------------------------------------------------- /dbt_modules/dbt_utils/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | VENV="venv/bin/activate" 3 | 4 | if [[ ! -f $VENV ]]; then 5 | python3 -m venv venv 6 | . $VENV 7 | 8 | pip install --upgrade pip setuptools 9 | pip install dbt 10 | fi 11 | 12 | . $VENV 13 | cd integration_tests 14 | 15 | if [[ ! -e ~/.dbt/profiles.yml ]]; then 16 | mkdir -p ~/.dbt 17 | cp ci/sample.profiles.yml ~/.dbt/profiles.yml 18 | fi 19 | 20 | _models="" 21 | _seeds="--full-refresh" 22 | if [[ ! -z $2 ]]; then _models="--models $2"; fi 23 | if [[ ! -z $3 ]]; then _seeds="--select $3 --full-refresh"; fi 24 | 25 | dbt deps --target $1 26 | dbt seed --target $1 $_seeds 27 | dbt run --target $1 $_models 28 | dbt test --target $1 $_models 29 | -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | # Name your project! Project names should contain only lowercase characters 3 | # and underscores. A good package name should reflect your organization's 4 | # name or the intended use of these models 5 | name: 'ga4_bigquery_starter' 6 | version: '1.0.0' 7 | config-version: 2 8 | 9 | # This setting configures which "profile" dbt uses for this project. 10 | profile: 'ga4-bigquery' 11 | 12 | # These configurations specify where dbt should look for different types of files. 13 | # The `source-paths` config, for example, states that models in this project can be 14 | # found in the "models/" directory. You probably won't need to change these! 15 | source-paths: ["models"] 16 | analysis-paths: ["analysis"] 17 | test-paths: ["tests"] 18 | data-paths: ["data"] 19 | macro-paths: ["macros"] 20 | snapshot-paths: ["snapshots"] 21 | 22 | target-path: "target" # directory which will store compiled SQL files 23 | clean-targets: # directories to be removed by `dbt clean` 24 | - "target" 25 | - "dbt_modules" 26 | 27 | 28 | # Configuring models 29 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 30 | 31 | # In this example config, we tell dbt to build all models in the example/ directory 32 | # as tables. These settings can be overridden in the individual model files 33 | # using the `{{ config(...) }}` macro. 
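For example, a model that should stay a view despite the table defaults below could start with {{ config(materialized='view') }} (a hypothetical override, not part of this project).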
34 | 35 | vars: 36 | session_lookback_days: 2 37 | 38 | models: 39 | ga4_bigquery_starter: 40 | +persist_docs: 41 | relation: true 42 | columns: true 43 | admin: 44 | materialized: table 45 | base: 46 | materialized: table 47 | join: 48 | materialized: table 49 | math: 50 | materialized: table 51 | visualization: 52 | materialized: table 53 | -------------------------------------------------------------------------------- /macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-is-for-losers/ga4-bigquery-starter/4e38153440ac30b86b9b1a0a31c1f6893e1dbbb5/macros/.gitkeep -------------------------------------------------------------------------------- /macros/get_column_values.sql: -------------------------------------------------------------------------------- 1 | {% macro get_column_values(table, column, max_records=none, filter_column=none, filter_value=none, filter_column_2=none, filter_value_2=none) -%} 2 | 3 | {%- call statement('get_column_values', fetch_result=True) %} 4 | 5 | select 6 | {{ column }} as value 7 | 8 | from {{ table }} 9 | 10 | -- dummy predicate so each optional filter can safely start with `and` 11 | where 1 = 1 12 | {% if filter_column is not none %} 13 | and {{ filter_column }} = '{{ filter_value }}' 14 | {% endif %} 15 | 16 | {% if filter_column_2 is not none %} 17 | and {{ filter_column_2 }} = '{{ filter_value_2 }}' 18 | {% endif %} 19 | 20 | group by 1 21 | order by count(*) desc 22 | 23 | {% if max_records is not none %} 24 | limit {{ max_records }} 25 | {% endif %} 26 | 27 | {%- endcall -%} 28 | 29 | {%- set value_list = load_result('get_column_values') -%} 30 | 31 | {%- if value_list and value_list['data'] -%} 32 | {%- set values = value_list['data'] | map(attribute=0) | list %} 33 | {{ return(values) }} 34 | {%- else -%} 35 | {{ return([]) }} 36 | {%- endif -%} 37 | 38 | {%- endmacro %} -------------------------------------------------------------------------------- /models/base/dedup_events.sql: -------------------------------------------------------------------------------- 1 | {{ config( 2 | materialized='incremental', 3 | partition_by={ 4 | "field": "table_date", 5 | "data_type": "date"}, 6 | cluster_by= ["table_date","user_pseudo_id","event_name"], 7 | incremental_strategy = 'insert_overwrite' 8 | )}} 9 | 10 | select 11 | * except(row) 12 | from ( 13 | select 14 | -- extracts date from source table 15 | parse_date('%Y%m%d',regexp_extract(_table_suffix,'[0-9]+')) as table_date, 16 | -- flag to indicate if source table is `events_intraday_` 17 | case when _table_suffix like '%intraday%' then true else false end as is_intraday, 18 | *, 19 | row_number() over (partition by user_pseudo_id, event_name, event_timestamp order by event_timestamp) as row 20 | from 21 | `{{ target.project }}.{{ target.schema }}.events_*` 22 | 23 | {% if is_incremental() %} 24 | -- Refresh only recent session data to limit query costs, unless running with --full-refresh 25 | where regexp_extract(_table_suffix,'[0-9]+') BETWEEN FORMAT_DATE("%Y%m%d", DATE_SUB(CURRENT_DATE(), INTERVAL {{ var('session_lookback_days') }} DAY)) AND 26 | FORMAT_DATE("%Y%m%d", CURRENT_DATE()) 27 | {% endif %} 28 | ) 29 | where 30 | row = 1 -------------------------------------------------------------------------------- /models/base/pageviews_proc.sql: -------------------------------------------------------------------------------- 1 | {{ config( 2 | materialized='incremental', 3 | partition_by={ 4 | "field": "event_date", 5 | "data_type": "date"}, 6 | cluster_by= ["event_date",
"ga_session_id"], 7 | incremental_strategy = 'insert_overwrite' 8 | )}} 9 | 10 | SELECT 11 | parse_date("%Y%m%d", event_date) event_date, 12 | event_timestamp, 13 | user_pseudo_id, 14 | user_first_touch_timestamp, 15 | device.category as device_category, 16 | device.language as device_language, 17 | device.web_info.browser as device_browser, 18 | geo.continent as geo_continent, 19 | geo.country as geo_country, 20 | max(if(params.key = 'ga_session_id', params.value.int_value, null)) ga_session_id, 21 | max(if(params.key = 'ga_session_number', params.value.int_value, null)) ga_session_number, 22 | cast(max(if(params.key = 'session_engaged', params.value.string_value, null)) as int64) session_engaged, 23 | max(if(params.key = 'page_title', params.value.string_value, null)) page_title, 24 | max(if(params.key = 'page_location', params.value.string_value, null)) page_location, 25 | max(if(params.key = 'source', params.value.string_value, null)) utm_source, 26 | max(if(params.key = 'medium', params.value.string_value, null)) utm_medium, 27 | max(if(params.key = 'campaign', params.value.string_value, null)) utm_campaign, 28 | max(if(params.key = 'page_referrer', params.value.string_value, null)) utm_referrer, 29 | max(ecommerce.transaction_id) ecommerce_transaction_id, 30 | max(ecommerce.purchase_revenue) ecommerce_purchase_revenue 31 | FROM 32 | {{ ref('dedup_events') }}, 33 | UNNEST(event_params) AS params 34 | WHERE event_name = 'page_view' 35 | 36 | {% if is_incremental() %} 37 | -- Refresh only recent session data to limit query costs, unless running with --full-refresh 38 | AND table_date BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL {{ var('session_lookback_days') }} DAY) AND 39 | CURRENT_DATE() 40 | {% endif %} 41 | 42 | GROUP BY event_date, event_timestamp, user_pseudo_id, user_first_touch_timestamp, device_category, device_language, device_browser, geo_continent, geo_country -------------------------------------------------------------------------------- /models/base/unique_events_and_parameters.sql: -------------------------------------------------------------------------------- 1 | -- this query creates a list of unique events and parameters with the corresponding data types and the amount these event-parameter combinations occur in the GA4 property (only within the lookback period) 2 | SELECT 3 | event_name, 4 | params.key AS event_parameter_key, 5 | CASE 6 | WHEN params.value.string_value IS NOT NULL THEN 'string' 7 | WHEN params.value.int_value IS NOT NULL THEN 'int' 8 | WHEN params.value.double_value IS NOT NULL THEN 'double' 9 | WHEN params.value.float_value IS NOT NULL THEN 'float' 10 | END 11 | AS event_parameter_value, 12 | count(*) as amount 13 | FROM 14 | {{ ref('dedup_events') }}, 15 | UNNEST(event_params) AS params 16 | WHERE 17 | table_date BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL {{ var('session_lookback_days') }} DAY) AND CURRENT_DATE() 18 | GROUP BY 19 | event_name, 20 | event_parameter_key, 21 | event_parameter_value 22 | ORDER BY 23 | event_name, 24 | event_parameter_key -------------------------------------------------------------------------------- /models/math/user_page_paths.sql: -------------------------------------------------------------------------------- 1 | {{ config( 2 | materialized='incremental', 3 | partition_by={ 4 | "field": "event_date", 5 | "data_type": "date"}, 6 | cluster_by= ["event_date","user_pseudo_id","ga_session_id"], 7 | incremental_strategy = 'insert_overwrite' 8 | )}} 9 | 10 | SELECT 11 | event_date, 12 | event_timestamp, 13 | user_pseudo_id, 
14 | user_first_touch_timestamp, 15 | CASE WHEN first_value(event_timestamp) over paths = user_first_touch_timestamp THEN 'New' 16 | ELSE 'Returning' END as user_type, 17 | ga_session_id, 18 | ga_session_number, 19 | session_engaged, 20 | page_title, 21 | page_location, 22 | row_number() over paths as session_event_order, 23 | first_value(page_location) over paths as first_path, 24 | last_value(page_location) over (paths rows between unbounded preceding and unbounded following) as last_path, -- last_value needs an explicit frame: the default frame ends at the current row and would return the current page instead of the session's last 25 | lag(page_location) over paths as prev_path, 26 | lead(page_location) over paths as next_path, 27 | first_value(utm_source) over paths as utm_source, 28 | first_value(utm_medium) over paths as utm_medium, 29 | first_value(utm_campaign) over paths as utm_campaign, 30 | first_value(utm_referrer) over paths as utm_referrer, 31 | device_category, 32 | device_language, 33 | device_browser, 34 | geo_continent, 35 | geo_country, 36 | ecommerce_transaction_id, 37 | ecommerce_purchase_revenue 38 | FROM {{ ref('pageviews_proc') }} 39 | {% if is_incremental() %} 40 | 41 | -- Refresh only recent session data to limit query costs, unless running with --full-refresh 42 | WHERE event_date >= date_sub(current_date(), INTERVAL {{ var('session_lookback_days') }} DAY) 43 | 44 | {% endif %} 45 | WINDOW paths as (PARTITION BY user_pseudo_id, ga_session_id ORDER BY event_timestamp asc) -------------------------------------------------------------------------------- /models/schema.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | models: 5 | 6 | - name: dedup_events 7 | description: "Source table for other models. Takes all data from `events_` and (if available) `events_intraday_` tables and deduplicates by user_pseudo_id, event_name and event_timestamp." 8 | 9 | - name: pageviews_proc 10 | description: "Flattens each `page_view` event into a row. All columns are named as they appear in the `events_` table." 11 | 12 | - name: user_page_paths 13 | description: "Supports page path sequence reporting. Tags each pageview event from `pageviews_proc` with a `session_event_order`." 14 | columns: 15 | - name: session_event_order 16 | description: "Pageview number within the session" 17 | tests: 18 | - not_null 19 | - name: user_type 20 | description: "New vs returning, based on `user_first_touch_timestamp`." -------------------------------------------------------------------------------- /packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: fishtown-analytics/dbt_utils 3 | version: 0.4.0 -------------------------------------------------------------------------------- /snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-is-for-losers/ga4-bigquery-starter/4e38153440ac30b86b9b1a0a31c1f6893e1dbbb5/snapshots/.gitkeep -------------------------------------------------------------------------------- /tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-is-for-losers/ga4-bigquery-starter/4e38153440ac30b86b9b1a0a31c1f6893e1dbbb5/tests/.gitkeep --------------------------------------------------------------------------------