├── .github └── CODEOWNERS ├── .gitignore ├── License.md ├── README.md ├── bq-incrementals ├── .gitignore ├── data │ ├── seed_cov_bond.csv │ ├── seed_defs_op.csv │ └── seed_oss.csv ├── dbt_project.yml ├── macros │ └── get_last_3d.sql ├── models │ ├── insert_overwrite_dynamic │ │ ├── iod_aggregated.sql │ │ ├── iod_enriched.sql │ │ └── iod_goldilocks.sql │ ├── insert_overwrite_static │ │ ├── ios_aggregated.sql │ │ ├── ios_enriched.sql │ │ └── ios_goldilocks.sql │ ├── merge │ │ ├── m_aggregated.sql │ │ ├── m_enriched.sql │ │ └── m_goldilocks.sql │ ├── merge_clustered │ │ ├── mc_aggregated.sql │ │ ├── mc_enriched.sql │ │ └── mc_goldilocks.sql │ ├── pages_of_interest.sql │ └── wikipedia_source.yml └── packages.yml ├── business-hours ├── .gitignore ├── README.md ├── analysis │ └── .gitkeep ├── dbt_project.yml ├── macros │ ├── .gitkeep │ ├── attempt-1-macros │ │ └── business_time_functions.sql │ └── attempt-2-subquery │ │ └── business_time_functions.sql ├── models │ ├── all_business_hours.sql │ └── fct_support_tickets.sql ├── packages.yml ├── seeds │ ├── .gitkeep │ └── sample_tickets.csv ├── snapshots │ └── .gitkeep └── tests │ └── .gitkeep ├── dynamic-data-masking-redshift ├── README.md ├── data │ └── employees.csv ├── dbt_project.yml ├── macros │ └── apply_data_masking.sql ├── models │ └── employees_with_masking.sql └── packages.yml ├── insert_by_period ├── .gitignore ├── Makefile ├── README.md ├── dbt_project.yml ├── integration_tests │ ├── .gitignore │ ├── ci │ │ └── sample.profiles.yml │ ├── data │ │ ├── data_insert_by_period.csv │ │ └── data_insert_by_period_overwrite.csv │ ├── dbt_project.yml │ ├── macros │ │ ├── .gitkeep │ │ ├── assert_equal_values.sql │ │ ├── limit_zero.sql │ │ └── tests.sql │ ├── models │ │ ├── expected_insert_by_period.sql │ │ ├── expected_insert_by_period_overwrite.sql │ │ ├── schema.yml │ │ └── test_insert_by_period.sql │ └── packages.yml └── macros │ ├── create_relation_for_insert_by_period.sql │ ├── get_period_boundaries.sql │ ├── get_period_sql.sql │ ├── get_rows_inserted.sql │ └── insert_by_period_materialization.sql ├── lambda-views ├── .gitignore ├── README.md ├── data │ └── .gitkeep ├── dbt_project.yml ├── etc │ ├── option-1-dag.png │ └── option-2-dag.png ├── macros │ ├── .gitkeep │ ├── lambda │ │ ├── lambda_filter.sql │ │ └── lambda_union.sql │ └── models │ │ ├── page_views_model_sql.sql │ │ └── sessions_model_sql.sql └── models │ ├── option_1 │ ├── page_views.sql │ ├── page_views__lambda_current.sql │ ├── page_views__lambda_historical.sql │ ├── sessions.sql │ ├── sessions__lambda_current.sql │ └── sessions__lambda_historical.sql │ ├── option_2 │ ├── page_views.sql │ ├── page_views__lambda_historical.sql │ ├── sessions.sql │ └── sessions__lambda_historical.sql │ ├── sources.yml │ └── thought_experiment │ ├── page_views.sql │ └── sessions.sql ├── materialized-views ├── .gitignore ├── README.md ├── dbt_project.yml ├── integration_tests │ ├── .gitignore │ ├── Makefile │ ├── dbt_project.yml │ ├── macros │ │ └── overrides.sql │ ├── models │ │ ├── base_tbl.sql │ │ ├── schema.yml │ │ ├── test_mv_auto.sql │ │ └── test_mv_manual.sql │ ├── packages.yml │ └── seed │ │ ├── expected.csv │ │ ├── seed.csv │ │ └── seed_update.csv └── macros │ ├── bigquery │ ├── adapters.sql │ └── materialized_view.sql │ ├── default │ ├── adapters.sql │ └── materialized_view.sql │ ├── postgres │ └── adapters.sql │ ├── redshift │ └── adapters.sql │ └── snowflake │ ├── adapters.sql │ └── materialized_view.sql ├── read-external-iceberg ├── .gitignore ├── README.md ├── dbt_project.yml ├── macros 
│ └── plugins │ │ └── snowflake │ │ ├── create_iceberg_source.sql │ │ └── get_external_build_plan.sql └── packages.yml └── snapshot-testing ├── .gitignore ├── README.md ├── data ├── .gitkeep └── fct_orders.csv ├── dbt_project.yml ├── macros ├── .gitkeep ├── historic_revenue_snapshot_cleanup.sql └── test_is_null.sql ├── snapshots ├── .gitkeep ├── historic_revenue_snapshot.sql └── schema.yml └── tests └── .gitkeep /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @dbt-labs/core-team 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | dbt_modules/ 3 | logs/ -------------------------------------------------------------------------------- /License.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dbt Labs: Experimental Features 2 | 3 | This repository includes projects that extend existing dbt features, experiment with new database features not yet natively supported in dbt, or otherwise demonstrate cool stuff you can do with just Jinja macros in your project—no forks necessary. 4 | 5 | In all cases, these are _demo_ projects, not intended as ready-to-use packages.
If you want to use code from this repository in your own project, you're more than welcome to clone and install as a [local package](https://docs.getdbt.com/docs/building-a-dbt-project/package-management/#local-packages), or just copy-paste :) A minimal local-package example is included near the end of this README. 6 | 7 | ## [BigQuery Incremental Strategies](bq-incrementals) 8 | 9 | * These features shipped in dbt v0.16.0! See [changelog](https://github.com/fishtown-analytics/dbt/blob/dev/octavius-catto/CHANGELOG.md#features-4) and [docs](https://docs.getdbt.com/docs/building-a-dbt-project/building-models/bigquery-configs/#merge-behavior-incremental-models) 10 | * The [project here](bq-incrementals) provided the substrate for a [discourse post](https://discourse.getdbt.com/t/981) benchmarking different incremental strategies on BigQuery 11 | 12 | ## [Materialized views](materialized-views) 13 | 14 | This project adds support for `materialized_view` as a new dbt materialization. It includes implementations for Postgres, Redshift, Snowflake, and BigQuery, through a mix of new macros and overrides of built-in dbt macros. See the [project README](materialized-views/README.md) for details. For another take on dbt + materialized views, check out the [dbt-materialize](https://github.com/MaterializeInc/materialize/tree/main/misc/dbt-materialize#dbt-materialize) plugin. 15 | 16 | ## [Lambda views](lambda-views) 17 | This lab demonstrates a number of options for lambda views, as discussed in this [discourse article](https://discourse.getdbt.com/t/how-to-create-near-real-time-models-with-just-dbt-sql/1457/3). Additional details about the various approaches can be found at [lambda-views/README.md](lambda-views/README.md). 18 | 19 | ## [Snapshot testing](snapshot-testing) 20 | This lab demonstrates how to use snapshots to detect dbt model regressions, as discussed in this [discourse article](https://discourse.getdbt.com/t/build-snapshot-based-tests-to-detect-regressions-in-historic-data/1478). Additional details on how to test this code for yourself can be found at [snapshot-testing/README.md](snapshot-testing/README.md). 21 | 22 | 23 | ## [Dynamic data masking on Redshift](dynamic-data-masking-redshift) 24 | This lab demonstrates how to implement dynamic data masking on Redshift. 25 | 26 | Check out [this discourse article](https://discourse.getdbt.com/t/how-to-implement-dynamic-data-masking-on-redshift/2043) for more information. 27 | 28 | ## [Time on Task](business-hours) 29 | 30 | This lab demonstrates two strategies for measuring Time on Task. 31 | 32 | Check out [this devhub article](https://docs.getdbt.com/blog/measuring-business-hours-sql-time-on-task) for more information.
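## Installing a project as a local package

If you do want to pull one of these demo projects into your own dbt project, a minimal sketch of a `packages.yml` entry is below. The relative path is just an example; point it at wherever your clone of this repository lives, then run `dbt deps`.

```yml
packages:
  # example path: a clone of this repo checked out next to your dbt project
  - local: ../dbt-labs-experimental-features/lambda-views
```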
33 | 34 | ## Resources: 35 | - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) 36 | - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers 37 | - Join the [chat](http://community.getdbt.com/) on Slack for live discussions and support 38 | - Find [dbt events](https://events.getdbt.com) near you 39 | - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices 40 | -------------------------------------------------------------------------------- /bq-incrementals/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /bq-incrementals/data/seed_cov_bond.csv: -------------------------------------------------------------------------------- 1 | lang,title 2 | af,"Kovalente_binding" 3 | ar,"رابطة_تساهمية" 4 | ast,"Enllaz_covalente" 5 | az,"Kovalent_əlaqə" 6 | be,"Кавалентная_сувязь" 7 | bg,"Ковалентна_връзка" 8 | bn,"সমযোজী_বন্ধন" 9 | bs,"Kovalentna_veza" 10 | ca,"Enllaç_covalent" 11 | ckb,"بەندی_کوالانسی" 12 | cs,"Kovalentní_vazba" 13 | cy,"Bond_cofalent" 14 | da,"Kovalent_binding" 15 | de,"Kovalente_Bindung" 16 | el,"Ομοιοπολικός_δεσμός" 17 | en,"Covalent_bond" 18 | eo,"Kovalenta_ligo" 19 | es,"Enlace_covalente" 20 | et,"Kovalentne_side" 21 | eu,"Lotura_kobalente" 22 | fa,"پیوند_کووالانسی" 23 | fi,"Kovalenttinen_sidos" 24 | fr,"Liaison_covalente" 25 | gl,"Enlace_covalente" 26 | gv,"Kiangley_cofioosagh" 27 | he,"קשר_קוולנטי" 28 | hi,"सहसंयोजी_आबंध" 29 | hr,"Kovalentna_veza" 30 | ht,"Lyezon_kovalan" 31 | hu,"Kovalens_kötés" 32 | id,"Ikatan_kovalen" 33 | it,"Legame_covalente" 34 | ja,"共有結合" 35 | jv,"Ikatan_Kovalen" 36 | kab,"Turza_tattekkant" 37 | ka,"კოვალენტური_ბმა" 38 | kk,"Коваленттік_байланыс" 39 | km,"សម្ព័ន្ធកូវ៉ាឡង់" 40 | ko,"공유_결합" 41 | lt,"Kovalentinis_ryšys" 42 | lv,"Kovalentā_saite" 43 | mk,"Ковалентна_врска" 44 | ml,"സഹസംയോജകബന്ധനം" 45 | mr,"सहसंयुज_बंध" 46 | ms,"Ikatan_kovalen" 47 | nl,"Covalente_binding" 48 | nn,"Kovalent_binding" 49 | no,"Kovalent_binding" 50 | oc,"Ligam_covalent" 51 | pa,"ਸਹਿਯੋਜਕੀ_ਜੋੜ" 52 | pl,"Wiązanie_kowalencyjne" 53 | pnb,"کوویلنٹ_جوڑ" 54 | pt,"Ligação_covalente" 55 | ro,"Legătură_covalentă" 56 | rue,"Ковалентна_вязба" 57 | ru,"Ковалентная_связь" 58 | sco,"Covalent_bond" 59 | sh,"Kovalentna_veza" 60 | simple,"Covalent_bond" 61 | si,"සහසංයුජ_බන්ධනය" 62 | sk,"Kovalentná_väzba" 63 | sl,"Kovalentna_vez" 64 | sq,"Lidhja_kovalente" 65 | sr,"Ковалентна_веза" 66 | su,"Beungkeut_kovalén" 67 | sv,"Kovalent_bindning" 68 | ta,"சகப்_பிணைப்பு" 69 | th,"พันธะโคเวเลนต์" 70 | tr,"Kovalent_bağ" 71 | tt,"Ковалент_бәйләнеш" 72 | uk,"Ковалентний_зв'язок" 73 | ur,"کوویلنٹ_بونڈ" 74 | vi,"Liên_kết_cộng_hóa_trị" 75 | wuu,"共价键" 76 | yi,"קאוואלענטער_בונד" 77 | yo,"Ìsopọ̀_àjọfagbáradìmú" 78 | zh_classical,"共價鍵" 79 | zh_min_nan,"Kiōng-iú_kiat-ha̍p" 80 | zh_yue,"共價鍵" 81 | zh,"共价键" -------------------------------------------------------------------------------- /bq-incrementals/data/seed_defs_op.csv: -------------------------------------------------------------------------------- 1 | lang,title 2 | be,"Пражскія_дэфенестрацыі" 3 | bg,"Пражка_дефенестрация" 4 | br,"Difrenestriñ_Praha" 5 | ca,"Defenestració_de_Praga" 6 | en,"Defenestrations_of_Prague" 7 | es,"Defenestraciones_de_Praga" 8 | fr,"Défenestration_de_Prague" 9 | gl,"Defenestracións_de_Praga" 10 | hr,"Praška_defenestracija" 11 | id,"Pelemparan_di_Praha" 12 
| ja,"プラハ窓外放出事件" 13 | ka,"პრაღის_დეფენესტრაციები" 14 | ko,"프라하_창밖_투척사건" 15 | la,"Defenestratio_Pragensis" 16 | ms,"Defenestratio_Pragensis" 17 | no,"Defenestrasjonene_i_Praha" 18 | pl,"Defenestracja_praska" 19 | pt,"Defenestrações_de_Praga" 20 | ro,"Defenestrațiile_de_la_Praga" 21 | ru,"Пражские_дефенестрации" 22 | scn,"Difinistrazzioni_di_Praga" 23 | sh,"Praška_defenestracija" 24 | sl,"Praška_defenestracija" 25 | sr,"Прашка_дефенестрација" 26 | sv,"Defenestrationerna_i_Prag" 27 | uk,"Празька_дефенестрація" 28 | vec,"Desfenestrazion_de_Praga" 29 | zh,"布拉格拋窗事件" 30 | -------------------------------------------------------------------------------- /bq-incrementals/data/seed_oss.csv: -------------------------------------------------------------------------------- 1 | lang,title 2 | af,"Oopbronsagteware" 3 | ar,"برمجيات_مفتوحة_المصدر" 4 | ast,"Software_de_códigu_abiertu" 5 | bg,"Софтуер_с_отворен_код" 6 | bn,"উন্মুক্ত-উৎসের_সফটওয়্যার" 7 | bs,"Softver_otvorenog_koda" 8 | ckb,"نەرمامێری_سەرچاوە_کراوە" 9 | cs,"Otevřený_software" 10 | el,"Λογισμικό_ανοικτού_κώδικα" 11 | en,"Open-source_software" 12 | eo,"Malfermkoda_programaro" 13 | es,"Software_de_código_abierto" 14 | et,"Avatud_lähtekoodiga_tarkvara" 15 | fa,"نرم‌افزار_متن‌باز" 16 | fy,"Open-source_software" 17 | gl,"Software_de_código_aberto" 18 | he,"קוד_פתוח" 19 | hi,"मुक्त_स्रोत_सॉफ्टवेयर" 20 | hu,"Nyílt_forráskódú_szoftver" 21 | id,"Perangkat_lunak_sumber_terbuka" 22 | is,"Opinn_hugbúnaður" 23 | ja,"オープンソースソフトウェア" 24 | ko,"오픈_소스_소프트웨어" 25 | ky,"Ачык_булактуу_програмдык_камсыздоо" 26 | la,"Programma_fontium_apertorum" 27 | lt,"Atvirojo_kodo_programa" 28 | ml,"ഓപ്പൺ_സോഴ്സ്_സോഫ്റ്റ്‌വെയർ" 29 | mr,"मुक्त_स्रोत" 30 | ms,"Perisian_sumber_terbuka" 31 | nl,"Opensourcesoftware" 32 | or,"ଓପନ-ସୋର୍ସ_ସଫ୍ଟୱେର" 33 | pa,"ਖੁੱਲ੍ਹਾ-ਸਰੋਤ_ਸਾਫ਼ਟਵੇਅਰ" 34 | pl,"Otwarte_oprogramowanie" 35 | pt,"Software_de_código_aberto" 36 | ro,"Software_cu_sursă_deschisă" 37 | ru,"Открытое_программное_обеспечениеs" 38 | cn,"Open_source" 39 | sh,"Otvoreni_softver" 40 | si,"විවෘත_කේත_මෘදුකාංග" 41 | sk,"Open-source_softvér" 42 | sl,"Odprtokodna_programska_oprema" 43 | sq,"Softuerët_me_burim_të_hapur" 44 | sr,"Softver_otvorenog_koda" 45 | ta,"திறந்த_மூல_மென்பொருள்te ఓపెన్_సోర్సు_సాఫ్ట్​వేర్" 46 | th,"ซอฟต์แวร์โอเพนซอร์ซ" 47 | uk,"Відкрите_програмне_забезпечення" 48 | uz,"Ochiq_manbali_dastur" 49 | vi,"Phần_mềm_nguồn_mở" 50 | wuu,"开源软件" 51 | zh_min_nan,"Khai-goân_nńg-thé" 52 | zh,"开源软件" 53 | -------------------------------------------------------------------------------- /bq-incrementals/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | name: 'bq_incremental_testing' 3 | version: '0.1.0' 4 | config-version: 2 5 | 6 | profile: 'garage-bigquery' 7 | 8 | source-paths: ["models"] 9 | analysis-paths: ["analysis"] 10 | test-paths: ["tests"] 11 | data-paths: ["data"] 12 | macro-paths: ["macros"] 13 | 14 | target-path: "target" 15 | clean-targets: 16 | - "target" 17 | - "dbt_modules" 18 | 19 | require-dbt-version: ">=0.16.0" 20 | 21 | models: 22 | vars: 23 | old: 3 24 | new: 1 25 | -------------------------------------------------------------------------------- /bq-incrementals/macros/get_last_3d.sql: -------------------------------------------------------------------------------- 1 | {% macro get_last_3d() %} 2 | 3 | {% set partitions = [] %} 4 | 5 | {% set max_d_ago = var('new') + 1 %} 6 | 7 | {% for i in range(1, max_d_ago) %} 8 | {% set this_partition %} date_sub(current_date, interval -{{i}} day) {% endset %} 9 | {% do 
partitions.append(this_partition) %} 10 | {% endfor %} 11 | 12 | {% do return(partitions) %} 13 | 14 | {% endmacro %} 15 | -------------------------------------------------------------------------------- /bq-incrementals/models/insert_overwrite_dynamic/iod_aggregated.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id', 4 | partition_by = {'field': 'date_hour', 'data_type': 'timestamp'}, 5 | incremental_strategy = 'insert_overwrite' 6 | )}} 7 | 8 | with page_views as ( 9 | 10 | select * from {{source('wikipedia', 'pageviews_2020')}} 11 | 12 | {% if is_incremental() %} 13 | -- always rebuild up to the current day 14 | where date(datehour) >= date_sub(date(_dbt_max_partition), interval ({{var('new')}}) day) 15 | and date(datehour) < current_date 16 | {% else %} 17 | -- this source table requires a partition filter regardless 18 | where date(datehour) >= date_sub(current_date, interval ({{var('old')}}) day) 19 | and date(datehour) < current_date 20 | {% endif %} 21 | 22 | ), 23 | 24 | pages_of_interest as ( 25 | 26 | select * from {{ref('pages_of_interest')}} 27 | 28 | ), 29 | 30 | parsed as ( 31 | 32 | select *, 33 | 34 | replace(split(wiki, '.')[offset(0)], '-', '_') as lang 35 | 36 | from page_views 37 | 38 | ), 39 | 40 | tagged as ( 41 | 42 | select * from parsed 43 | left join pages_of_interest using (title, lang) 44 | 45 | ), 46 | 47 | agg as ( 48 | 49 | select 50 | 51 | datehour as date_hour, 52 | subject, 53 | lang, 54 | sum(views) as total_views 55 | 56 | from tagged 57 | group by 1,2,3 58 | 59 | ), 60 | 61 | final as ( 62 | 63 | select 64 | 65 | {{ dbt_utils.surrogate_key('date_hour', 'subject', 'lang') }} as id, 66 | * 67 | 68 | from agg 69 | 70 | ) 71 | 72 | select * from final 73 | -------------------------------------------------------------------------------- /bq-incrementals/models/insert_overwrite_dynamic/iod_enriched.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id', 4 | partition_by = {'field': 'date_hour', 'data_type': 'timestamp'}, 5 | incremental_strategy = 'insert_overwrite' 6 | )}} 7 | 8 | with page_views as ( 9 | 10 | select * from {{source('wikipedia', 'pageviews_2020')}} 11 | 12 | {% if is_incremental() %} 13 | -- always rebuild up to the current day 14 | where date(datehour) >= date_sub(date(_dbt_max_partition), interval ({{var('new')}}) day) 15 | and date(datehour) < current_date 16 | {% else %} 17 | -- this source table requires a partition filter regardless 18 | where date(datehour) >= date_sub(current_date, interval ({{var('old')}}) day) 19 | and date(datehour) < current_date 20 | {% endif %} 21 | 22 | ), 23 | 24 | pages_of_interest as ( 25 | 26 | select * from {{ref('pages_of_interest')}} 27 | 28 | ), 29 | 30 | parsed as ( 31 | 32 | select *, 33 | 34 | replace(split(wiki, '.')[offset(0)], '-', '_') as lang 35 | 36 | from page_views 37 | 38 | ), 39 | 40 | tagged as ( 41 | 42 | select * from parsed 43 | left join pages_of_interest using (title, lang) 44 | 45 | ), 46 | 47 | agg as ( 48 | 49 | select 50 | 51 | datehour as date_hour, 52 | lang, 53 | title, 54 | subject, 55 | sum(views) as views 56 | 57 | from tagged 58 | group by 1,2,3,4 59 | 60 | ), 61 | 62 | final as ( 63 | 64 | select 65 | 66 | {{ dbt_utils.surrogate_key('date_hour', 'lang', 'title') }} as id, 67 | * 68 | 69 | from agg 70 | 71 | ) 72 | 73 | select * from final 74 | 
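-- Note on the incremental filter above: with the dynamic insert_overwrite strategy,
-- dbt's BigQuery adapter declares a scripting variable named _dbt_max_partition before
-- running the merge. A rough sketch of that declaration (the table reference below is a
-- placeholder, not actual generated code):
--
--   declare _dbt_max_partition timestamp default (
--     select max(date_hour) from <existing target table> where date_hour is not null
--   );
--
-- So the filter means: rebuild every partition from var('new') days before the newest
-- partition already in the target table, up to (but not including) the current date.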
-------------------------------------------------------------------------------- /bq-incrementals/models/insert_overwrite_dynamic/iod_goldilocks.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id', 4 | partition_by = {'field': 'date_day', 'data_type': 'date'}, 5 | incremental_strategy = 'insert_overwrite' 6 | )}} 7 | 8 | with page_views as ( 9 | 10 | select * from {{source('wikipedia', 'pageviews_2020')}} 11 | 12 | {% if is_incremental() %} 13 | -- always rebuild up to the current day 14 | where date(datehour) >= date_sub(_dbt_max_partition, interval ({{var('new')}}) day) 15 | and date(datehour) < current_date 16 | {% else %} 17 | -- this source table requires a partition filter regardless 18 | where date(datehour) >= date_sub(current_date, interval ({{var('old')}}) day) 19 | and date(datehour) < current_date 20 | {% endif %} 21 | 22 | ), 23 | 24 | pages_of_interest as ( 25 | 26 | select * from {{ref('pages_of_interest')}} 27 | 28 | ), 29 | 30 | parsed as ( 31 | 32 | select *, 33 | 34 | date(datehour) as date_day, 35 | replace(split(wiki, '.')[offset(0)], '-', '_') as lang 36 | 37 | from page_views 38 | 39 | ), 40 | 41 | tagged as ( 42 | 43 | select * from parsed 44 | left join pages_of_interest using (title, lang) 45 | 46 | ), 47 | 48 | agg as ( 49 | 50 | select 51 | 52 | date_day, 53 | lang, 54 | title, 55 | subject, 56 | sum(views) as views 57 | 58 | from tagged 59 | group by 1,2,3,4 60 | 61 | ), 62 | 63 | final as ( 64 | 65 | select 66 | 67 | {{ dbt_utils.surrogate_key('date_day', 'lang', 'title') }} as id, 68 | * 69 | 70 | from agg 71 | 72 | ) 73 | 74 | select * from final 75 | -------------------------------------------------------------------------------- /bq-incrementals/models/insert_overwrite_static/ios_aggregated.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id', 4 | partition_by = {'field': 'date_hour', 'data_type': 'timestamp'}, 5 | incremental_strategy = 'insert_overwrite', 6 | partitions = get_last_3d() 7 | )}} 8 | 9 | with page_views as ( 10 | 11 | select * from {{source('wikipedia', 'pageviews_2020')}} 12 | 13 | {% if is_incremental() %} 14 | -- always rebuild up to the current day 15 | where date(datehour) >= date_sub(current_date, interval ({{var('new')}}) day) 16 | and date(datehour) < current_date 17 | {% else %} 18 | -- this source table requires a partition filter regardless 19 | where date(datehour) >= date_sub(current_date, interval ({{var('old')}}) day) 20 | and date(datehour) < current_date 21 | {% endif %} 22 | 23 | ), 24 | 25 | pages_of_interest as ( 26 | 27 | select * from {{ref('pages_of_interest')}} 28 | 29 | ), 30 | 31 | parsed as ( 32 | 33 | select *, 34 | 35 | replace(split(wiki, '.')[offset(0)], '-', '_') as lang 36 | 37 | from page_views 38 | 39 | ), 40 | 41 | tagged as ( 42 | 43 | select * from parsed 44 | left join pages_of_interest using (title, lang) 45 | 46 | ), 47 | 48 | agg as ( 49 | 50 | select 51 | 52 | datehour as date_hour, 53 | subject, 54 | lang, 55 | sum(views) as total_views 56 | 57 | from tagged 58 | group by 1,2,3 59 | 60 | ), 61 | 62 | final as ( 63 | 64 | select 65 | 66 | {{ dbt_utils.surrogate_key('date_hour', 'subject', 'lang') }} as id, 67 | * 68 | 69 | from agg 70 | 71 | ) 72 | 73 | select * from final 74 | -------------------------------------------------------------------------------- 
/bq-incrementals/models/insert_overwrite_static/ios_enriched.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id', 4 | partition_by = {'field': 'date_hour', 'data_type': 'timestamp'}, 5 | incremental_strategy = 'insert_overwrite', 6 | partitions = get_last_3d() 7 | )}} 8 | 9 | with page_views as ( 10 | 11 | select * from {{source('wikipedia', 'pageviews_2020')}} 12 | 13 | {% if is_incremental() %} 14 | -- always rebuild up to the current day 15 | where date(datehour) >= date_sub(current_date, interval ({{var('new')}}) day) 16 | and date(datehour) < current_date 17 | {% else %} 18 | -- this source table requires a partition filter regardless 19 | where date(datehour) >= date_sub(current_date, interval ({{var('old')}}) day) 20 | and date(datehour) < current_date 21 | {% endif %} 22 | 23 | ), 24 | 25 | pages_of_interest as ( 26 | 27 | select * from {{ref('pages_of_interest')}} 28 | 29 | ), 30 | 31 | parsed as ( 32 | 33 | select *, 34 | 35 | replace(split(wiki, '.')[offset(0)], '-', '_') as lang 36 | 37 | from page_views 38 | 39 | ), 40 | 41 | tagged as ( 42 | 43 | select * from parsed 44 | left join pages_of_interest using (title, lang) 45 | 46 | ), 47 | 48 | agg as ( 49 | 50 | select 51 | 52 | datehour as date_hour, 53 | lang, 54 | title, 55 | subject, 56 | sum(views) as views 57 | 58 | from tagged 59 | group by 1,2,3,4 60 | 61 | ), 62 | 63 | final as ( 64 | 65 | select 66 | 67 | {{ dbt_utils.surrogate_key('date_hour', 'lang', 'title') }} as id, 68 | * 69 | 70 | from agg 71 | 72 | ) 73 | 74 | select * from final 75 | -------------------------------------------------------------------------------- /bq-incrementals/models/insert_overwrite_static/ios_goldilocks.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id', 4 | partition_by = {'field': 'date_day', 'data_type': 'date'}, 5 | incremental_strategy = 'insert_overwrite', 6 | partitions = get_last_3d() 7 | )}} 8 | 9 | with page_views as ( 10 | 11 | select * from {{source('wikipedia', 'pageviews_2020')}} 12 | 13 | {% if is_incremental() %} 14 | -- always rebuild up to the current day 15 | where date(datehour) >= date_sub(current_date, interval ({{var('new')}}) day) 16 | and date(datehour) < current_date 17 | {% else %} 18 | -- this source table requires a partition filter regardless 19 | where date(datehour) >= date_sub(current_date, interval ({{var('old')}}) day) 20 | and date(datehour) < current_date 21 | {% endif %} 22 | 23 | ), 24 | 25 | pages_of_interest as ( 26 | 27 | select * from {{ref('pages_of_interest')}} 28 | 29 | ), 30 | 31 | parsed as ( 32 | 33 | select *, 34 | 35 | date(datehour) as date_day, 36 | replace(split(wiki, '.')[offset(0)], '-', '_') as lang 37 | 38 | from page_views 39 | 40 | ), 41 | 42 | tagged as ( 43 | 44 | select * from parsed 45 | left join pages_of_interest using (title, lang) 46 | 47 | ), 48 | 49 | agg as ( 50 | 51 | select 52 | 53 | date_day, 54 | lang, 55 | title, 56 | subject, 57 | sum(views) as views 58 | 59 | from tagged 60 | group by 1,2,3,4 61 | 62 | ), 63 | 64 | final as ( 65 | 66 | select 67 | 68 | {{ dbt_utils.surrogate_key('date_day', 'lang', 'title') }} as id, 69 | * 70 | 71 | from agg 72 | 73 | ) 74 | 75 | select * from final 76 | -------------------------------------------------------------------------------- /bq-incrementals/models/merge/m_aggregated.sql: 
-------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id', 4 | partition_by = {'field': 'date_hour', 'data_type': 'timestamp'} 5 | )}} 6 | 7 | with page_views as ( 8 | 9 | select * from {{source('wikipedia', 'pageviews_2020')}} 10 | 11 | {% if is_incremental() %} 12 | -- always rebuild up to the current day 13 | where date(datehour) >= date_sub(current_date, interval ({{var('new')}}) day) 14 | and date(datehour) < current_date 15 | {% else %} 16 | -- this source table requires a partition filter regardless 17 | where date(datehour) >= date_sub(current_date, interval ({{var('old')}}) day) 18 | and date(datehour) < current_date 19 | {% endif %} 20 | 21 | ), 22 | 23 | pages_of_interest as ( 24 | 25 | select * from {{ref('pages_of_interest')}} 26 | 27 | ), 28 | 29 | parsed as ( 30 | 31 | select *, 32 | 33 | replace(split(wiki, '.')[offset(0)], '-', '_') as lang 34 | 35 | from page_views 36 | 37 | ), 38 | 39 | tagged as ( 40 | 41 | select * from parsed 42 | left join pages_of_interest using (title, lang) 43 | 44 | ), 45 | 46 | agg as ( 47 | 48 | select 49 | 50 | datehour as date_hour, 51 | subject, 52 | lang, 53 | sum(views) as total_views 54 | 55 | from tagged 56 | group by 1,2,3 57 | 58 | ), 59 | 60 | final as ( 61 | 62 | select 63 | 64 | {{ dbt_utils.surrogate_key('date_hour', 'subject', 'lang') }} as id, 65 | * 66 | 67 | from agg 68 | 69 | ) 70 | 71 | select * from final 72 | -------------------------------------------------------------------------------- /bq-incrementals/models/merge/m_enriched.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id', 4 | partition_by = {'field': 'date_hour', 'data_type': 'timestamp'} 5 | )}} 6 | 7 | with page_views as ( 8 | 9 | select * from {{source('wikipedia', 'pageviews_2020')}} 10 | 11 | {% if is_incremental() %} 12 | -- always rebuild up to the current day 13 | where date(datehour) >= date_sub(current_date, interval ({{var('new')}}) day) 14 | and date(datehour) < current_date 15 | {% else %} 16 | -- this source table requires a partition filter regardless 17 | where date(datehour) >= date_sub(current_date, interval ({{var('old')}}) day) 18 | and date(datehour) < current_date 19 | {% endif %} 20 | 21 | ), 22 | 23 | pages_of_interest as ( 24 | 25 | select * from {{ref('pages_of_interest')}} 26 | 27 | ), 28 | 29 | parsed as ( 30 | 31 | select *, 32 | 33 | replace(split(wiki, '.')[offset(0)], '-', '_') as lang 34 | 35 | from page_views 36 | 37 | ), 38 | 39 | tagged as ( 40 | 41 | select * from parsed 42 | left join pages_of_interest using (title, lang) 43 | 44 | ), 45 | 46 | agg as ( 47 | 48 | select 49 | 50 | datehour as date_hour, 51 | lang, 52 | title, 53 | subject, 54 | sum(views) as views 55 | 56 | from tagged 57 | group by 1,2,3,4 58 | 59 | ), 60 | 61 | final as ( 62 | 63 | select 64 | 65 | {{ dbt_utils.surrogate_key('date_hour', 'lang', 'title') }} as id, 66 | * 67 | 68 | from agg 69 | 70 | ) 71 | 72 | select * from final 73 | -------------------------------------------------------------------------------- /bq-incrementals/models/merge/m_goldilocks.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id', 4 | partition_by = {'field': 'date_day', 'data_type': 'date'} 5 | )}} 6 | 7 | with page_views as ( 8 | 9 | select * from {{source('wikipedia', 
'pageviews_2020')}} 10 | 11 | {% if is_incremental() %} 12 | -- always rebuild up to the current day 13 | where date(datehour) >= date_sub(current_date, interval ({{var('new')}}) day) 14 | and date(datehour) < current_date 15 | {% else %} 16 | -- this source table requires a partition filter regardless 17 | where date(datehour) >= date_sub(current_date, interval ({{var('old')}}) day) 18 | and date(datehour) < current_date 19 | {% endif %} 20 | 21 | ), 22 | 23 | pages_of_interest as ( 24 | 25 | select * from {{ref('pages_of_interest')}} 26 | 27 | ), 28 | 29 | parsed as ( 30 | 31 | select *, 32 | 33 | date(datehour) as date_day, 34 | replace(split(wiki, '.')[offset(0)], '-', '_') as lang 35 | 36 | from page_views 37 | 38 | ), 39 | 40 | tagged as ( 41 | 42 | select * from parsed 43 | left join pages_of_interest using (title, lang) 44 | 45 | ), 46 | 47 | agg as ( 48 | 49 | select 50 | 51 | date_day, 52 | lang, 53 | title, 54 | subject, 55 | sum(views) as views 56 | 57 | from tagged 58 | group by 1,2,3,4 59 | 60 | ), 61 | 62 | final as ( 63 | 64 | select 65 | 66 | {{ dbt_utils.surrogate_key('date_day', 'lang', 'title') }} as id, 67 | * 68 | 69 | from agg 70 | 71 | ) 72 | 73 | select * from final 74 | -------------------------------------------------------------------------------- /bq-incrementals/models/merge_clustered/mc_aggregated.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id', 4 | partition_by = {'field': 'date_hour', 'data_type': 'timestamp'}, 5 | cluster_by = ['id'] 6 | )}} 7 | 8 | with page_views as ( 9 | 10 | select * from {{source('wikipedia', 'pageviews_2020')}} 11 | 12 | {% if is_incremental() %} 13 | -- always rebuild up to the current day 14 | where date(datehour) >= date_sub(current_date, interval ({{var('new')}}) day) 15 | and date(datehour) < current_date 16 | {% else %} 17 | -- this source table requires a partition filter regardless 18 | where date(datehour) >= date_sub(current_date, interval ({{var('old')}}) day) 19 | and date(datehour) < current_date 20 | {% endif %} 21 | 22 | ), 23 | 24 | pages_of_interest as ( 25 | 26 | select * from {{ref('pages_of_interest')}} 27 | 28 | ), 29 | 30 | parsed as ( 31 | 32 | select *, 33 | 34 | replace(split(wiki, '.')[offset(0)], '-', '_') as lang 35 | 36 | from page_views 37 | 38 | ), 39 | 40 | tagged as ( 41 | 42 | select * from parsed 43 | left join pages_of_interest using (title, lang) 44 | 45 | ), 46 | 47 | agg as ( 48 | 49 | select 50 | 51 | datehour as date_hour, 52 | subject, 53 | lang, 54 | sum(views) as total_views 55 | 56 | from tagged 57 | group by 1,2,3 58 | 59 | ), 60 | 61 | final as ( 62 | 63 | select 64 | 65 | {{ dbt_utils.surrogate_key('date_hour', 'subject', 'lang') }} as id, 66 | * 67 | 68 | from agg 69 | 70 | ) 71 | 72 | select * from final 73 | -------------------------------------------------------------------------------- /bq-incrementals/models/merge_clustered/mc_enriched.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id', 4 | partition_by = {'field': 'date_hour', 'data_type': 'timestamp'}, 5 | cluster_by = ['id'] 6 | )}} 7 | 8 | with page_views as ( 9 | 10 | select * from {{source('wikipedia', 'pageviews_2020')}} 11 | 12 | {% if is_incremental() %} 13 | -- always rebuild up to the current day 14 | where date(datehour) >= date_sub(current_date, interval ({{var('new')}}) day) 15 | and date(datehour) < 
current_date 16 | {% else %} 17 | -- this source table requires a partition filter regardless 18 | where date(datehour) >= date_sub(current_date, interval ({{var('old')}}) day) 19 | and date(datehour) < current_date 20 | {% endif %} 21 | 22 | ), 23 | 24 | pages_of_interest as ( 25 | 26 | select * from {{ref('pages_of_interest')}} 27 | 28 | ), 29 | 30 | parsed as ( 31 | 32 | select *, 33 | 34 | replace(split(wiki, '.')[offset(0)], '-', '_') as lang 35 | 36 | from page_views 37 | 38 | ), 39 | 40 | tagged as ( 41 | 42 | select * from parsed 43 | left join pages_of_interest using (title, lang) 44 | 45 | ), 46 | 47 | agg as ( 48 | 49 | select 50 | 51 | datehour as date_hour, 52 | lang, 53 | title, 54 | subject, 55 | sum(views) as views 56 | 57 | from tagged 58 | group by 1,2,3,4 59 | 60 | ), 61 | 62 | final as ( 63 | 64 | select 65 | 66 | {{ dbt_utils.surrogate_key('date_hour', 'lang', 'title') }} as id, 67 | * 68 | 69 | from agg 70 | 71 | ) 72 | 73 | select * from final 74 | -------------------------------------------------------------------------------- /bq-incrementals/models/merge_clustered/mc_goldilocks.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id', 4 | partition_by = {'field': 'date_day', 'data_type': 'date'}, 5 | cluster_by = ['id'] 6 | )}} 7 | 8 | with page_views as ( 9 | 10 | select * from {{source('wikipedia', 'pageviews_2020')}} 11 | 12 | {% if is_incremental() %} 13 | -- always rebuild up to the current day 14 | where date(datehour) >= date_sub(current_date, interval ({{var('new')}}) day) 15 | and date(datehour) < current_date 16 | {% else %} 17 | -- this source table requires a partition filter regardless 18 | where date(datehour) >= date_sub(current_date, interval ({{var('old')}}) day) 19 | and date(datehour) < current_date 20 | {% endif %} 21 | 22 | ), 23 | 24 | pages_of_interest as ( 25 | 26 | select * from {{ref('pages_of_interest')}} 27 | 28 | ), 29 | 30 | parsed as ( 31 | 32 | select *, 33 | 34 | date(datehour) as date_day, 35 | replace(split(wiki, '.')[offset(0)], '-', '_') as lang 36 | 37 | from page_views 38 | 39 | ), 40 | 41 | tagged as ( 42 | 43 | select * from parsed 44 | left join pages_of_interest using (title, lang) 45 | 46 | ), 47 | 48 | agg as ( 49 | 50 | select 51 | 52 | date_day, 53 | lang, 54 | title, 55 | subject, 56 | sum(views) as views 57 | 58 | from tagged 59 | group by 1,2,3,4 60 | 61 | ), 62 | 63 | final as ( 64 | 65 | select 66 | 67 | {{ dbt_utils.surrogate_key('date_day', 'lang', 'title') }} as id, 68 | * 69 | 70 | from agg 71 | 72 | ) 73 | 74 | select * from final 75 | -------------------------------------------------------------------------------- /bq-incrementals/models/pages_of_interest.sql: -------------------------------------------------------------------------------- 1 | with unioned as ( 2 | 3 | {{ dbt_utils.union_relations([ 4 | ref('seed_oss'), 5 | ref('seed_defs_op'), 6 | ref('seed_cov_bond') 7 | ]) }} 8 | 9 | ), 10 | 11 | by_subject as ( 12 | 13 | select *, 14 | 15 | max(case when lang = 'en' then 16 | lower(replace(title, '-', '_')) 17 | end) over (partition by _dbt_source_relation) as subject 18 | 19 | from unioned 20 | 21 | ) 22 | 23 | select * from by_subject 24 | -------------------------------------------------------------------------------- /bq-incrementals/models/wikipedia_source.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: wikipedia 5 | 
database: "bigquery-public-data" 6 | tables: 7 | - name: pageviews_2020 8 | -------------------------------------------------------------------------------- /bq-incrementals/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: fishtown-analytics/dbt_utils 3 | version: 0.2.5 4 | -------------------------------------------------------------------------------- /business-hours/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | dbt_packages/ 5 | logs/ 6 | -------------------------------------------------------------------------------- /business-hours/README.md: -------------------------------------------------------------------------------- 1 | Welcome to your new dbt project! 2 | 3 | ### Using the starter project 4 | 5 | Try running the following commands: 6 | - dbt run 7 | - dbt test 8 | 9 | 10 | ### Resources: 11 | - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) 12 | - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers 13 | - Join the [chat](http://slack.getdbt.com/) on Slack for live discussions and support 14 | - Find [dbt events](https://events.getdbt.com) near you 15 | - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices 16 | -------------------------------------------------------------------------------- /business-hours/analysis/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/business-hours/analysis/.gitkeep -------------------------------------------------------------------------------- /business-hours/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | name: 'business_hours' 3 | version: '1.0.0' 4 | config-version: 2 5 | profile: 'dbt-learn' 6 | 7 | require-dbt-version: [">=1.0.0"] 8 | 9 | model-paths: ["models"] 10 | analysis-paths: ["analysis"] 11 | test-paths: ["tests"] 12 | seed-paths: ["seeds"] 13 | macro-paths: ["macros"] 14 | snapshot-paths: ["snapshots"] 15 | 16 | target-path: "target" 17 | clean-targets: 18 | - "target" 19 | - "dbt_modules" 20 | 21 | # these variables will need to be in H24 format! 
22 | vars: 23 | working_hour_start: 8 24 | working_hour_end: 20 25 | 26 | 27 | models: 28 | business_hours: 29 | +materialized: view 30 | 31 | seeds: 32 | business_hours: 33 | sample_tickets: 34 | +column_types: 35 | id: varchar 36 | user_id: varchar 37 | state: varchar 38 | subject: varchar 39 | conversation_created_at_business: timestamp_ntz 40 | first_response_at_business: timestamp_ntz 41 | first_closed_at_business: timestamp_ntz 42 | last_closed_at_business: timestamp_ntz -------------------------------------------------------------------------------- /business-hours/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/business-hours/macros/.gitkeep -------------------------------------------------------------------------------- /business-hours/macros/attempt-1-macros/business_time_functions.sql: -------------------------------------------------------------------------------- 1 | {# 2 | This file creates three macros - one calculates the number of weekdays between two dates, which may be useful for other projects 3 | 4 | The second uses the weekday macro to calculate the number of non-working hours between two dates. 5 | 6 | The third leverages the non_business_hours_between macro to generate the duration of business time between two dates 7 | 8 | #} 9 | 10 | {% macro weekdays_between(start_date, end_date) %} 11 | 12 | datediff('day', {{ start_date }}, {{ end_date }} ) - 13 | datediff('week', {{ start_date }}, dateadd('day', 1, {{ end_date }} )) - 14 | datediff('week', {{ start_date }}, {{ end_date }} ) 15 | 16 | {% endmacro %} 17 | 18 | 19 | {# non_business_hours_between: 20 | 21 | Terms in this macro: 22 | - weekdays_between: 23 | returns the number of weekdays between two dates. This is used to evaluate the number of overnights that occur between the two dates. 24 | i.e. Monday to Wednesday is 2 weekdays, ie two weeknights of non-business time. Friday to Monday evaluates to one overnight (8pm-12am Fri + 12am-8am Monday) 25 | we multiply by 12 to convert the weekdays between to hours. 26 | - evaluate weekends: 27 | in order to compare if a weekend falls in between two dates, we can compare the regular datediff to the weekday datediff. 28 | the difference is the number of weekend days (example, Friday to Monday, Datediff = 3, weekday = 1, 3-1 = 2) 29 | multiply the difference by 24 hours per weekend day 30 | 31 | #} 32 | 33 | {% macro non_business_hours_between(start_date, end_date) %} 34 | {% set non_working_hours = (24 - ( var("working_hour_end") - var("working_hour_start") )) %} 35 | 36 | coalesce( 37 | (( {{ weekdays_between(start_date, end_date) }} ) * {{ non_working_hours }} ) 38 | + ((datediff('day', {{ start_date }}, {{ end_date }} ) 39 | - ({{ weekdays_between(start_date, end_date) }}) 40 | ) * 24 )::int, 41 | 0 42 | ) 43 | 44 | {% endmacro %} 45 | 46 | 47 | {# 48 | 49 | business_minutes_between: 50 | This macro leverages the above macros to remove non-business time from the calculation of time durations.
51 | 52 | the basic structure here is: 53 | (date diff in minutes) - (non-business hours * 60) = business minutes 54 | 55 | #} 56 | 57 | 58 | {% macro business_minutes_between__1(start_date, end_date) %} 59 | 60 | datediff('minute', {{ start_date }}, {{ end_date }} ) 61 | - ( {{ non_business_hours_between( start_date, end_date ) }} * 60 ) 62 | 63 | {% endmacro %} 64 | -------------------------------------------------------------------------------- /business-hours/macros/attempt-2-subquery/business_time_functions.sql: -------------------------------------------------------------------------------- 1 | {# 2 | 3 | ### PURPOSE ### 4 | This macro calculates the total working minutes between two timestamps, 5 | meaning, if a ticket begins on a Friday and then carries into Monday, 6 | we do not want to count non-working hours (e.g. Saturdays) towards the 7 | total time to respond/close. 8 | 9 | ## MACROS + INPUTS ## 10 | * Macro 1: working_min_between 11 | This macro takes the two timestamps and finds the total number 12 | of working hours between the timestamps and multiplies it by 60 13 | to get the total working minutes between the two timestamps 14 | 15 | Example: 16 | ticket_id = '14025' 17 | first_message_at: 2021-07-09 11:29 18 | first_closed_at: 2021-07-12 14:46 19 | 20 | The total business hours between these two timestamps 21 | (2021-07-09 11:00:00 and 2021-07-12 14:00:00) is 14hr 22 | NOTE: We do not include the 11:00 and 14:00 hours in this 23 | because we will manually calculate the minutes from these 24 | hours in the next macro 25 | 26 | * Macro 2: business_minutes_between 27 | This macro will do two things: 28 | 1. If the working minutes between is 0min, then 29 | just datediff the start and end timestamps to 30 | find the minutes between. 31 | 2. If it's greater than 0, then we want to 32 | add the start_minutes and end_minutes to the 33 | hours between to get the total working minutes. 34 | See example below for a walk-through explanation 35 | Example: 36 | ticket_id = '14025' 37 | first_message_at: 2021-07-09 11:29 38 | first_closed_at: 2021-07-12 14:46 39 | The below macro will take three inputs into consideration: 40 | 1. The total working hours/minutes between the timestamps 41 | ^in this case -- 14 * 60 = 840min 42 | 2. The minutes from the start timestamp to the next hour 43 | ^in this case -- 2021-07-09 11:29 --> 31min 44 | 3.
The minutes from the end timestamp 45 | ^in this case, 46min 46 | We then add these together to see the total working business minutes 47 | between the two timestamps which is 917min 48 | #} 49 | {%- macro working_min_between(start_date, end_date) -%} 50 | ( select 51 | coalesce(count_if(is_business_hour),0) * 60 52 | from {{ ref('all_business_hours') }} 53 | where date_hour > date_trunc('hour', {{ start_date }}) 54 | and date_hour < date_trunc('hour', {{ end_date }}) 55 | ) 56 | {%- endmacro -%} 57 | 58 | {%- macro business_minutes_between__2(start_date, end_date) -%} 59 | coalesce( 60 | case 61 | -- take into account tickets opened and closed in same hour 62 | when (date_trunc('hour', {{ start_date }} ) = date_trunc('hour', {{ end_date }} )) 63 | then datediff('minute', {{ start_date }}, {{ end_date }}) 64 | else {{ working_min_between(start_date, end_date) }} 65 | + (60 - extract(minute from {{ start_date }})) 66 | + (extract(minute from {{ end_date }})) 67 | end, 68 | 0 69 | ) 70 | {%- endmacro -%} -------------------------------------------------------------------------------- /business-hours/models/all_business_hours.sql: -------------------------------------------------------------------------------- 1 | --use macro to create one row per hour per day 2 | with hours as ( 3 | 4 | {{ dbt_utils.date_spine( 5 | datepart="hour", 6 | start_date="to_date('01/01/2017', 'mm/dd/yyyy')", 7 | end_date="dateadd(month, 1, current_date)" 8 | ) 9 | }} 10 | 11 | ), 12 | 13 | /* -- if we had a seed for holidays, include it here 14 | 15 | holidays as ( 16 | 17 | select * from ref('stg_company_holidays') 18 | 19 | ), 20 | 21 | */ 22 | 23 | --convert hour to EST 24 | converted_hours as ( 25 | 26 | select distinct 27 | 28 | convert_timezone( 29 | 'UTC', 30 | date_hour 31 | )::timestamp_ntz as date_hour 32 | 33 | from hours 34 | ), 35 | 36 | 37 | --the output of this CTE is two columns: the first is one row for every hour of 38 | --the day date spine (from above). the second returns the same result if it falls 39 | --within our support hours. in the future, as support potentially changes, 40 | --this is where we will alter biz hours 41 | 42 | business_hours as ( 43 | 44 | select 45 | date_hour, 46 | 47 | case 48 | --before we hired a rep in MST (M-F, 8am - 8pm EST) 49 | when date_hour::date < '2021-09-14' 50 | and dayofweek(date_hour) not in (0,6) 51 | and hour(date_hour) between 8 and 19 52 | -- and holidays.date is null 53 | then converted_hours.date_hour 54 | 55 | -- after we hired international reps (covering Sunday 7pm to Friday 5pm) 56 | when date_hour::date >= '2021-09-14' 57 | and dayofweek(date_hour) = 0 --sundays after 7pm is fair game 58 | and hour(date_hour) between 19 and 23 59 | -- and holidays.date is null 60 | then converted_hours.date_hour 61 | 62 | when date_hour::date >= '2021-09-01' 63 | and dayofweek(date_hour) between 1 and 4 --24/hr coverage M-Thurs 64 | -- and holidays.date is null 65 | then converted_hours.date_hour 66 | 67 | when date_hour::date >= '2021-09-01' 68 | and dayofweek(date_hour) = 5 --fridays, we rest after 8pm ET 69 | and hour(date_hour) between 0 and 19 70 | -- and holidays.date is null 71 | then converted_hours.date_hour 72 | 73 | end as business_hour 74 | 75 | from converted_hours 76 | -- left join holidays 77 | -- on date_trunc(day, date_hour)::date = holidays.date 78 | 79 | ), 80 | 81 | --the output of this CTE adds an additional column to fill in missing values 82 | --the purpose is to show 8am for times outside of business hours 83 | --ex. 
10:36pm at night will show 8:00am the next day 84 | corrections as ( 85 | 86 | select 87 | 88 | *, 89 | business_hour is not null as is_business_hour, 90 | lead(business_hour) ignore nulls over ( 91 | partition by 1 92 | order by date_hour 93 | ) as adjusted_business_hour 94 | 95 | from business_hours 96 | 97 | ), 98 | 99 | --this cleans up the extra columns to properly calculate business hours. 100 | --the result is one row for every hour of every day with a mapped business hour. 101 | final as ( 102 | 103 | select 104 | 105 | date_hour, 106 | coalesce(business_hour, adjusted_business_hour) as business_hour, 107 | is_business_hour 108 | 109 | from corrections 110 | 111 | ) 112 | 113 | select * from final -------------------------------------------------------------------------------- /business-hours/models/fct_support_tickets.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | tickets as ( 4 | select * from {{ ref('sample_tickets') }} 5 | ), 6 | 7 | final as ( 8 | 9 | select 10 | tickets.*, 11 | 12 | -- macros for attempt 1 -- nested macros 13 | 14 | -- measure weekdays between dates 15 | {{ weekdays_between('conversation_created_at_business', 'first_response_at_business') }} as weekdays_to_first_response, 16 | -- add up overnights + weekends 17 | {{ non_business_hours_between('conversation_created_at_business', 'first_response_at_business') }} as non_working_hours, 18 | -- final calculation 19 | {{ business_minutes_between__1('conversation_created_at_business', 'first_response_at_business') }} as business_minutes__1, 20 | 21 | -- macros for attempt 2 -- subquery 22 | 23 | -- subquery to get working hours 24 | {{ working_min_between('conversation_created_at_business', 'first_response_at_business') }} as working_min_subquery, 25 | 26 | -- adjustments + final calculation 27 | {{ business_minutes_between__2('conversation_created_at_business', 'first_response_at_business') }} as business_minutes__2 28 | 29 | 30 | 31 | from tickets 32 | 33 | ) 34 | 35 | select * from final -------------------------------------------------------------------------------- /business-hours/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 0.8.0 -------------------------------------------------------------------------------- /business-hours/seeds/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/business-hours/seeds/.gitkeep -------------------------------------------------------------------------------- /business-hours/seeds/sample_tickets.csv: -------------------------------------------------------------------------------- 1 | id,user_id,state,subject,conversation_created_at_business,first_response_at_business,first_closed_at_business,last_closed_at_business 2 | 263c7e9022713fa39b62ac583fd26701,1234,closed,Porridge too hot,2021-03-24 19:29:23.000,2021-03-25 08:29:20.000,2021-01-28 08:00:00.000,2021-01-28 14:22:48.000 3 | af23d77bb2d304f5649da474b81fedeb,1234,closed,Porridge too cold,2021-01-04 10:01:34.000,2021-01-05 09:54:29.000,2021-01-07 15:01:43.000,2021-01-07 15:01:43.000 4 | e8b40b51ae64949c2f3bac987840f042,1234,closed,Porridge just right!,2021-01-07 19:54:44.000,2021-01-08 08:58:34.000,2021-01-13 15:14:04.000,2021-01-13 15:14:04.000 5 | 793a0185de6dd851e395743c8c84b939,1234,closed,Bed too 
firm,2021-01-04 08:00:00.000,2021-01-05 09:53:02.000,2021-01-07 15:01:57.000,2021-01-07 15:01:57.000 6 | f2721518d728f1a359517d1e1d432424,1234,closed,Bed too soft,2021-02-01 08:00:00.000,2021-02-02 08:18:24.000,2021-02-01 08:20:18.000,2021-02-02 08:18:36.000 7 | 8995d903fb57323dbc6821a0aee9cfa4,1234,closed,Bed just right! Can I have it?,2021-01-20 13:03:09.000,2021-01-20 13:29:37.000,2021-01-20 15:00:57.000,2021-03-17 12:44:53.000 8 | aa0dfd6e6b3722d485f29cb09e1a2683,1234,closed,Chair is too big,2021-01-28 17:13:50.000,2021-01-28 17:27:10.000,2021-01-29 11:45:36.000,2021-01-29 11:45:36.000 9 | ee02aaa6e76c087e34211df44b694123,1234,closed,Chair is too small,2021-03-17 09:44:29.000,2021-03-17 09:51:07.000,2021-03-18 16:32:07.000,2021-03-18 16:32:07.000 10 | fc7ee5d4cbb678b7f85de637ff20496d,1234,closed,Chair is just right,2021-03-12 14:34:40.000,2021-03-12 14:57:09.000,2021-03-15 16:03:44.000,2021-03-15 16:03:44.000 11 | 618e94c101c6b0ffd3b0ac887dc9e30b,5432,open,Someone was in my home,2021-01-27 19:50:06.000,2021-01-27 19:51:38.000,, -------------------------------------------------------------------------------- /business-hours/snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/business-hours/snapshots/.gitkeep -------------------------------------------------------------------------------- /business-hours/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/business-hours/tests/.gitkeep -------------------------------------------------------------------------------- /dynamic-data-masking-redshift/README.md: -------------------------------------------------------------------------------- 1 | # dynamic-data-masking-redshift 2 | Check out [this discourse article](https://discourse.getdbt.com/t/how-to-implement-dynamic-data-masking-on-redshift/2043) -------------------------------------------------------------------------------- /dynamic-data-masking-redshift/data/employees.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,favorite_bagel_flavor 2 | 1,Tristan,Handy,everything 3 | 2,Drew,Banin,poppy seed 4 | 3,Connor,McArthur,sesame 5 | -------------------------------------------------------------------------------- /dynamic-data-masking-redshift/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'bagel_shop' 2 | version: '0.1.0' 3 | 4 | config-version: 2 5 | profile: bagel_shop 6 | 7 | source-paths: ["models"] 8 | analysis-paths: ["analysis"] 9 | test-paths: ["tests"] 10 | data-paths: ["data"] 11 | macro-paths: ["macros"] 12 | snapshot-paths: ["snapshots"] 13 | 14 | target-path: "target" 15 | clean-targets: 16 | - "target" 17 | - "dbt_modules" 18 | 19 | models: 20 | -------------------------------------------------------------------------------- /dynamic-data-masking-redshift/macros/apply_data_masking.sql: -------------------------------------------------------------------------------- 1 | {% macro mask_column(column_name) %} 2 | -- logic that controls the masking 3 | case 4 | when current_user in ('claire') then {{ column_name }} 5 | else md5({{ column_name }}) 6 | end 7 | {% endmacro %} 8 | 9 | {% macro create_data_masked_view(schema, columns_to_mask) %} 
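{#
    This macro is designed to be called from a model post-hook (see
    employees_with_masking.sql). At execution time it looks up the columns of
    {{ this }}, builds a Relation for a view with the same name in the target
    schema, creates that schema if needed, and then drops and recreates the view,
    selecting every column and wrapping the columns listed in `columns_to_mask`
    with mask_column() so that unauthorized users only see hashed values. The
    trailing `select 1=1` is presumably there so that the rendered hook is still
    a valid SQL statement once the view has been created via run_query().
#}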
10 | {% if execute %} 11 | 12 | {# get all columns in the relation #} 13 | 14 | {% set model_cols = adapter.get_columns_in_relation(this) %} 15 | 16 | {# create Relation object for masked view #} 17 | 18 | {%- set masked_view = api.Relation.create( 19 | database=this.database, 20 | schema=schema, 21 | identifier=this.identifier) -%} 22 | 23 | {# create schema #} 24 | 25 | {% do adapter.create_schema(masked_view) %} 26 | 27 | {# create masked view in new schema for sensitive columns #} 28 | 29 | {% set view_sql %} 30 | 31 | drop view if exists {{ masked_view }}; 32 | 33 | create view {{ masked_view }} as ( 34 | 35 | select 36 | {% for col in model_cols %} 37 | {% if col.name in columns_to_mask %} 38 | {{ mask_column(col.name) }} as {{ col.name }} 39 | {% else %} 40 | {{ col.name }} 41 | {% endif %} 42 | {{ "," if not loop.last }} 43 | {% endfor %} 44 | from {{ this }} 45 | ) 46 | 47 | {% endset %} 48 | 49 | {% do run_query(view_sql) %} 50 | 51 | {% do dbt_utils.log_info("Masked view created at: " ~ masked_view ) %} 52 | 53 | {% endif %} 54 | 55 | select 1=1 56 | 57 | {% endmacro %} -------------------------------------------------------------------------------- /dynamic-data-masking-redshift/models/employees_with_masking.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | post_hook="{{ create_data_masked_view( 4 | schema='public_analytics', 5 | columns_to_mask=['first_name', 'last_name'] 6 | ) }}" 7 | ) 8 | }} 9 | 10 | select 11 | -- this is the model sql 12 | id, 13 | first_name, 14 | last_name, 15 | favorite_bagel_flavor 16 | from {{ ref('employees') }} 17 | -------------------------------------------------------------------------------- /dynamic-data-masking-redshift/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: fishtown-analytics/dbt_utils 3 | version: 0.6.3 -------------------------------------------------------------------------------- /insert_by_period/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /insert_by_period/Makefile: -------------------------------------------------------------------------------- 1 | test-databricks: 2 | dbt deps 3 | dbt seed --target databricks --full-refresh 4 | dbt run -s test_insert_by_period --target databricks --full-refresh 5 | dbt test 6 | dbt run -s test_insert_by_period --target databricks --vars 'test_backfill: True' 7 | dbt test --target databricks 8 | 9 | test-all: test-databricks 10 | echo "Completed successfully" -------------------------------------------------------------------------------- /insert_by_period/README.md: -------------------------------------------------------------------------------- 1 | # Custom insert by period materialization 2 | 3 | `insert_by_period` allows dbt to insert records into a table one period (i.e. day, week) at a time. 4 | 5 | This materialization is appropriate for event data that can be processed in discrete periods. It is similar in concept to the built-in incremental materialization, but has the added benefit of building the model in chunks even during a full-refresh so is particularly useful for models where the initial run can be problematic. 
6 | 7 | Should a run of a model using this materialization be interrupted, a subsequent run will continue building the target table from where it was interrupted (granted the `--full-refresh` flag is omitted). 8 | 9 | Progress is logged in the command line for easy monitoring. 10 | 11 | ## Installation 12 | This is not a package on the Package Hub. To install it via git, add this to `packages.yml`: 13 | ```yaml 14 | packages: 15 | - git: https://github.com/dbt-labs/dbt-labs-experimental-features 16 | subdirectory: insert_by_period 17 | revision: XXXX #optional but highly recommended. Provide a full git sha hash, e.g. 7180db61d26836b931aa6ef8ad9d70e7fb3a69fa. If not provided, uses the current HEAD. 18 | 19 | ``` 20 | 21 | ## Usage: 22 | 23 | ```sql 24 | {{ 25 | config( 26 | materialized = "insert_by_period", 27 | period = "day", 28 | timestamp_field = "created_at", 29 | start_date = "2018-01-01", 30 | stop_date = "2018-06-01") 31 | }} 32 | with events as ( 33 | select * 34 | from {{ ref('events') }} 35 | where __PERIOD_FILTER__ -- This will be replaced with a filter in the materialization code 36 | ) 37 | ....complex aggregates here.... 38 | ``` 39 | 40 | **Configuration values:** 41 | 42 | - `period`: period to break the model into, must be a valid [datepart](https://docs.aws.amazon.com/redshift/latest/dg/r_Dateparts_for_datetime_functions.html) (default='Week') 43 | - `timestamp_field`: the column name of the timestamp field that will be used to break the model into smaller queries 44 | - `start_date`: literal date or timestamp - generally choose a date that is earlier than the start of your data 45 | - `stop_date`: literal date or timestamp (default=current_timestamp) 46 | 47 | **Caveats:** 48 | 49 | - This materialization is compatible and tested for a subset of adapters for now: BigQuery, Databricks, PostgreSQL, Redshift and Snowflake. 50 | - This materialization can only be used for a model where records are not expected to change after they are created. 51 | - Any model post-hooks that use `{{ this }}` will fail using this materialization. For example: 52 | 53 | ```yaml 54 | models: 55 | project-name: 56 | post-hook: "grant select on {{ this }} to db_reader" 57 | ``` 58 | 59 | A useful workaround is to change the above post-hook to: 60 | 61 | ```yaml 62 | post-hook: "grant select on {{ this.schema }}.{{ this.name }} to db_reader" 63 | ``` 64 | -------------------------------------------------------------------------------- /insert_by_period/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | # Name your project! Project names should contain only lowercase characters 3 | # and underscores. A good package name should reflect your organization's 4 | # name or the intended use of these models 5 | name: 'insert_by_period' 6 | version: '1.0.0' 7 | config-version: 2 8 | 9 | # This package requires dbt version 1.3.0 or higher to be able to resolve 10 | # dbt.current_timestamp() 11 | require-dbt-version: [">=1.3.0", "<2.0.0"] 12 | 13 | # This setting configures which "profile" dbt uses for this project. 14 | profile: 'insert_by_period' 15 | 16 | dispatch: 17 | - macro_namespace: dbt_utils 18 | search_order: ['spark_utils', 'dbt_utils'] 19 | 20 | # These configurations specify where dbt should look for different types of files. 21 | # The `model-paths` config, for example, states that models in this project can be 22 | # found in the "models/" directory. You probably won't need to change these! 
23 | model-paths: ["models"] 24 | analysis-paths: ["analyses"] 25 | test-paths: ["tests"] 26 | seed-paths: ["seeds"] 27 | macro-paths: ["macros"] 28 | snapshot-paths: ["snapshots"] 29 | 30 | target-path: "target" # directory which will store compiled SQL files 31 | clean-targets: # directories to be removed by `dbt clean` 32 | - "target" 33 | - "dbt_packages" 34 | -------------------------------------------------------------------------------- /insert_by_period/integration_tests/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | .env/ 6 | profiles.yml 7 | package-lock.yml -------------------------------------------------------------------------------- /insert_by_period/integration_tests/ci/sample.profiles.yml: -------------------------------------------------------------------------------- 1 | 2 | # HEY! This file is used in the dbt-utils integrations tests with CircleCI. 3 | # You should __NEVER__ check credentials into version control. Thanks for reading :) 4 | 5 | config: 6 | send_anonymous_usage_stats: False 7 | use_colors: True 8 | 9 | integration_tests: 10 | target: postgres 11 | outputs: 12 | postgres: 13 | type: postgres 14 | host: "{{ env_var('POSTGRES_TEST_HOST') }}" 15 | user: "{{ env_var('POSTGRES_TEST_USER') }}" 16 | pass: "{{ env_var('POSTGRES_TEST_PASS') }}" 17 | port: "{{ env_var('POSTGRES_TEST_PORT') | as_number }}" 18 | dbname: "{{ env_var('POSTGRES_TEST_DBNAME') }}" 19 | schema: dbt_utils_integration_tests_postgres 20 | threads: 5 21 | 22 | redshift: 23 | type: redshift 24 | host: "{{ env_var('REDSHIFT_TEST_HOST') }}" 25 | user: "{{ env_var('REDSHIFT_TEST_USER') }}" 26 | pass: "{{ env_var('REDSHIFT_TEST_PASS') }}" 27 | dbname: "{{ env_var('REDSHIFT_TEST_DBNAME') }}" 28 | port: "{{ env_var('REDSHIFT_TEST_PORT') | as_number }}" 29 | schema: dbt_utils_integration_tests_redshift 30 | threads: 5 31 | 32 | bigquery: 33 | type: bigquery 34 | method: service-account 35 | keyfile: "{{ env_var('BIGQUERY_SERVICE_KEY_PATH') }}" 36 | project: "{{ env_var('BIGQUERY_TEST_DATABASE') }}" 37 | schema: dbt_utils_integration_tests_bigquery 38 | threads: 10 39 | 40 | snowflake: 41 | type: snowflake 42 | account: "{{ env_var('SNOWFLAKE_TEST_ACCOUNT') }}" 43 | user: "{{ env_var('SNOWFLAKE_TEST_USER') }}" 44 | password: "{{ env_var('SNOWFLAKE_TEST_PASSWORD') }}" 45 | role: "{{ env_var('SNOWFLAKE_TEST_ROLE') }}" 46 | database: "{{ env_var('SNOWFLAKE_TEST_DATABASE') }}" 47 | warehouse: "{{ env_var('SNOWFLAKE_TEST_WAREHOUSE') }}" 48 | schema: dbt_utils_integration_tests_snowflake 49 | threads: 10 50 | 51 | databricks: 52 | type: databricks 53 | catalog: "{{ env_var('DATABRICKS_TEST_CATALOG') }}" 54 | host: "{{ env_var('DATABRICKS_TEST_HOST') }}" 55 | http_path: "{{ env_var('DATABRICKS_TEST_HTTP_PATH') }}" 56 | token: "{{ env_var('DATABRICKS_TEST_TOKEN') }}" 57 | schema: dbt_utils_integration_tests_databricks 58 | threads: 4 59 | -------------------------------------------------------------------------------- /insert_by_period/integration_tests/data/data_insert_by_period.csv: -------------------------------------------------------------------------------- 1 | id,created_at 2 | 1,2017-12-02 3 | 2,2018-01-02 4 | 3,2018-02-02 5 | 4,2018-03-02 6 | 5,2018-04-02 7 | 6,2018-05-02 8 | 7,2018-06-02 9 | 8,2018-07-02 10 | 9,2018-08-02 -------------------------------------------------------------------------------- /insert_by_period/integration_tests/data/data_insert_by_period_overwrite.csv: 
-------------------------------------------------------------------------------- 1 | id,created_at 2 | 3,2018-02-02 3 | 6,2018-05-02 4 | 5,2018-04-02 5 | 2,2018-01-02 6 | 6,2018-05-02 7 | 4,2018-03-02 8 | 3,2018-02-02 9 | 4,2018-03-02 10 | 5,2018-04-02 11 | 2,2018-01-02 12 | -------------------------------------------------------------------------------- /insert_by_period/integration_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | name: 'insert_by_period_integration_tests' 3 | version: '1.0' 4 | 5 | # require-dbt-version: inherit this from dbt-utils 6 | 7 | config-version: 2 8 | 9 | # This setting configures which "profile" dbt uses for this project. 10 | profile: 'integration_tests' 11 | 12 | dispatch: 13 | - macro_namespace: dbt_utils 14 | search_order: ['spark_utils', 'dbt_utils', 'insert_by_period_integration_tests'] 15 | 16 | 17 | model-paths: ["models"] 18 | analysis-paths: ["analysis"] 19 | test-paths: ["tests"] 20 | seed-paths: ["data"] 21 | macro-paths: ["macros"] 22 | 23 | target-path: "target" # directory which will store compiled SQL files 24 | clean-targets: # directories to be removed by `dbt clean` 25 | - "target" 26 | - "dbt_modules" 27 | - "dbt_packages" 28 | 29 | seeds: 30 | +quote_columns: false -------------------------------------------------------------------------------- /insert_by_period/integration_tests/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/insert_by_period/integration_tests/macros/.gitkeep -------------------------------------------------------------------------------- /insert_by_period/integration_tests/macros/assert_equal_values.sql: -------------------------------------------------------------------------------- 1 | {% macro assert_equal_values(actual_object, expected_object) %} 2 | {% if not execute %} 3 | 4 | {# pass #} 5 | 6 | {% elif actual_object != expected_object %} 7 | 8 | {% set msg %} 9 | Expected did not match actual 10 | 11 | ----------- 12 | Actual: 13 | ----------- 14 | --->{{ actual_object }}<--- 15 | 16 | ----------- 17 | Expected: 18 | ----------- 19 | --->{{ expected_object }}<--- 20 | 21 | {% endset %} 22 | 23 | {{ log(msg, info=True) }} 24 | 25 | select 'fail' 26 | 27 | {% else %} 28 | 29 | select 'ok' {{ limit_zero() }} 30 | 31 | {% endif %} 32 | {% endmacro %} -------------------------------------------------------------------------------- /insert_by_period/integration_tests/macros/limit_zero.sql: -------------------------------------------------------------------------------- 1 | {% macro my_custom_macro() %} 2 | whatever 3 | {% endmacro %} 4 | 5 | {% macro limit_zero() %} 6 | {{ return(adapter.dispatch('limit_zero', 'dbt_utils')()) }} 7 | {% endmacro %} 8 | 9 | {% macro default__limit_zero() %} 10 | {{ return('limit 0') }} 11 | {% endmacro %} -------------------------------------------------------------------------------- /insert_by_period/integration_tests/macros/tests.sql: -------------------------------------------------------------------------------- 1 | 2 | {% test assert_equal(model, actual, expected) %} 3 | select * from {{ model }} where {{ actual }} != {{ expected }} 4 | 5 | {% endtest %} 6 | 7 | 8 | {% test not_empty_string(model, column_name) %} 9 | 10 | select * from {{ model }} where {{ column_name }} = '' 11 | 12 | {% endtest %} 13 | 
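{#
    Illustrative usage of the generic tests defined above. This is only a sketch:
    the model and column names below are hypothetical and do not appear in this
    project's schema.yml.

    models:
      - name: some_model
        tests:
          - assert_equal:
              actual: actual_column
              expected: expected_column
        columns:
          - name: some_string_column
            tests:
              - not_empty_string
#}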
-------------------------------------------------------------------------------- /insert_by_period/integration_tests/models/expected_insert_by_period.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | enabled=(project_name == 'insert_by_period_integration_tests'), 5 | ) 6 | }} 7 | 8 | select * 9 | from {{ ref('data_insert_by_period') }} 10 | where id in (2, 3, 4, 5, 6) 11 | -------------------------------------------------------------------------------- /insert_by_period/integration_tests/models/expected_insert_by_period_overwrite.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | enabled=(project_name == 'insert_by_period_integration_tests'), 5 | ) 6 | }} 7 | 8 | select * 9 | from {{ ref('data_insert_by_period_overwrite') }} 10 | where id in (2, 3, 4, 5, 6) 11 | -------------------------------------------------------------------------------- /insert_by_period/integration_tests/models/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: test_insert_by_period 5 | tests: 6 | - dbt_utils.equality: 7 | compare_model: ref('expected_insert_by_period') 8 | enabled: "{{not var('test_backfill', False)}}" 9 | - dbt_utils.equality: 10 | compare_model: ref('expected_insert_by_period_overwrite') 11 | enabled: "{{var('test_backfill', False)}}" 12 | -------------------------------------------------------------------------------- /insert_by_period/integration_tests/models/test_insert_by_period.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'insert_by_period', 4 | period = 'month', 5 | timestamp_field = 'cast(created_at as timestamp)', 6 | start_date = '2018-01-01', 7 | stop_date = '2018-03-01', 8 | backfill = var('test_backfill', False), 9 | enabled=(project_name == 'insert_by_period_integration_tests'), 10 | ) 11 | }} 12 | 13 | with events as ( 14 | select * 15 | from {{ ref('data_insert_by_period') }} 16 | where __PERIOD_FILTER__ 17 | ) 18 | 19 | select * from events 20 | -------------------------------------------------------------------------------- /insert_by_period/integration_tests/packages.yml: -------------------------------------------------------------------------------- 1 | 2 | packages: 3 | - local: ../ 4 | - package: dbt-labs/dbt_utils 5 | version: [">0.9.0", "<2.0.0"] 6 | - package: dbt-labs/spark_utils 7 | version: 0.3.0 8 | -------------------------------------------------------------------------------- /insert_by_period/macros/create_relation_for_insert_by_period.sql: -------------------------------------------------------------------------------- 1 | {% macro create_relation_for_insert_by_period(tmp_identifier, schema, type) -%} 2 | {{ return(adapter.dispatch('create_relation_for_insert_by_period', 'insert_by_period')(tmp_identifier, schema, type)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__create_relation_for_insert_by_period(tmp_identifier, schema, type) -%} 6 | {% do return (api.Relation.create(identifier=tmp_identifier, 7 | schema=schema, type=type)) %} 8 | {%- endmacro %} 9 | 10 | {% macro postgres__create_relation_for_insert_by_period(tmp_identifier, schema, type) -%} 11 | {% do return (api.Relation.create(identifier=tmp_identifier, 12 | schema=None, type=type)) %} 13 | {%- endmacro %} 14 | 15 | {% macro 
databricks__create_relation_for_insert_by_period(tmp_identifier, schema, type) -%} 16 | {% do return (api.Relation.create(identifier=tmp_identifier, 17 | schema=None, type=type)) %} 18 | {%- endmacro %} 19 | -------------------------------------------------------------------------------- /insert_by_period/macros/get_period_boundaries.sql: -------------------------------------------------------------------------------- 1 | {% macro get_period_boundaries(target_schema, target_table, timestamp_field, start_date, stop_date, period, backfill, full_refresh_mode) -%} 2 | {{ return(adapter.dispatch('get_period_boundaries', 'insert_by_period')(target_schema, target_table, timestamp_field, start_date, stop_date, period, backfill, full_refresh_mode)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_period_boundaries(target_schema, target_table, timestamp_field, start_date, stop_date, period, backfill, full_refresh_mode) -%} 6 | 7 | {% call statement('period_boundaries', fetch_result=True) -%} 8 | with data as ( 9 | select 10 | {% if backfill and not full_refresh_mode -%} 11 | cast('{{start_date}}' as timestamp) as start_timestamp, 12 | {%- else -%} 13 | coalesce(max({{timestamp_field}}), cast('{{start_date}}' as timestamp)) as start_timestamp, 14 | {%- endif %} 15 | coalesce( 16 | {{ dateadd('millisecond', 17 | -1, 18 | "cast(nullif('" ~ stop_date ~ "','') as timestamp)") }}, 19 | {{ dbt.current_timestamp() }} 20 | ) as stop_timestamp 21 | from {{adapter.quote(target_schema)}}.{{adapter.quote(target_table)}} 22 | ) 23 | 24 | select 25 | start_timestamp, 26 | stop_timestamp, 27 | {{ datediff('start_timestamp', 28 | 'stop_timestamp', 29 | period) }} + 1 as num_periods 30 | from data 31 | {%- endcall %} 32 | 33 | {%- endmacro %} 34 | 35 | 36 | {% macro bigquery__get_period_boundaries(target_schema, target_table, timestamp_field, start_date, stop_date, period, backfill, full_refresh_mode) -%} 37 | 38 | {% call statement('period_boundaries', fetch_result=True) -%} 39 | with data as ( 40 | select 41 | {% if backfill and not full_refresh_mode -%} 42 | cast('{{start_date}}' as timestamp) as start_timestamp, 43 | {%- else -%} 44 | coalesce(max({{timestamp_field}}), cast('{{start_date}}' as timestamp)) as start_timestamp, 45 | {%- endif %} 46 | coalesce(datetime_add(cast(nullif('{{stop_date}}','') as timestamp), interval -1 millisecond), {{dbt.current_timestamp()}}) as stop_timestamp 47 | from {{adapter.quote(target_schema)}}.{{adapter.quote(target_table)}} 48 | ) 49 | 50 | select 51 | start_timestamp, 52 | stop_timestamp, 53 | {{ datediff('start_timestamp', 54 | 'stop_timestamp', 55 | period) }} + 1 as num_periods 56 | from data 57 | {%- endcall %} 58 | 59 | {%- endmacro %} -------------------------------------------------------------------------------- /insert_by_period/macros/get_period_sql.sql: -------------------------------------------------------------------------------- 1 | {% macro get_period_sql(target_cols_csv, sql, timestamp_field, period, start_timestamp, stop_timestamp, offset) -%} 2 | {{ return(adapter.dispatch('get_period_sql', 'insert_by_period')(target_cols_csv, sql, timestamp_field, period, start_timestamp, stop_timestamp, offset)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_period_sql(target_cols_csv, sql, timestamp_field, period, start_timestamp, stop_timestamp, offset) -%} 6 | 7 | {%- set period_filter -%} 8 | ({{timestamp_field}} > '{{start_timestamp}}'::timestamp + interval '{{offset}} {{period}}' and 9 | {{timestamp_field}} <= '{{start_timestamp}}'::timestamp + interval 
'{{offset}} {{period}}' + interval '1 {{period}}' and 10 | {{timestamp_field}} < '{{stop_timestamp}}'::timestamp) 11 | {%- endset -%} 12 | 13 | {%- set filtered_sql = sql | replace("__PERIOD_FILTER__", period_filter) -%} 14 | 15 | select 16 | {{target_cols_csv}} 17 | from ( 18 | {{filtered_sql}} 19 | ) target_cols 20 | 21 | {%- endmacro %} 22 | 23 | 24 | {% macro bigquery__get_period_sql(target_cols_csv, sql, timestamp_field, period, start_timestamp, stop_timestamp, offset) -%} 25 | 26 | {%- set period_filter -%} 27 | ({{timestamp_field}} > cast(cast(timestamp('{{start_timestamp}}') as datetime) + interval {{offset}} {{period}} as timestamp) and 28 | {{timestamp_field}} <= cast(cast(timestamp('{{start_timestamp}}') as datetime) + interval {{offset}} {{period}} + interval 1 {{period}} as timestamp) and 29 | {{timestamp_field}} < cast('{{stop_timestamp}}' as timestamp)) 30 | {%- endset -%} 31 | 32 | {%- set filtered_sql = sql | replace("__PERIOD_FILTER__", period_filter) -%} 33 | 34 | select 35 | {{target_cols_csv}} 36 | from ( 37 | {{filtered_sql}} 38 | ) target_cols 39 | 40 | {%- endmacro %} -------------------------------------------------------------------------------- /insert_by_period/macros/get_rows_inserted.sql: -------------------------------------------------------------------------------- 1 | {% macro get_rows_inserted(result) -%} 2 | {{ return(adapter.dispatch('get_rows_inserted', 'insert_by_period')(result)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__get_rows_inserted(result) %} 6 | 7 | {% if 'response' in result.keys() %} {# added in v0.19.0 #} 8 | {% set rows_inserted = result['response']['rows_affected'] %} 9 | {% else %} {# older versions #} 10 | {% set rows_inserted = result['status'].split(" ")[2] | int %} 11 | {% endif %} 12 | 13 | {{return(rows_inserted)}} 14 | 15 | {% endmacro %} 16 | 17 | {% macro databricks__get_rows_inserted(result) %} 18 | 19 | {% if 'data' in result.keys() %} 20 | {% set rows_inserted = result['data'][0][0] | int %} 21 | {% endif %} 22 | 23 | {{return(rows_inserted)}} 24 | 25 | {% endmacro %} -------------------------------------------------------------------------------- /insert_by_period/macros/insert_by_period_materialization.sql: -------------------------------------------------------------------------------- 1 | {% materialization insert_by_period, default -%} 2 | {%- set timestamp_field = config.require('timestamp_field') -%} 3 | {%- set start_date = config.require('start_date') -%} 4 | {%- set stop_date = config.get('stop_date') or '' -%} 5 | {%- set period = config.get('period') or 'week' -%} 6 | {%- set backfill = config.get('backfill') or False -%} 7 | 8 | {%- if sql.find('__PERIOD_FILTER__') == -1 -%} 9 | {%- set error_message -%} 10 | Model '{{ model.unique_id }}' does not include the required string '__PERIOD_FILTER__' in its sql 11 | {%- endset -%} 12 | {{ exceptions.raise_compiler_error(error_message) }} 13 | {%- endif -%} 14 | 15 | {%- set identifier = model['name'] -%} 16 | 17 | {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} 18 | {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, type='table') -%} 19 | 20 | {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} 21 | {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} 22 | 23 | {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} 24 | {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%} 25 | 26 | {%- 
set should_truncate = (non_destructive_mode and full_refresh_mode and exists_as_table) -%} 27 | {%- set should_drop = (not should_truncate and (full_refresh_mode or exists_not_as_table)) -%} 28 | {%- set force_create = (flags.FULL_REFRESH and not flags.NON_DESTRUCTIVE) -%} 29 | 30 | -- setup 31 | {% if old_relation is none -%} 32 | -- noop 33 | {%- elif should_truncate -%} 34 | {{adapter.truncate_relation(old_relation)}} 35 | {%- elif should_drop -%} 36 | {{adapter.drop_relation(old_relation)}} 37 | {%- set old_relation = none -%} 38 | {%- endif %} 39 | 40 | {{ run_hooks(pre_hooks, inside_transaction=False) }} 41 | 42 | -- `BEGIN` happens here: 43 | {{ run_hooks(pre_hooks, inside_transaction=True) }} 44 | 45 | -- build model 46 | {% if force_create or old_relation is none -%} 47 | {# Create an empty target table -#} 48 | {% call statement('main') -%} 49 | {%- set empty_sql = sql | replace("__PERIOD_FILTER__", 'false') -%} 50 | {{create_table_as(False, target_relation, empty_sql)}} 51 | {%- endcall %} 52 | {%- endif %} 53 | 54 | {% set period_boundaries = insert_by_period.get_period_boundaries( 55 | schema, 56 | identifier, 57 | timestamp_field, 58 | start_date, 59 | stop_date, 60 | period, 61 | backfill, 62 | full_refresh_mode, 63 | ) %} 64 | {% set period_boundaries_results = load_result('period_boundaries')['data'][0] %} 65 | {%- set start_timestamp = period_boundaries_results[0] | string -%} 66 | {%- set stop_timestamp = period_boundaries_results[1] | string -%} 67 | {%- set num_periods = period_boundaries_results[2] | int -%} 68 | 69 | {% set target_columns = adapter.get_columns_in_relation(target_relation) %} 70 | {%- set target_cols_csv = target_columns | map(attribute='quoted') | join(', ') -%} 71 | {%- set loop_vars = {'sum_rows_inserted': 0} -%} 72 | 73 | -- commit each period as a separate transaction 74 | {% for i in range(num_periods) -%} 75 | {%- set msg = "Running for " ~ period ~ " " ~ (i + 1) ~ " of " ~ (num_periods) -%} 76 | {{ print(msg) }} 77 | 78 | {%- set tmp_identifier = model['name'] ~ '__dbt_incremental_period' ~ i ~ '_tmp' -%} 79 | {%- set tmp_relation = insert_by_period.create_relation_for_insert_by_period(tmp_identifier, schema, 'table') -%} 80 | {% call statement() -%} 81 | {% set tmp_table_sql = insert_by_period.get_period_sql(target_cols_csv, 82 | sql, 83 | timestamp_field, 84 | period, 85 | start_timestamp, 86 | stop_timestamp, 87 | i) %} 88 | {{dbt.create_table_as(True, tmp_relation, tmp_table_sql)}} 89 | {%- endcall %} 90 | 91 | {{adapter.expand_target_column_types(from_relation=tmp_relation, 92 | to_relation=target_relation)}} 93 | {%- set name = 'main-' ~ i -%} 94 | {% call statement(name, fetch_result=True) -%} 95 | insert into {{target_relation}} ({{target_cols_csv}}) 96 | ( 97 | select 98 | {{target_cols_csv}} 99 | from {{tmp_relation.include(schema=True)}} 100 | ); 101 | {%- endcall %} 102 | {% set result = load_result('main-' ~ i) %} 103 | 104 | {% set rows_inserted = insert_by_period.get_rows_inserted(result) %} 105 | 106 | {%- set sum_rows_inserted = loop_vars['sum_rows_inserted'] + rows_inserted -%} 107 | {%- if loop_vars.update({'sum_rows_inserted': sum_rows_inserted}) %} {% endif -%} 108 | 109 | {%- set msg = "Ran for " ~ period ~ " " ~ (i + 1) ~ " of " ~ (num_periods) ~ "; " ~ rows_inserted ~ " record(s) inserted" -%} 110 | {{ print(msg) }} 111 | 112 | {%- endfor %} 113 | 114 | -- from the table mat 115 | {% do create_indexes(target_relation) %} 116 | 117 | {{ run_hooks(post_hooks, inside_transaction=True) }} 118 | 119 | {% set should_revoke = 
should_revoke(existing_relation, full_refresh_mode=True) %} 120 | {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} 121 | 122 | {% do persist_docs(target_relation, model) %} 123 | 124 | -- `COMMIT` happens here 125 | {{ adapter.commit() }} 126 | 127 | {{ run_hooks(post_hooks, inside_transaction=False) }} 128 | -- end from the table mat 129 | 130 | {%- set status_string = "INSERT " ~ loop_vars['sum_rows_inserted'] -%} 131 | 132 | {% call noop_statement('main', status_string) -%} 133 | -- no-op 134 | {%- endcall %} 135 | 136 | -- Return the relations created in this materialization 137 | {{ return({'relations': [target_relation]}) }} 138 | 139 | {%- endmaterialization %} -------------------------------------------------------------------------------- /lambda-views/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /lambda-views/README.md: -------------------------------------------------------------------------------- 1 | # Lambda views 2 | 3 | ## Option 1: 4 | Implement this without any macros. 5 | 6 | ![Option 1 DAG](etc/option-1-dag.png) 7 | 8 | 9 | Things to note: 10 | - Use of the `run_started_at` [variable](https://docs.getdbt.com/reference/dbt-jinja-functions/run_started_at/) 11 | - We've added some meta fields to make debugging easier 12 | 13 | Pros: 14 | - Relatively easy to intuit what's going on 15 | 16 | Cons: 17 | - SQL is re-used — two models have the transformation SQL (e.g. `page_views__lambda_current` and `page_views__lambda_historical`), and the SQL in the models that union together the two relations is very similar 18 | - Very brittle — have to remember to materialize each model appropriately 19 | 20 | ## Option 2 21 | Use macros to reduce duplicated code: 22 | - Use a macro (e.g. `page_views_model_sql`) for the transformation SQL 23 | - Use macros, `lambda_filter` and `lambda_union`, to template the `where` clauses and the `union` model 24 | 25 | ![Option 2 DAG](etc/option-2-dag.png) 26 | 27 | Things to note: 28 | - Removed the `__lambda_current` views, since you don't strictly need to materialize those in your warehouse 29 | - Optional var, `lambda_split`, that can be overridden for the cutoff time 30 | - Added logic for a unique key (though that may have performance impacts) 31 | - The `lambda_filter` macro relies on the model having a matching column in both the source and target table: 32 | ```sql 33 | where {{ column_name }} >= (select max({{ column_name }}) from {{ this }}) 34 | and {{ column_name }} < '{{ filter_time }}' 35 | ``` 36 | 37 | Pros: 38 | - Less duplicated code 39 | - Less chance of silly mistakes 40 | - Fewer objects materialized in the warehouse 41 | 42 | Cons: 43 | - Harder to reason about — the model code lives separately from the models 44 | 45 | 46 | ## Thought experiment 47 | Use a custom materialization 48 | 49 | **Note: This doesn't actually work** 50 | ## Alt-alt approach: custom materialization?? 51 | 52 | I've included a mockup in `models/thought_experiment`. As the name suggests, this is only a thought experiment. 53 | 54 | **Pros:** 55 | * We can _both_ keep model SQL within the model file _and_ write that SQL only once 56 | 57 | **Cons:** 58 | * It obfuscates a _lot_ of logic into the materialization layer 59 | 60 | **Challenges:** 61 | * How to pass `config` values down to the historical model?
Namely `materialization` (table or incremental), `schema`, `alias` 62 | * How to "call" one materialization from another? We don't want to copy+paste all the logic from every adapter's `incremental` materialization into a new `lambda_view` materialization 63 | * Will dbt break in new and interesting ways if it creates multiple objects in a database for one model? 64 | -------------------------------------------------------------------------------- /lambda-views/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/lambda-views/data/.gitkeep -------------------------------------------------------------------------------- /lambda-views/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'lambda_views' 2 | version: '0.1.0' 3 | config-version: 2 4 | 5 | source-paths: ["models"] 6 | analysis-paths: ["analysis"] 7 | test-paths: ["tests"] 8 | data-paths: ["data"] 9 | macro-paths: ["macros"] 10 | snapshot-paths: ["snapshots"] 11 | 12 | target-path: "target" 13 | clean-targets: 14 | - "target" 15 | - "dbt_modules" 16 | 17 | models: 18 | lambda_views: 19 | option_1: 20 | enabled: false 21 | option_2: 22 | enabled: true 23 | thought_experiment: 24 | enabled: false 25 | -------------------------------------------------------------------------------- /lambda-views/etc/option-1-dag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/lambda-views/etc/option-1-dag.png -------------------------------------------------------------------------------- /lambda-views/etc/option-2-dag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/lambda-views/etc/option-2-dag.png -------------------------------------------------------------------------------- /lambda-views/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/lambda-views/macros/.gitkeep -------------------------------------------------------------------------------- /lambda-views/macros/lambda/lambda_filter.sql: -------------------------------------------------------------------------------- 1 | {% macro lambda_filter(column_name) %} 2 | 3 | {% set materialized = config.require('materialized') %} 4 | {% set filter_time = var('lambda_split', run_started_at) %} 5 | 6 | {% if materialized == 'view' %} 7 | 8 | where {{ column_name }} >= '{{ filter_time }}' 9 | 10 | {% elif is_incremental() %} 11 | 12 | where {{ column_name }} >= (select max({{ column_name }}) from {{ this }}) 13 | and {{ column_name }} < '{{ filter_time }}' 14 | 15 | {% else %} 16 | 17 | where {{ column_name }} < '{{ filter_time }}' 18 | 19 | {% endif %} 20 | 21 | {% endmacro %} 22 | -------------------------------------------------------------------------------- /lambda-views/macros/lambda/lambda_union.sql: -------------------------------------------------------------------------------- 1 | {% macro lambda_union(historical_relation, model_sql) %} 2 | 3 | {% set unique_key = config.get('unique_key', none) %} 4 | 5 | with historical as 
( 6 | 7 | select *, 8 | 'historical' as _dbt_lambda_view_source, 9 | '{{ run_started_at }}' as _dbt_last_run_at 10 | 11 | from {{ historical_relation }} 12 | 13 | ), 14 | 15 | new_raw as ( 16 | 17 | {{ model_sql }} 18 | 19 | ), 20 | 21 | new as ( 22 | 23 | select *, 24 | 'new' as _dbt_lambda_view_source, 25 | '{{ run_started_at }}' as _dbt_last_run_at 26 | 27 | from new_raw 28 | 29 | ), 30 | 31 | unioned as ( 32 | 33 | select * from historical 34 | 35 | {% if unique_key %} 36 | 37 | where {{ unique_key }} not in ( 38 | select {{ unique_key }} from new 39 | ) 40 | 41 | {% endif %} 42 | 43 | union all 44 | 45 | select * from new 46 | 47 | ) 48 | 49 | select * from unioned 50 | 51 | {% endmacro %} 52 | -------------------------------------------------------------------------------- /lambda-views/macros/models/page_views_model_sql.sql: -------------------------------------------------------------------------------- 1 | {% macro page_views_model_sql() %} 2 | 3 | with events as ( 4 | 5 | select * from {{ source('snowplow','event') }} 6 | 7 | {{ lambda_filter(column_name = 'collector_tstamp') }} 8 | 9 | ), 10 | 11 | page_views as ( 12 | 13 | select 14 | domain_sessionid as session_id, 15 | domain_userid as anonymous_user_id, 16 | web_page_context.value:data.id::varchar as page_view_id, 17 | page_url, 18 | count(*) * 10 as approx_time_on_page, 19 | min(derived_tstamp) as page_view_start, 20 | max(collector_tstamp) as collector_tstamp 21 | 22 | from events, 23 | lateral flatten (input => parse_json(contexts):data) web_page_context 24 | 25 | group by 1,2,3,4 26 | 27 | ) 28 | 29 | select * from page_views 30 | 31 | {% endmacro %} 32 | -------------------------------------------------------------------------------- /lambda-views/macros/models/sessions_model_sql.sql: -------------------------------------------------------------------------------- 1 | {% macro sessions_model_sql() %} 2 | 3 | with page_views as ( 4 | 5 | select * from {{ ref('page_views') }} 6 | 7 | {{ lambda_filter(column_name = 'collector_tstamp') }} 8 | 9 | ), 10 | 11 | sessions as ( 12 | 13 | select 14 | session_id, 15 | anonymous_user_id, 16 | 17 | count(*) as page_views, 18 | sum(approx_time_on_page) as total_time, 19 | min(page_view_start) as session_start, 20 | max(collector_tstamp) as collector_tstamp 21 | 22 | from page_views 23 | 24 | group by 1,2 25 | 26 | ) 27 | 28 | select * from sessions 29 | 30 | {% endmacro %} 31 | -------------------------------------------------------------------------------- /lambda-views/models/option_1/page_views.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='view' 4 | ) 5 | }} 6 | 7 | with historical as ( 8 | 9 | select 10 | *, 11 | 'historical' as _dbt_lambda_view_source, 12 | '{{ run_started_at }}' as _dbt_last_run_at 13 | 14 | from {{ ref('page_views__lambda_historical') }} 15 | 16 | where collector_tstamp < '{{ run_started_at }}' 17 | 18 | ), 19 | 20 | new as ( 21 | 22 | select 23 | *, 24 | 'new' as _dbt_lambda_view_source, 25 | '{{ run_started_at }}' as _dbt_last_run_at 26 | 27 | from {{ ref('page_views__lambda_current') }} 28 | 29 | where collector_tstamp >= '{{ run_started_at }}' 30 | 31 | ), 32 | 33 | 34 | unioned as ( 35 | 36 | select * from current_view 37 | 38 | union all 39 | 40 | select * from historical_table 41 | 42 | ) 43 | 44 | select * from unioned 45 | -------------------------------------------------------------------------------- /lambda-views/models/option_1/page_views__lambda_current.sql: 
-------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='view' 4 | ) 5 | }} 6 | 7 | with events as ( 8 | 9 | select * from {{ source('snowplow','event') }} 10 | where collector_tstamp >= '{{ run_started_at }}' 11 | 12 | ), 13 | 14 | page_views as ( 15 | 16 | select 17 | domain_sessionid as session_id, 18 | domain_userid as anonymous_user_id, 19 | web_page_context.value:data.id::varchar as page_view_id, 20 | page_url, 21 | count(*) * 10 as approx_time_on_page, 22 | min(derived_tstamp) as page_view_start, 23 | max(collector_tstamp) as collector_tstamp 24 | 25 | from events, 26 | lateral flatten (input => parse_json(contexts):data) web_page_context 27 | 28 | group by 1,2,3,4 29 | 30 | ) 31 | 32 | select * from page_views 33 | -------------------------------------------------------------------------------- /lambda-views/models/option_1/page_views__lambda_historical.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | unique_key = 'page_view_id' 5 | ) 6 | }} 7 | 8 | with events as ( 9 | 10 | select * from {{ source('snowplow','event') }} 11 | {% if is_incremental() %} 12 | where collector_tstamp >= (select max(collector_tstamp) from {{ this }}) 13 | {% endif %} 14 | 15 | ), 16 | 17 | page_views as ( 18 | 19 | select 20 | domain_sessionid as session_id, 21 | domain_userid as anonymous_user_id, 22 | web_page_context.value:data.id::varchar as page_view_id, 23 | page_url, 24 | count(*) * 10 as approx_time_on_page, 25 | min(derived_tstamp) as page_view_start, 26 | max(collector_tstamp) as collector_tstamp 27 | 28 | from events, 29 | lateral flatten (input => parse_json(contexts):data) web_page_context 30 | 31 | group by 1,2,3,4 32 | 33 | ) 34 | 35 | select * from page_views 36 | -------------------------------------------------------------------------------- /lambda-views/models/option_1/sessions.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='view' 4 | ) 5 | }} 6 | 7 | with historical as ( 8 | 9 | select 10 | *, 11 | 'historical' as _dbt_lambda_view_source, 12 | '{{ run_started_at }}' as _dbt_last_run_at 13 | 14 | from {{ ref('sessions__lambda_historical') }} 15 | 16 | where collector_tstamp < '{{ run_started_at }}' 17 | 18 | ), 19 | 20 | new as ( 21 | 22 | select 23 | *, 24 | 'new' as _dbt_lambda_view_source, 25 | '{{ run_started_at }}' as _dbt_last_run_at 26 | 27 | from {{ ref('sessions__lambda_current') }} 28 | 29 | where collector_tstamp >= '{{ run_started_at }}' 30 | 31 | ), 32 | 33 | unioned as ( 34 | 35 | select * from current_view 36 | 37 | union all 38 | 39 | select * from historical_table 40 | 41 | ) 42 | 43 | select * from unioned 44 | -------------------------------------------------------------------------------- /lambda-views/models/option_1/sessions__lambda_current.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='view' 4 | ) 5 | }} 6 | 7 | with page_views as ( 8 | 9 | select * from {{ ref('page_views') }} 10 | 11 | where collector_tstamp >= '{{ run_started_at }}' 12 | 13 | ), 14 | 15 | sessions as ( 16 | 17 | select 18 | session_id, 19 | anonymous_user_id, 20 | 21 | count(*) as page_views, 22 | sum(approx_time_on_page) as total_time, 23 | min(page_view_start) as session_start, 24 | max(collector_tstamp) as collector_tstamp 25 | 26 | from page_views 27 | 28 | group by 1,2 29 | 30 | ) 31 | 32 
| select * from sessions 33 | -------------------------------------------------------------------------------- /lambda-views/models/option_1/sessions__lambda_historical.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='incremental', 4 | unique_key = 'session_id' 5 | ) 6 | }} 7 | 8 | with page_views as ( 9 | 10 | select * from {{ ref('page_views') }} 11 | 12 | {% if is_incremental() %} 13 | where collector_tstamp >= (select max(collector_tstamp) from {{ this }}) 14 | {% endif %} 15 | 16 | ), 17 | 18 | sessions as ( 19 | 20 | select 21 | session_id, 22 | anonymous_user_id, 23 | 24 | count(*) as page_views, 25 | sum(approx_time_on_page) as total_time, 26 | min(page_view_start) as session_start, 27 | max(collector_tstamp) as collector_tstamp 28 | 29 | from page_views 30 | 31 | group by 1,2 32 | 33 | ) 34 | 35 | select * from sessions 36 | -------------------------------------------------------------------------------- /lambda-views/models/option_2/page_views.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | unique_key = 'page_view_id' 5 | ) 6 | }} 7 | 8 | {{ lambda_union( 9 | historical_relation = ref(this.name ~ '__lambda_historical'), 10 | model_sql = page_views_model_sql() 11 | ) }} 12 | -------------------------------------------------------------------------------- /lambda-views/models/option_2/page_views__lambda_historical.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'incremental', 4 | incremental_strategy = 'delete+insert', 5 | unique_key = 'page_view_id', 6 | schema = 'lambda_historical', 7 | alias = 'page_views' 8 | ) 9 | }} 10 | 11 | {{ page_views_model_sql() }} 12 | -------------------------------------------------------------------------------- /lambda-views/models/option_2/sessions.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'view', 4 | unique_key = 'session_id' 5 | ) 6 | }} 7 | 8 | {{ lambda_union( 9 | historical_relation = ref(this.name ~ '__lambda_historical'), 10 | model_sql = sessions_model_sql() 11 | ) }} 12 | -------------------------------------------------------------------------------- /lambda-views/models/option_2/sessions__lambda_historical.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'incremental', 4 | incremental_strategy = 'delete+insert', 5 | unique_key = 'session_id', 6 | schema = 'lambda_historical', 7 | alias = 'sessions' 8 | ) 9 | }} 10 | 11 | {{ sessions_model_sql() }} 12 | -------------------------------------------------------------------------------- /lambda-views/models/sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | sources: 3 | - name: snowplow 4 | database: raw 5 | loaded_at_field: collector_tstamp 6 | freshness: 7 | error_after: {count: 1, period: hour} 8 | tables: 9 | - name: event 10 | -------------------------------------------------------------------------------- /lambda-views/models/thought_experiment/page_views.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'lambda_view', 4 | unique_key = 'page_view_id', 5 | historical_config = { 6 | 'materialized': 'incremental', 7 | 'schema': 'lambda_historical', 8 | 'alias': 
'page_views' 9 | } 10 | ) 11 | }} 12 | 13 | with events as ( 14 | 15 | select * from {{ source('snowplow','event') }} 16 | 17 | {{ lambda_filter('collector_tstamp') }} 18 | 19 | ), 20 | 21 | page_views as ( 22 | 23 | select 24 | domain_sessionid as session_id, 25 | domain_userid as anonymous_user_id, 26 | web_page_context.value:data.id::varchar as page_view_id, 27 | page_url, 28 | count(*) * 10 as approx_time_on_page, 29 | min(derived_tstamp) as page_view_start, 30 | max(collector_tstamp) as collector_tstamp 31 | 32 | from events, 33 | lateral flatten (input => parse_json(contexts):data) web_page_context 34 | 35 | group by 1,2,3,4 36 | 37 | ) 38 | 39 | select * from page_views 40 | -------------------------------------------------------------------------------- /lambda-views/models/thought_experiment/sessions.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized = 'lambda_view', 4 | unique_key = 'session_id', 5 | historical_config = { 6 | 'materialized': 'incremental', 7 | 'schema': 'lambda_historical', 8 | 'alias': 'sessions' 9 | } 10 | ) 11 | }} 12 | 13 | with page_views as ( 14 | 15 | select * from {{ ref('page_views') }} 16 | 17 | {{ lambda_filter(column_name = 'collector_tstamp') }} 18 | 19 | ), 20 | 21 | sessions as ( 22 | 23 | select 24 | session_id, 25 | anonymous_user_id, 26 | 27 | count(*) as page_views, 28 | sum(approx_time_on_page) as total_time, 29 | min(page_view_start) as session_start, 30 | max(collector_tstamp) as collector_tstamp 31 | 32 | from page_views 33 | 34 | group by 1,2 35 | 36 | ) 37 | 38 | select * from sessions 39 | -------------------------------------------------------------------------------- /materialized-views/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /materialized-views/README.md: -------------------------------------------------------------------------------- 1 | ## dbt_labs_materialized_views 2 | 3 | `dbt_labs_materialized_views` is a dbt project containing materializations, helper macros, and some builtin macro overrides that enable use of materialized views in your dbt project. It takes a conceptual approach similar to that of the existing `incremental` materialization: 4 | - In a "full refresh" run, drop and recreate the MV from scratch. 5 | - Otherwise, "refresh" the MV as appropriate. Depending on the database, that could require DML (`refresh`) or no action. 6 | 7 | At any point, if the database object corresponding to a MV model exists instead as a table or standard view, dbt will attempt to drop it and recreate the model from scratch as a materialized view. 8 | 9 | Materialized views vary significantly across databases, as do their current limitations. Be sure to read the documentation for your adapter. 10 | 11 | If you're here, you may also like the [dbt-materialize](https://github.com/MaterializeInc/materialize/tree/main/misc/dbt-materialize) plugin, which enables dbt to materialize models as materialized views in [Materialize](https://materialize.io/). 12 | 13 | ## Setup 14 | 15 | ### General installation: 16 | 17 | You can install the materialized-view functionality using one of the following methods.
18 | 19 | - Install this project as a package ([package-management docs](https://docs.getdbt.com/docs/building-a-dbt-project/package-management)) 20 | - [Local package](https://docs.getdbt.com/docs/building-a-dbt-project/package-management#local-packages): by referencing the [`materialized-views`](https://github.com/dbt-labs/dbt-labs-experimental-features/tree/master/materialized-views) folder. 21 | - [Git package](https://docs.getdbt.com/docs/building-a-dbt-project/package-management#git-packages) using [project subdirectories](https://docs.getdbt.com/docs/building-a-dbt-project/package-management#git-packages): again by referencing the [`materialized-views`](https://github.com/dbt-labs/dbt-labs-experimental-features/tree/master/materialized-views) folder. 22 | - Copy-paste the files from `macros/` (specifically `default` and your adapter) into your own project. 23 | 24 | ### Extra installation steps for Postgres and Redshift 25 | 26 | The Postgres and Redshift implementations both require overriding the builtin versions of some adapter macros. If you've installed `dbt_labs_materialized_views` as a local package, you can achieve this override by creating a file `macros/*.sql` in your project with the following contents: 27 | 28 | ```sql 29 | {# postgres and redshift #} 30 | 31 | {% macro drop_relation(relation) -%} 32 | {{ return(dbt_labs_materialized_views.drop_relation(relation)) }} 33 | {% endmacro %} 34 | 35 | {% macro postgres__list_relations_without_caching(schema_relation) %} 36 | {{ return(dbt_labs_materialized_views.postgres__list_relations_without_caching(schema_relation)) }} 37 | {% endmacro %} 38 | 39 | {% macro postgres_get_relations() %} 40 | {{ return(dbt_labs_materialized_views.postgres_get_relations()) }} 41 | {% endmacro %} 42 | 43 | {# redshift only #} 44 | 45 | {% macro redshift__list_relations_without_caching(schema_relation) %} 46 | {{ return(dbt_labs_materialized_views.redshift__list_relations_without_caching(schema_relation)) }} 47 | {% endmacro %} 48 | 49 | {% macro load_relation(relation) %} 50 | {{ return(dbt_labs_materialized_views.redshift_load_relation_or_mv(relation)) }} 51 | {% endmacro %} 52 | ``` 53 | 54 | ## Postgres 55 | 56 | - Supported model configs: none 57 | - [docs](https://www.postgresql.org/docs/9.3/rules-materializedviews.html) 58 | 59 | ## Redshift 60 | 61 | - Supported model configs: `sort`, `dist`, `auto_refresh` 62 | - [docs](https://docs.aws.amazon.com/redshift/latest/dg/materialized-view-overview.html) 63 | - Anecdotally, `refresh materialized view ...` is very slow to run. By contrast, `auto_refresh` runs in the background, with minimal disruption to other workloads, at the risk of some small potential latency. 64 | - ❗ MVs do not support late binding, so if an underlying table is cascade-dropped, the MV will be dropped as well. This would be fine, except that MVs don't include their "true" dependencies in `pg_depend`. Instead, a materialized view claims to depend on a table relation called `mv_tbl__[MV_name]__0`, in place of the name of the true underlying table (https://github.com/awslabs/amazon-redshift-utils/issues/499). As such, dbt's runtime cache is unable to reliably know if a MV has been dropped when it cascade-drops the underlying table. This package requires an override of `load_relation()` to perform a "hard" check (database query of `stv_mv_info`) every time dbt's cache thinks a `materializedview` relation may already exist. 
65 | - ❗ MVs do appear in `pg_views`, but the only way we can know that they're materialized views is that the `create materialized view` DDL appears in their `definition`, instead of just the SQL without DDL (standard views). There's another Redshift system table, `stv_mv_info`, but it can't effectively be joined with `pg_views` because they're different types of system tables. 66 | - ❗ If a column in the underlying table is renamed, or removed and re-added (e.g. varchar widening), the materialized view cannot be refreshed: 67 | ``` 68 | Database Error in model test_mv (models/test_mv.sql) 69 | Materialized view test_mv is unrefreshable as a column was renamed for a base table. 70 | compiled SQL at target/run/dbt_labs_experimental_features_integration_tests/test_mv.sql 71 | ``` 72 | 73 | ## BigQuery 74 | 75 | - Supported model configs: `auto_refresh`, `refresh_interval_minutes` 76 | - [docs](https://cloud.google.com/bigquery/docs/materialized-views-intro) 77 | - ❗ Although BQ does not have `drop ... cascade`, if the base table of a MV is dropped and recreated, the MV also needs to be dropped and recreated: 78 | ``` 79 | Materialized view dbt-dev-168022:dbt_jcohen.test_mv references table dbt-dev-168022:dbt_jcohen.base_tbl which was deleted and recreated. The view must be deleted and recreated as well. 80 | ``` 81 | 82 | ## Snowflake 83 | 84 | - Supported model configs: `secure`, `cluster_by`, `automatic_clustering`, `persist_docs` (relation only) 85 | - [docs](https://docs.snowflake.com/en/user-guide/views-materialized.html) 86 | - ❗ Note: Snowflake MVs are only enabled on enterprise accounts 87 | - ❗ Although Snowflake does not have `drop ... cascade`, if the base table of a MV is dropped and recreated, the MV also needs to be dropped and recreated, otherwise the following error will appear: 88 | ``` 89 | Failure during expansion of view 'TEST_MV': SQL compilation error: Materialized View TEST_MV is invalid.
90 | ``` 91 | -------------------------------------------------------------------------------- /materialized-views/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'dbt_labs_materialized_views' 2 | version: '0.2.0' 3 | config-version: 2 4 | require-dbt-version: ">=1.0.0" 5 | 6 | model-paths: ["models"] 7 | analysis-paths: ["analysis"] 8 | test-paths: ["tests"] 9 | seed-paths: ["seed"] 10 | macro-paths: ["macros"] 11 | snapshot-paths: ["snapshots"] 12 | 13 | target-path: "target" 14 | clean-targets: 15 | - "target" 16 | - "dbt_modules" 17 | -------------------------------------------------------------------------------- /materialized-views/integration_tests/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /materialized-views/integration_tests/Makefile: -------------------------------------------------------------------------------- 1 | test-postgres: 2 | dbt deps 3 | dbt seed --target postgres --full-refresh 4 | dbt run --target postgres --full-refresh --vars 'update: false' 5 | dbt run --target postgres --vars 'update: true' 6 | dbt test --target postgres 7 | 8 | test-redshift: 9 | dbt deps 10 | dbt seed --target redshift --full-refresh 11 | dbt run --target redshift --full-refresh --vars 'update: false' 12 | dbt run --target redshift --vars 'update: true' 13 | sleep 10 # wait for auto refresh 14 | dbt test --target redshift 15 | 16 | test-snowflake: 17 | dbt deps 18 | dbt seed --profile garage-snowflake --full-refresh 19 | dbt run --profile garage-snowflake --full-refresh --vars 'update: false' 20 | dbt run --profile garage-snowflake --vars 'update: true' 21 | dbt test --profile garage-snowflake 22 | 23 | test-bigquery: 24 | dbt deps 25 | dbt seed --target bigquery --full-refresh 26 | dbt run --target bigquery --full-refresh --vars 'update: false' 27 | dbt run --target bigquery --vars 'update: true' 28 | dbt test --target bigquery 29 | 30 | test-all: test-postgres test-redshift test-snowflake test-bigquery 31 | echo "Completed successfully" 32 | -------------------------------------------------------------------------------- /materialized-views/integration_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | name: 'dbt_labs_materialized_views_integration_tests' 3 | version: '0.2.0' 4 | config-version: 2 5 | 6 | profile: 'integration_tests' 7 | 8 | model-paths: ["models"] 9 | analysis-paths: ["analysis"] 10 | test-paths: ["tests"] 11 | seed-paths: ["seed"] 12 | macro-paths: ["macros"] 13 | 14 | target-path: "target" 15 | clean-targets: 16 | - "target" 17 | - "dbt_modules" 18 | 19 | quoting: 20 | identifier: false 21 | schema: false 22 | 23 | seeds: 24 | quote_columns: false 25 | -------------------------------------------------------------------------------- /materialized-views/integration_tests/macros/overrides.sql: -------------------------------------------------------------------------------- 1 | {# postgres + redshift #} 2 | 3 | {% macro drop_relation(relation) -%} 4 | {{ return(dbt_labs_materialized_views.drop_relation(relation)) }} 5 | {% endmacro %} 6 | 7 | {% macro postgres__list_relations_without_caching(schema_relation) %} 8 | {{ return(dbt_labs_materialized_views.postgres__list_relations_without_caching(schema_relation)) }} 9 | {% endmacro %} 10 | 11 | {% macro postgres_get_relations() %} 12 | 
{{ return(dbt_labs_materialized_views.postgres_get_relations()) }} 13 | {% endmacro %} 14 | 15 | {# redshift only #} 16 | 17 | {% macro redshift__list_relations_without_caching(schema_relation) %} 18 | {{ return(dbt_labs_materialized_views.redshift__list_relations_without_caching(schema_relation)) }} 19 | {% endmacro %} 20 | 21 | {% macro load_relation(relation) %} 22 | {% if adapter.type() == 'redshift' %} 23 | {{ return(dbt_labs_materialized_views.redshift_load_relation_or_mv(relation)) }} 24 | {% else %} 25 | {{ return(dbt.load_relation(relation)) }} 26 | {% endif %} 27 | {% endmacro %} 28 | -------------------------------------------------------------------------------- /materialized-views/integration_tests/models/base_tbl.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'incremental', 3 | unique_key = 'id' 4 | )}} 5 | 6 | -- depends on: {{ref('seed_update')}} 7 | -- depends on: {{ref('seed')}} 8 | 9 | {% if is_incremental() %} 10 | 11 | select * from {{ref('seed_update')}} 12 | 13 | {% else %} 14 | 15 | select * from {{ref('seed')}} 16 | 17 | {% endif %} 18 | -------------------------------------------------------------------------------- /materialized-views/integration_tests/models/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: test_mv_manual 5 | tests: 6 | - dbt_utils.equality: 7 | compare_model: ref('expected') 8 | - name: test_mv_auto 9 | tests: 10 | - dbt_utils.equality: 11 | compare_model: ref('expected') 12 | -------------------------------------------------------------------------------- /materialized-views/integration_tests/models/test_mv_auto.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'materialized_view', 3 | auto_refresh = true 4 | )}} 5 | 6 | select 7 | 8 | gender, 9 | count(*) as num 10 | 11 | from {{ref('base_tbl')}} 12 | group by 1 13 | -------------------------------------------------------------------------------- /materialized-views/integration_tests/models/test_mv_manual.sql: -------------------------------------------------------------------------------- 1 | {{config( 2 | materialized = 'materialized_view', 3 | auto_refresh = false 4 | )}} 5 | 6 | select 7 | 8 | gender, 9 | count(*) as num 10 | 11 | from {{ref('base_tbl')}} 12 | group by 1 13 | -------------------------------------------------------------------------------- /materialized-views/integration_tests/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - local: ../ 3 | - package: fishtown-analytics/dbt_utils 4 | version: 0.6.4 5 | -------------------------------------------------------------------------------- /materialized-views/integration_tests/seed/expected.csv: -------------------------------------------------------------------------------- 1 | gender,num 2 | Female,6 3 | Male,4 -------------------------------------------------------------------------------- /materialized-views/integration_tests/seed/seed.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,gender,ip_address 2 | 1,Jacqueline,Hunter,jhunter0@pbs.org,Male,59.80.20.168 3 | 2,Kathryn,Walker,kwalker1@ezinearticles.com,Female,194.121.179.35 4 | 3,Gerald,Ryan,gryan2@com.com,Male,11.3.212.243 5 | 4,Bonnie,Spencer,bspencer3@ameblo.jp,Female,216.32.196.175 6 | 
5,Harold,Taylor,htaylor4@people.com.cn,Male,253.10.246.136 -------------------------------------------------------------------------------- /materialized-views/integration_tests/seed/seed_update.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,gender,ip_address 2 | 1,Jacqueline,Hunter,jhunter0@pbs.org,Male,59.80.20.168 3 | 2,Kathryn,Walker,kwalker1@ezinearticles.com,Female,194.121.179.35 4 | 3,Gerald,Ryan,gryan2@com.com,Female,11.3.212.243 5 | 4,Bonnie,Spencer,bspencer3@ameblo.jp,Female,216.32.196.175 6 | 5,Harold,Taylor,htaylor4@people.com.cn,Male,253.10.246.136 7 | 6,Jack,Griffin,jgriffin5@t.co,Female,16.13.192.220 8 | 7,Wanda,Arnold,warnold6@google.nl,Female,232.116.150.64 9 | 8,Craig,Ortiz,cortiz7@sciencedaily.com,Male,199.126.106.13 10 | 9,Gary,Day,gday8@nih.gov,Male,35.81.68.186 11 | 10,Rose,Wright,rwright9@yahoo.co.jp,Female,236.82.178.100 -------------------------------------------------------------------------------- /materialized-views/macros/bigquery/adapters.sql: -------------------------------------------------------------------------------- 1 | {% macro bigquery_options() %} 2 | {%- set opts = kwargs -%} 3 | {%- set options -%} 4 | OPTIONS({% for opt_key, opt_val in kwargs.items() if opt_val is not none %} 5 | {{ opt_key }}={{ opt_val }}{{ "," if not loop.last }} 6 | {%- endfor -%}) 7 | {%- endset %} 8 | {%- do return(options) -%} 9 | {%- endmacro -%} 10 | 11 | {% macro bigquery__create_materialized_view_as(relation, sql, config) -%} 12 | 13 | {%- set enable_refresh = config.get('auto_refresh', none) -%} 14 | {%- set refresh_interval_minutes = config.get('refresh_interval_minutes', none) -%} 15 | {%- set sql_header = config.get('sql_header', none) -%} 16 | 17 | {{ sql_header if sql_header is not none }} 18 | 19 | create materialized view {{relation}} 20 | {{ dbt_labs_materialized_views.bigquery_options( 21 | enable_refresh=enable_refresh, 22 | refresh_interval_minutes=refresh_interval_minutes 23 | ) }} 24 | as ( 25 | {{sql}} 26 | ) 27 | 28 | {% endmacro %} 29 | 30 | 31 | {% macro bigquery__refresh_materialized_view(relation, config) -%} 32 | 33 | {%- set is_auto_refresh = config.get('auto_refresh', true) %} 34 | 35 | {%- if is_auto_refresh == false -%} {# manual refresh #} 36 | 37 | {% set refresh_command %} 38 | call bq.refresh_materialized_view('{{relation|replace("`","")}}') 39 | {% endset %} 40 | 41 | {%- do return(refresh_command) -%} 42 | 43 | {%- else -%} {# automatic refresh #} 44 | 45 | {%- do log("Skipping materialized view " ~ relation ~ " because it is set 46 | to refresh automatically") -%} 47 | 48 | {%- do return(none) -%} 49 | 50 | {%- endif -%} 51 | 52 | {% endmacro %} 53 | -------------------------------------------------------------------------------- /materialized-views/macros/bigquery/materialized_view.sql: -------------------------------------------------------------------------------- 1 | {% materialization materialized_view, adapter='bigquery' -%} 2 | 3 | {% set full_refresh_mode = (should_full_refresh()) %} 4 | 5 | {% set target_relation = this %} 6 | {% set existing_relation = load_relation(this) %} 7 | {% set tmp_relation = make_temp_relation(this) %} 8 | 9 | {{ run_hooks(pre_hooks) }} 10 | 11 | {% if existing_relation is none %} 12 | {% set build_sql = dbt_labs_materialized_views.create_materialized_view_as(target_relation, sql, config) %} 13 | {% elif existing_relation.is_view or existing_relation.is_table %} 14 | {#-- Can't overwrite a view with a table - we must drop --#} 15 | 
{{ log("Dropping relation " ~ target_relation ~ " because it is a " ~ existing_relation.type ~ " and this model is a materialized view.") }} 16 | {% do adapter.drop_relation(existing_relation) %} 17 | {% set build_sql = dbt_labs_materialized_views.create_materialized_view_as(target_relation, sql, config) %} 18 | {% elif full_refresh_mode %} 19 | {#-- create or replace not yet supported for materialized views --#} 20 | {{ log("Dropping relation " ~ target_relation ~ " because replacing an existing materialized view is not supported.") }} 21 | {% do adapter.drop_relation(existing_relation) %} 22 | {% set build_sql = dbt_labs_materialized_views.create_materialized_view_as(target_relation, sql, config) %} 23 | {% else %} 24 | {% set build_sql = dbt_labs_materialized_views.refresh_materialized_view(target_relation, config) %} 25 | {% endif %} 26 | 27 | {% if build_sql %} 28 | {% call statement("main") %} 29 | {{ build_sql }} 30 | {% endcall %} 31 | {% else %} 32 | {{ store_result('main', 'SKIP') }} 33 | {% endif %} 34 | 35 | {{ run_hooks(post_hooks) }} 36 | 37 | {% do persist_docs(target_relation, model) %} 38 | 39 | {{ return({'relations': [target_relation]}) }} 40 | 41 | {%- endmaterialization %} 42 | -------------------------------------------------------------------------------- /materialized-views/macros/default/adapters.sql: -------------------------------------------------------------------------------- 1 | {% macro create_materialized_view_as(relation, sql, config) %} 2 | {{ return(adapter.dispatch('create_materialized_view_as', macro_namespace = 'dbt_labs_materialized_views')(relation, sql, config)) }} 3 | {% endmacro %} 4 | 5 | {% macro default__create_materialized_view_as(relation, sql, config) -%} 6 | 7 | create materialized view {{relation}} as ( 8 | {{sql}} 9 | ) 10 | 11 | {% endmacro %} 12 | 13 | {% macro refresh_materialized_view(relation, config) %} 14 | {{ return(adapter.dispatch('refresh_materialized_view', macro_namespace = 'dbt_labs_materialized_views')(relation, config)) }} 15 | {% endmacro %} 16 | 17 | {% macro default__refresh_materialized_view(relation, config) -%} 18 | 19 | refresh materialized view {{relation}} 20 | 21 | {% endmacro %} 22 | 23 | {# override builtin behavior of adapter.drop_relation #} 24 | {% macro drop_relation(relation) -%} 25 | {% set relation_type = 'materialized view' if relation.type == 'materializedview' else relation.type %} 26 | {% call statement('drop_relation', auto_begin=False) -%} 27 | drop {{ relation_type }} if exists {{ relation }} cascade 28 | {%- endcall %} 29 | {% endmacro %} 30 | -------------------------------------------------------------------------------- /materialized-views/macros/default/materialized_view.sql: -------------------------------------------------------------------------------- 1 | {% materialization materialized_view, default -%} 2 | 3 | {% set full_refresh_mode = (should_full_refresh()) %} 4 | 5 | {% set target_relation = this %} 6 | {% set existing_relation = load_relation(this) %} 7 | {% set tmp_relation = make_temp_relation(this) %} 8 | 9 | {{ run_hooks(pre_hooks, inside_transaction=False) }} 10 | 11 | -- `BEGIN` happens here: 12 | {{ run_hooks(pre_hooks, inside_transaction=True) }} 13 | 14 | {% set to_drop = [] %} 15 | 16 | {% if existing_relation is none %} 17 | {% set build_sql = dbt_labs_materialized_views.create_materialized_view_as(target_relation, sql, config) %} 18 | 19 | {% elif full_refresh_mode or existing_relation.type != 'materializedview' %} 20 | {#-- Make sure the backup doesn't exist so we 
don't encounter issues with the rename below #} 21 | {% set backup_identifier = existing_relation.identifier ~ "__dbt_backup" %} 22 | {% set backup_relation = existing_relation.incorporate(path={"identifier": backup_identifier}) %} 23 | {% do adapter.drop_relation(backup_relation) %} 24 | 25 | {% do adapter.rename_relation(target_relation, backup_relation) %} 26 | {% set build_sql = dbt_labs_materialized_views.create_materialized_view_as(target_relation, sql, config) %} 27 | {% do to_drop.append(backup_relation) %} 28 | 29 | {% else %} 30 | {% set build_sql = dbt_labs_materialized_views.refresh_materialized_view(target_relation, config) %} 31 | {% endif %} 32 | 33 | {% if build_sql %} 34 | 35 | {% call statement("main") %} 36 | {{ build_sql }} 37 | {% endcall %} 38 | 39 | {{ run_hooks(post_hooks, inside_transaction=True) }} 40 | 41 | {% do persist_docs(target_relation, model) %} 42 | 43 | -- `COMMIT` happens here 44 | {% do adapter.commit() %} 45 | 46 | {% else %} 47 | 48 | {{ store_result('main', 'SKIP') }} 49 | 50 | {% endif %} 51 | 52 | {% for rel in to_drop %} 53 | {% do adapter.drop_relation(rel) %} 54 | {% endfor %} 55 | 56 | {{ run_hooks(post_hooks, inside_transaction=False) }} 57 | 58 | {{ return({'relations': [target_relation]}) }} 59 | 60 | {%- endmaterialization %} 61 | -------------------------------------------------------------------------------- /materialized-views/macros/postgres/adapters.sql: -------------------------------------------------------------------------------- 1 | {% macro postgres__list_relations_without_caching(schema_relation) %} 2 | {% call statement('list_relations_without_caching', fetch_result=True) -%} 3 | select 4 | '{{ schema_relation.database }}' as database, 5 | tablename as name, 6 | schemaname as schema, 7 | 'table' as type 8 | from pg_tables 9 | where schemaname ilike '{{ schema_relation.schema }}' 10 | union all 11 | select 12 | '{{ schema_relation.database }}' as database, 13 | viewname as name, 14 | schemaname as schema, 15 | 'view' as type 16 | from pg_views 17 | where schemaname ilike '{{ schema_relation.schema }}' 18 | union all 19 | select 20 | '{{ schema_relation.database }}' as database, 21 | matviewname as name, 22 | schemaname as schema, 23 | 'materializedview' as type 24 | from pg_matviews 25 | where schemaname ilike '{{ schema_relation.schema }}' 26 | {% endcall %} 27 | {{ return(load_result('list_relations_without_caching').table) }} 28 | {% endmacro %} 29 | 30 | 31 | {% macro postgres_get_relations () -%} 32 | 33 | {# 34 | -- in pg_depend, objid is the dependent, refobjid is the referenced object 35 | -- > a pg_depend entry indicates that the referenced object cannot be 36 | -- > dropped without also dropping the dependent object. 
37 | #} 38 | 39 | {%- call statement('relations', fetch_result=True) -%} 40 | with relation as ( 41 | select 42 | pg_rewrite.ev_class as class, 43 | pg_rewrite.oid as id 44 | from pg_rewrite 45 | ), 46 | class as ( 47 | select 48 | oid as id, 49 | relname as name, 50 | relnamespace as schema, 51 | relkind as kind 52 | from pg_class 53 | ), 54 | dependency as ( 55 | select 56 | pg_depend.objid as id, 57 | pg_depend.refobjid as ref 58 | from pg_depend 59 | ), 60 | schema as ( 61 | select 62 | pg_namespace.oid as id, 63 | pg_namespace.nspname as name 64 | from pg_namespace 65 | where nspname != 'information_schema' and nspname not like 'pg\_%' 66 | ), 67 | referenced as ( 68 | select 69 | relation.id AS id, 70 | referenced_class.name , 71 | referenced_class.schema , 72 | referenced_class.kind 73 | from relation 74 | join class as referenced_class on relation.class=referenced_class.id 75 | where referenced_class.kind in ('r', 'v', 'm') 76 | ), 77 | relationships as ( 78 | select 79 | referenced.name as referenced_name, 80 | referenced.schema as referenced_schema_id, 81 | dependent_class.name as dependent_name, 82 | dependent_class.schema as dependent_schema_id, 83 | referenced.kind as kind 84 | from referenced 85 | join dependency on referenced.id=dependency.id 86 | join class as dependent_class on dependency.ref=dependent_class.id 87 | where 88 | (referenced.name != dependent_class.name or 89 | referenced.schema != dependent_class.schema) 90 | ) 91 | 92 | select 93 | referenced_schema.name as referenced_schema, 94 | relationships.referenced_name as referenced_name, 95 | dependent_schema.name as dependent_schema, 96 | relationships.dependent_name as dependent_name 97 | from relationships 98 | join schema as dependent_schema on relationships.dependent_schema_id=dependent_schema.id 99 | join schema as referenced_schema on relationships.referenced_schema_id=referenced_schema.id 100 | group by referenced_schema, referenced_name, dependent_schema, dependent_name 101 | order by referenced_schema, referenced_name, dependent_schema, dependent_name; 102 | 103 | {%- endcall -%} 104 | 105 | {{ return(load_result('relations').table) }} 106 | {% endmacro %} 107 | -------------------------------------------------------------------------------- /materialized-views/macros/redshift/adapters.sql: -------------------------------------------------------------------------------- 1 | {% macro redshift__create_materialized_view_as(relation, sql, config) -%} 2 | 3 | {%- set _dist = config.get('dist') -%} 4 | {%- set _sort_type = config.get( 5 | 'sort_type', 6 | validator=validation.any['compound', 'interleaved']) -%} 7 | {%- set _sort = config.get( 8 | 'sort', 9 | validator=validation.any[list, basestring]) -%} 10 | {%- set sql_header = config.get('sql_header', none) -%} 11 | {%- set auto_refresh = 'yes' if config.get('auto_refresh', false) else 'no' %} 12 | 13 | {{ sql_header if sql_header is not none }} 14 | 15 | create materialized view {{ relation }} 16 | {{ dist(_dist) }} 17 | {{ sort(_sort_type, _sort) }} 18 | auto refresh {{ auto_refresh }} 19 | as ( 20 | {{ sql }} 21 | ); 22 | {%- endmacro %} 23 | 24 | 25 | {% macro redshift__refresh_materialized_view(relation, config) -%} 26 | 27 | {%- set is_auto_refresh = config.get('auto_refresh', true) %} 28 | 29 | {%- if is_auto_refresh == false -%} {# manual refresh #} 30 | 31 | refresh materialized view {{relation}} 32 | 33 | {%- else -%} {# automatic refresh #} 34 | 35 | {%- do log("Skipping materialized view " ~ relation ~ " because it is set 36 | to refresh 
automatically") -%} 37 | 38 | {%- do return(none) -%} 39 | 40 | {%- endif -%} 41 | 42 | {% endmacro %} 43 | 44 | 45 | {% macro redshift__list_relations_without_caching(schema_relation) %} 46 | {# 47 | pretty silly, but this is the best Redshift has given us. 48 | we effectively can't join stv_mv_info here, 49 | because they're different types of sytem tables (pg_ vs. stv_) 50 | #} 51 | 52 | {% call statement('list_relations_without_caching', fetch_result=True) -%} 53 | select 54 | '{{ schema_relation.database }}' as database, 55 | tablename as name, 56 | schemaname as schema, 57 | 'table' as type 58 | from pg_tables 59 | where schemaname ilike '{{ schema_relation.schema }}' 60 | union all 61 | select 62 | '{{ schema_relation.database }}' as database, 63 | viewname as name, 64 | schemaname as schema, 65 | case when definition ilike '%create materialized view%' 66 | then 'materializedview' 67 | else 'view' 68 | end as type 69 | from pg_views 70 | where schemaname ilike '{{ schema_relation.schema }}' 71 | {% endcall %} 72 | 73 | {{ return(load_result('list_relations_without_caching').table) }} 74 | {% endmacro %} 75 | 76 | 77 | {% macro redshift_load_relation_or_mv(relation) %} 78 | 79 | {% set rel = adapter.get_relation( 80 | database=relation.database, 81 | schema=relation.schema, 82 | identifier=relation.identifier 83 | ) -%} 84 | 85 | {% if rel.type == 'materializedview' and execute %} 86 | 87 | {# materialized views are not properly registered in pg_depend, 88 | so the cache can miss that they've been dropped 89 | https://github.com/awslabs/amazon-redshift-utils/issues/499 #} 90 | 91 | {% set hard_check_mv_sql %} 92 | 93 | select count(*) from stv_mv_info 94 | where schema = '{{ rel.schema }}' 95 | and name = '{{ rel.identifier }}' 96 | 97 | {% endset %} 98 | 99 | {% set result = run_query(hard_check_mv_sql)[0][0] %} 100 | {% set mv_rel = rel if result > 0 else none %} 101 | {% do return(mv_rel) %} 102 | 103 | {% else %} 104 | 105 | {% do return(rel) %} 106 | 107 | {% endif %} 108 | 109 | {% endmacro %} 110 | 111 | -------------------------------------------------------------------------------- /materialized-views/macros/snowflake/adapters.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake__create_materialized_view_as(relation, sql, config) -%} 2 | {%- set secure = config.get('secure', default=false) -%} 3 | {%- set cluster_by_keys = config.get('cluster_by', default=none) -%} 4 | {%- set enable_automatic_clustering = config.get('automatic_clustering', default=false) -%} 5 | {%- set sql_header = config.get('sql_header', none) -%} 6 | 7 | {%- if cluster_by_keys is not none and cluster_by_keys is string -%} 8 | {%- set cluster_by_keys = [cluster_by_keys] -%} 9 | {%- endif -%} 10 | {%- if cluster_by_keys is not none -%} 11 | {%- set cluster_by_string = cluster_by_keys|join(", ")-%} 12 | {% else %} 13 | {%- set cluster_by_string = none -%} 14 | {%- endif -%} 15 | 16 | {{ sql_header if sql_header is not none }} 17 | 18 | create or replace 19 | {% if secure -%} secure {%- endif %} 20 | materialized view {{relation}} 21 | as ( 22 | {{sql}} 23 | ); 24 | 25 | {% if cluster_by_string is not none and not temporary -%} 26 | alter materialized view {{relation}} cluster by ({{cluster_by_string}}); 27 | {%- endif -%} 28 | {% if enable_automatic_clustering and cluster_by_string is not none and not temporary -%} 29 | alter materialized view {{relation}} resume recluster; 30 | {%- endif -%} 31 | 32 | {% endmacro %} 33 | 
-------------------------------------------------------------------------------- /materialized-views/macros/snowflake/materialized_view.sql: -------------------------------------------------------------------------------- 1 | {% materialization materialized_view, adapter='snowflake' -%} 2 | 3 | {% set original_query_tag = set_query_tag() %} 4 | 5 | {% set full_refresh_mode = (should_full_refresh()) %} 6 | 7 | {% set target_relation = this %} 8 | {% set existing_relation = load_relation(this) %} 9 | {% set tmp_relation = make_temp_relation(this) %} 10 | 11 | {{ run_hooks(pre_hooks) }} 12 | 13 | {% if (existing_relation is none or full_refresh_mode) %} 14 | {% set build_sql = dbt_labs_materialized_views.create_materialized_view_as(target_relation, sql, config) %} 15 | {% elif existing_relation.is_view or existing_relation.is_table %} 16 | {#-- Can't overwrite a view with a table - we must drop --#} 17 | {{ log("Dropping relation " ~ target_relation ~ " because it is a " ~ existing_relation.type ~ " and this model is a materialized view.") }} 18 | {% do adapter.drop_relation(existing_relation) %} 19 | {% set build_sql = dbt_labs_materialized_views.create_materialized_view_as(target_relation, sql, config) %} 20 | {% else %} 21 | {# noop #} 22 | {% endif %} 23 | 24 | {% if build_sql %} 25 | {% call statement("main") %} 26 | {{ build_sql }} 27 | {% endcall %} 28 | {% else %} 29 | {{ store_result('main', 'SKIP') }} 30 | {% endif %} 31 | 32 | {{ run_hooks(post_hooks) }} 33 | 34 | {% do persist_docs(target_relation, model) %} 35 | 36 | {% do unset_query_tag(original_query_tag) %} 37 | 38 | {{ return({'relations': [target_relation]}) }} 39 | 40 | {%- endmaterialization %} 41 | -------------------------------------------------------------------------------- /read-external-iceberg/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | dbt_packages/ 3 | logs/ 4 | user.yml 5 | profiles.yml 6 | .DS_Store 7 | *.pyc 8 | __pycache__/ -------------------------------------------------------------------------------- /read-external-iceberg/README.md: -------------------------------------------------------------------------------- 1 | # Reading external, unmanaged Iceberg tables as Sources 2 | 3 | > [!WARNING] 4 | > This feature is experimental and subject to change at any time 5 | 6 | An experimental extension to [dbt-labs/dbt-external-tables](https://github.com/dbt-labs/dbt-external-tables) that adds support for creating Iceberg tables that point to external catalogs not managed by the dbt project's warehouse. 7 | 8 | For more context, see this discussion: [dbt-core#11171: Just the tip of the Iceberg](https://github.com/dbt-labs/dbt-core/discussions/11171) 9 | 10 | 11 | ## Supported databases 12 | 13 | * Snowflake 14 | 15 | ## Installation 16 | 17 | ### Install this project as a package ([package-management docs](https://docs.getdbt.com/docs/building-a-dbt-project/package-management)) 18 | - [Local package](https://docs.getdbt.com/docs/building-a-dbt-project/package-management#local-packages): by referencing this [`read-external-iceberg/`](https://github.com/dbt-labs/dbt-labs-experimental-features/tree/master/read-external-iceberg) folder.
19 | - [Git package](https://docs.getdbt.com/docs/building-a-dbt-project/package-management#git-packages) using [project subdirectories](https://docs.getdbt.com/docs/building-a-dbt-project/package-management#git-packages): again by referencing the [`read-external-iceberg`](https://github.com/dbt-labs/dbt-labs-experimental-features/tree/master/read-external-iceberg) folder. 20 | 21 | ### Copy-paste the files from `macros/` into your own project 22 | 23 | Specifically, those in `plugins/snowflake/`. 24 | 25 | 26 | ## Configuration 27 | 28 | You'll need some form of the `dispatch` config below so that this package's macros take precedence: 29 | 30 | ```yml 31 | dispatch: 32 | - macro_namespace: dbt 33 | search_order: 34 | - 35 | - read_external_iceberg # if you're installing as a package 36 | - dbt_external_tables 37 | - dbt 38 | ``` 39 | 40 | 41 | ## Usage 42 | 43 | The exact same as [dbt-labs/dbt-external-tables](https://github.com/dbt-labs/dbt-external-tables)! 44 | 45 | ## Sample usage 46 | 47 | 48 | ```yml 49 | version: 2 50 | sources: 51 | - name: snowplow 52 | database: analytics 53 | schema: snowplow_external 54 | loader: S3 55 | loaded_at_field: collector_hour 56 | 57 | tables: 58 | - name: my_iceberg_table 59 | description: | 60 | Iceberg table using an external AWS Glue or REST catalog 61 | Additional Details: https://docs.snowflake.com/en/sql-reference/sql/create-iceberg-table#external-iceberg-catalog 62 | external: 63 | table_format: iceberg 64 | # existing external volume 65 | external_volume: my_external_volume 66 | # existing catalog integration 67 | catalog: my_catalog_integration 68 | # name of the table in the external catalog 69 | catalog_table_name: my_iceberg_table 70 | # name of the namespace in the external catalog 71 | # Hint: in AWS Glue this is the "Database" 72 | catalog_namespace: my_iceberg_table_namespace 73 | # optional; Specifies whether to replace invalid UTF-8 characters with the Unicode replacement character in query results 74 | replace_invalid_characters: true 75 | # optional; Specifies whether Snowflake should automatically poll the external Iceberg catalog 76 | # associated with the table for metadata updates when you use automated refresh 77 | auto_refresh: true 78 | # optional; Specifies a co 79 | ``` -------------------------------------------------------------------------------- /read-external-iceberg/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'read_external_iceberg' 2 | version: '1.0.0' 3 | config-version: 2 4 | 5 | profile: 'read_external_iceberg' 6 | 7 | model-paths: ["models"] 8 | analysis-paths: ["analyses"] 9 | test-paths: ["tests"] 10 | seed-paths: ["seeds"] 11 | macro-paths: ["macros"] 12 | snapshot-paths: ["snapshots"] 13 | 14 | target-path: "target" 15 | clean-targets: 16 | - "target" 17 | - "dbt_packages" 18 | -------------------------------------------------------------------------------- /read-external-iceberg/macros/plugins/snowflake/create_iceberg_source.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake_create_iceberg_source(source_node) %} 2 | 3 | {% set relation = api.Relation.create( 4 | database = source_node.database, 5 | schema = source_node.schema, 6 | identifier = source_node.identifier 7 | ) %} 8 | 9 | {% set required_configs = ['external_volume', 'catalog', 'catalog_table_name', 'catalog_namespace'] %} 10 | {% set optional_configs = ['replace_invalid_characters', 'auto_refresh', 'comment'] %} 11 | 12 | {% set ddl %} 13 | create or replace
iceberg table {{ relation }} 14 | {% for config in required_configs %} 15 | {{ config }} = '{{ source_node.external.get(config) }}' 16 | {%- endfor -%} 17 | 18 | {% for config in optional_configs %} 19 | {% if config in source_node.external -%} 20 | 21 | {%- if source_node.external.get(config) is boolean -%} 22 | {{ config }} = {{ source_node.external.get(config) }} 23 | 24 | {%- else -%} 25 | {{ config }} = '{{ source_node.external.get(config) }}' 26 | {%- endif -%} 27 | 28 | {%- endif -%} 29 | {%- endfor -%} 30 | 31 | ; 32 | {% endset %} 33 | 34 | {{ ddl }} 35 | 36 | {% endmacro %} -------------------------------------------------------------------------------- /read-external-iceberg/macros/plugins/snowflake/get_external_build_plan.sql: -------------------------------------------------------------------------------- 1 | {% macro snowflake__get_external_build_plan(source_node) %} 2 | 3 | {% set build_plan = [] %} 4 | 5 | {% set old_relation = adapter.get_relation( 6 | database = source_node.database, 7 | schema = source_node.schema, 8 | identifier = source_node.identifier 9 | ) %} 10 | 11 | {% set create_or_replace = (old_relation is none or var('ext_full_refresh', false)) %} 12 | 13 | {% if source_node.external.get('table_format') == 'iceberg' %} 14 | 15 | {% set build_plan = build_plan + [ 16 | dbt_external_tables.create_external_schema(source_node), 17 | dbt_external_tables.snowflake_create_iceberg_source(source_node) 18 | ] %} 19 | 20 | {% elif source_node.external.get('snowpipe', none) is not none %} 21 | 22 | {% if create_or_replace %} 23 | {% set build_plan = build_plan + [ 24 | dbt_external_tables.create_external_schema(source_node), 25 | dbt_external_tables.snowflake_create_empty_table(source_node), 26 | dbt_external_tables.snowflake_get_copy_sql(source_node, explicit_transaction=true), 27 | dbt_external_tables.snowflake_create_snowpipe(source_node) 28 | ] %} 29 | {% else %} 30 | {% set build_plan = build_plan + dbt_external_tables.snowflake_refresh_snowpipe(source_node) %} 31 | {% endif %} 32 | 33 | {% else %} 34 | 35 | {% if create_or_replace %} 36 | {% set build_plan = build_plan + [ 37 | dbt_external_tables.create_external_schema(source_node), 38 | dbt_external_tables.create_external_table(source_node) 39 | ] %} 40 | {% else %} 41 | {% set build_plan = build_plan + dbt_external_tables.refresh_external_table(source_node) %} 42 | {% endif %} 43 | 44 | {% endif %} 45 | 46 | {% do return(build_plan) %} 47 | 48 | {% endmacro %} 49 | -------------------------------------------------------------------------------- /read-external-iceberg/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_external_tables 3 | version: 0.11.1 # Using a recent stable version -------------------------------------------------------------------------------- /snapshot-testing/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /snapshot-testing/README.md: -------------------------------------------------------------------------------- 1 | # Using snapshots to detect dbt model regressions 2 | This dbt project is a worked example to demonstrate how to use snapshots to detect dbt model regressions. **Check out the full write-up [on Discourse](to-do).** 3 | 4 | The SQL in this project is compatible with Snowflake¹. 
5 | 6 | If you want to run this project yourself to play with it (assuming you have 7 | dbt installed): 8 | 1. Clone this repo. 9 | 2. `cd` into this directory. 10 | 3. Create a profile named `acme`, or update the `profile:` key in the `dbt_project.yml` file to point to an existing profile ([docs](https://docs.getdbt.com/docs/configure-your-profile)). 11 | 4. Run `dbt seed`. 12 | 5. Run `dbt snapshot`. 13 | 6. Run `dbt test` — no test failures should occur. 14 | 7. Run `dbt snapshot` a second time — on this run, a regression should be introduced. 15 | 8. Run `dbt test` to see the failure. 16 | 9. Run `dbt run-operation historic_revenue_snapshot_cleanup` to move the rogue record into an audit table. 17 | 10. Run `dbt test` again to see the healed failure. 18 | 19 | ----- 20 | 1. We decided to _not_ check that the SQL in this project is multi-warehouse compatible — it _might_ work on other warehouses! 21 | -------------------------------------------------------------------------------- /snapshot-testing/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/snapshot-testing/data/.gitkeep -------------------------------------------------------------------------------- /snapshot-testing/data/fct_orders.csv: -------------------------------------------------------------------------------- 1 | order_id,customer_id,order_date,amount 2 | 1,1,2020-01-01,10 3 | 2,3,2020-01-01,20 4 | 3,94,2020-01-02,1 5 | 4,50,2020-01-03,25 6 | 5,64,2020-01-03,17 7 | 6,54,2020-01-04,6 8 | -------------------------------------------------------------------------------- /snapshot-testing/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'acme' 2 | # config-version: 2 3 | version: 1.0.0 4 | require-dbt-version: ">=0.17.1" 5 | 6 | profile: acme 7 | 8 | source-paths: ["models"] 9 | analysis-paths: ["analysis"] 10 | test-paths: ["tests"] 11 | data-paths: ["data"] 12 | macro-paths: ["macros"] 13 | snapshot-paths: ["snapshots"] 14 | 15 | target-path: "target" # directory which will store compiled SQL files 16 | clean-targets: # directories to be removed by `dbt clean` 17 | - "target" 18 | - "dbt_modules" 19 | -------------------------------------------------------------------------------- /snapshot-testing/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/snapshot-testing/macros/.gitkeep -------------------------------------------------------------------------------- /snapshot-testing/macros/historic_revenue_snapshot_cleanup.sql: -------------------------------------------------------------------------------- 1 | {% macro historic_revenue_snapshot_cleanup() %} 2 | {% set create_sql %} 3 | create table if not exists dbt_snapshots.historic_revenue_snapshot_invalidated as ( 4 | select 5 | *, 6 | current_timestamp as _inserted_at 7 | from {{ ref('historic_revenue_snapshot') }} 8 | limit 0 9 | ) 10 | {% endset %} 11 | {% set insert_sql %} 12 | insert into dbt_snapshots.historic_revenue_snapshot_invalidated ( 13 | select 14 | *, 15 | current_timestamp as _inserted_at 16 | from {{ ref('historic_revenue_snapshot') }} 17 | where dbt_valid_to is not null 18 | ); 19 | {% endset %} 20 | 21 | {% set delete_sql %} 22 | delete from {{ ref('historic_revenue_snapshot') }}
where dbt_valid_to is not null 23 | {% endset %} 24 | 25 | {% do run_query('begin') %} 26 | {% do run_query(create_sql) %} 27 | {% do run_query(insert_sql) %} 28 | {% do run_query(delete_sql) %} 29 | {% do run_query('commit') %} 30 | 31 | {% endmacro %} 32 | -------------------------------------------------------------------------------- /snapshot-testing/macros/test_is_null.sql: -------------------------------------------------------------------------------- 1 | {% macro test_is_null(model) %} 2 | 3 | {% set column_name = kwargs.get('column_name', kwargs.get('arg')) %} 4 | 5 | select count(*) as validation_errors 6 | from {{ model }} 7 | where not({{ column_name }} is null) 8 | 9 | {% endmacro %} 10 | -------------------------------------------------------------------------------- /snapshot-testing/snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/snapshot-testing/snapshots/.gitkeep -------------------------------------------------------------------------------- /snapshot-testing/snapshots/historic_revenue_snapshot.sql: -------------------------------------------------------------------------------- 1 | {% snapshot historic_revenue_snapshot %} 2 | 3 | {{ 4 | config( 5 | target_schema='dbt_snapshots', 6 | strategy='check', 7 | unique_key='date_day', 8 | check_cols=['total_revenue'] 9 | ) 10 | }} 11 | 12 | select 13 | order_date as date_day, 14 | sum(amount) as total_revenue 15 | from {{ ref('fct_orders') }} 16 | 17 | {# we're going to use this hack to make a record disappear on the second run of this #} 18 | {% if adapter.get_relation(this.database, this.schema, this.table) is not none %} 19 | where order_id != 4 20 | {% endif %} 21 | 22 | group by 1 23 | 24 | 25 | {% endsnapshot %} 26 | -------------------------------------------------------------------------------- /snapshot-testing/snapshots/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | snapshots: 4 | - name: historic_revenue_snapshot 5 | columns: 6 | - name: dbt_valid_to 7 | tests: 8 | - is_null 9 | -------------------------------------------------------------------------------- /snapshot-testing/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/dbt-labs-experimental-features/458f0a49f165e55f5dcac45e54226f215fda3d07/snapshot-testing/tests/.gitkeep --------------------------------------------------------------------------------