├── README.md └── learn-dbt ├── .gitignore ├── README.md ├── analysis └── .gitkeep ├── data ├── .gitkeep └── country_codes.csv ├── dbt_project.yml ├── macros ├── .gitkeep ├── group_by.sql ├── renaming_segments.sql └── suspend_warehouse.sql ├── models ├── example │ ├── cumulative_orders_by_date.sql │ ├── incremental_time.sql │ ├── my_first_dbt_model.sql │ ├── my_second_dbt_model.sql │ ├── playing_with_tests.sql │ ├── schema.yml │ └── snowflake_customer_purchases.sql └── new │ ├── dates.sql │ └── schema.yml ├── snapshots ├── .gitkeep └── first_model_snapshot.sql └── tests ├── .gitkeep ├── assert_under_100m.sql └── assert_under_10_percent_null.sql /README.md: -------------------------------------------------------------------------------- 1 | # Learn DBT from Scratch 2 | ### JR Tests 3 | 4 | This repository accompanies my Learn DBT from Scratch course. Throughout the course, some of the topics we cover include: 5 | 1. Setting up DBT 6 | 2. Connecting DBT to Snowflake 7 | 3. Getting Started with DBT Models & Tests 8 | 4. Deploying on a Schedule & DBT Cloud 9 | 5. Advanced DBT Topics 10 | 6. Best Practices 11 | 12 | The files found in this repository are the same ones that are used throughout the course. Feel free to reference any of the code here if you like. I do recommend trying to code along with the course before copying from this repository. Also, some of the models (such as the example models) get used over and over again, so the final code here might not match the code in a given lesson. 13 | -------------------------------------------------------------------------------- /learn-dbt/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /learn-dbt/README.md: -------------------------------------------------------------------------------- 1 | Welcome to your new dbt project! 2 | 3 | ### Using the starter project 4 | 5 | Try running the following commands: 6 | - dbt run 7 | - dbt test 8 | 9 | 10 | ### Resources: 11 | - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/overview) 12 | - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers 13 | - Join the [chat](http://slack.getdbt.com/) on Slack for live discussions and support 14 | - Find [dbt events](https://events.getdbt.com) near you 15 | - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices 16 | -------------------------------------------------------------------------------- /learn-dbt/analysis/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremyholtzman/jrtests-learn-dbt/963ba7861013520d1bfb8f80cf6851683a00f109/learn-dbt/analysis/.gitkeep -------------------------------------------------------------------------------- /learn-dbt/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremyholtzman/jrtests-learn-dbt/963ba7861013520d1bfb8f80cf6851683a00f109/learn-dbt/data/.gitkeep -------------------------------------------------------------------------------- /learn-dbt/data/country_codes.csv: -------------------------------------------------------------------------------- 1 | country_code,country_name 2 | US,United States 3 | MX,Mexico 4 | GB,Great Britain -------------------------------------------------------------------------------- /learn-dbt/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | # Name your project! Project names should contain only lowercase characters 3 | # and underscores. A good package name should reflect your organization's 4 | # name or the intended use of these models 5 | name: 'jrtests_learn_dbt' 6 | version: '1.0.0' 7 | 8 | # This setting configures which "profile" dbt uses for this project. 9 | profile: 'tutorial-jrtests-snowflake-db' 10 | 11 | # These configurations specify where dbt should look for different types of files. 12 | # The `source-paths` config, for example, states that models in this project can be 13 | # found in the "models/" directory. You probably won't need to change these! 14 | source-paths: ["models"] 15 | analysis-paths: ["analysis"] 16 | test-paths: ["tests"] 17 | data-paths: ["data"] 18 | macro-paths: ["macros"] 19 | snapshot-paths: ["snapshots"] 20 | 21 | target-path: "target" # directory which will store compiled SQL files 22 | clean-targets: # directories to be removed by `dbt clean` 23 | - "target" 24 | - "dbt_modules" 25 | 26 | on-run-start: 27 | - "create table if not exists audit (model text, state text, time timestamp_ltz)" 28 | 29 | on-run-end: 30 | - 'grant usage on schema analytics.dbt to role analyst' 31 | - 'grant select on all tables in schema analytics.dbt to role analyst' 32 | - 'grant select on all views in schema analytics.dbt to role analyst' 33 | 34 | 35 | # Configuring models 36 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 37 | 38 | # In this example config, we tell dbt to build all models in the example/ directory 39 | # as tables. These settings can be overridden in the individual model files 40 | # using the `{{ config(...) }}` macro. 41 | models: 42 | jrtests_learn_dbt: 43 | # Applies to all files under models/example/ 44 | example: 45 | materialized: table 46 | vars: 47 | my_first_variable: True 48 | my_second_variable: 2020 49 | my_third_variable: 1 50 | pre-hook: "insert into dbt.audit (model, state, time) values ('{{this.name}}', 'starting model deployment', current_timestamp)" 51 | -------------------------------------------------------------------------------- /learn-dbt/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremyholtzman/jrtests-learn-dbt/963ba7861013520d1bfb8f80cf6851683a00f109/learn-dbt/macros/.gitkeep -------------------------------------------------------------------------------- /learn-dbt/macros/group_by.sql: -------------------------------------------------------------------------------- 1 | {% macro group_by(n) %} 2 | GROUP BY 3 | {% for i in range(1, n + 1) %} 4 | {{ i }} 5 | {% if not loop.last %} , {% endif %} 6 | {% endfor %} 7 | {% endmacro %} 8 | -------------------------------------------------------------------------------- /learn-dbt/macros/renaming_segments.sql: -------------------------------------------------------------------------------- 1 | {% macro rename_segments(column_name) %} 2 | CASE 3 | WHEN {{column_name}} in ('BUILDING', 'HOUSEHOLD', 'FURNITURE') 4 | THEN 'segment_1' 5 | ELSE 'segment_2' 6 | END 7 | {% endmacro %} 8 | -------------------------------------------------------------------------------- /learn-dbt/macros/suspend_warehouse.sql: -------------------------------------------------------------------------------- 1 | {% macro suspend(warehouse_name) %} 2 | 3 | {% set sql %} 4 | alter warehouse {{warehouse_name}} suspend 5 | {% endset %} 6 | 7 | {% set table = run_query(sql) %} 8 | {% do table.print_table() %} 9 | 10 | {% endmacro %} 11 | -------------------------------------------------------------------------------- /learn-dbt/models/example/cumulative_orders_by_date.sql: -------------------------------------------------------------------------------- 1 | with orders as ( 2 | SELECT * 3 | FROM {{source('sample', 'orders')}} 4 | 5 | ) 6 | 7 | select distinct 8 | o_orderdate, 9 | sum(o_totalprice) over (order by o_orderdate) as cumulative_sales 10 | 11 | from orders 12 | 13 | {% if target.name == 'dev' %} 14 | where year(o_orderdate) = 1996 15 | {% endif %} 16 | 17 | order by o_orderdate 18 | -------------------------------------------------------------------------------- /learn-dbt/models/example/incremental_time.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='incremental', unique_key='t_time') }} 2 | 3 | select * 4 | from "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF10TCL"."TIME_DIM" 5 | where to_time(concat(T_HOUR::varchar, ':', T_MINUTE, ':', T_SECOND)) <= current_time 6 | 7 | {% if is_incremental() %} 8 | and t_time > (select max(t_time) from {{ this }}) 9 | {% endif %} 10 | -------------------------------------------------------------------------------- /learn-dbt/models/example/my_first_dbt_model.sql: -------------------------------------------------------------------------------- 1 | 2 | {{ config(materialized='table', alias='first_model', tags=["nightly", "example"] ) }} 3 | 4 | with source_data as ( 5 | 6 | select 1 as id, 'NJ' as state, '2020-02-01 00:01:00.000'::timestamp as updated_at 7 | union all 8 | select null as id, 'CT' as state, '2020-01-01 00:00:00.000'::timestamp as updated_at 9 | union all 10 | select 4 as id, 'VT' as state, '2020-01-01 00:00:00.000'::timestamp as updated_at 11 | 12 | ) 13 | 14 | select * 15 | from source_data 16 | -------------------------------------------------------------------------------- /learn-dbt/models/example/my_second_dbt_model.sql: -------------------------------------------------------------------------------- 1 | 2 | -- Use the `ref` function to select from other models 3 | 4 | select * 5 | from {{ ref('my_first_dbt_model') }} 6 | -------------------------------------------------------------------------------- /learn-dbt/models/example/playing_with_tests.sql: -------------------------------------------------------------------------------- 1 | with sample_customer as ( 2 | SELECT * 3 | FROM {{ source('sample2', 'customer') }} 4 | ) 5 | 6 | select 7 | c_custkey, 8 | c_mktsegment, 9 | {{rename_segments('c_mktsegment')}} mkt_segment_adjusted, 10 | c_acctbal 11 | from sample_customer 12 | -------------------------------------------------------------------------------- /learn-dbt/models/example/schema.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | models: 5 | - name: my_first_dbt_model 6 | description: "A starter dbt model" 7 | columns: 8 | - name: id 9 | description: "The primary key for this table" 10 | tests: 11 | - unique 12 | 13 | - name: my_second_dbt_model 14 | description: "A starter dbt model" 15 | columns: 16 | - name: id 17 | description: "The primary key for this table" 18 | tests: 19 | - unique 20 | - relationships: 21 | to: ref('my_first_dbt_model') 22 | field: id 23 | - name: snowflake_customer_purchases 24 | description: "the total purchases for each customer in snowflake sample data" 25 | columns: 26 | - name: c_custkey 27 | tests: 28 | - unique 29 | - not_null 30 | - name: cumulative_orders_by_date 31 | - name: incremental_time 32 | - name: playing_with_tests 33 | columns: 34 | - name: c_custkey 35 | tests: 36 | - unique 37 | - not_null 38 | - name: c_mktsegment 39 | tests: 40 | - accepted_values: 41 | values: ['BUILDING', 'AUTOMOBILE', 'MACHINERY', 'HOUSEHOLD', 'FURNITURE'] 42 | 43 | sources: 44 | - name: sample 45 | database: snowflake_sample_data # Tell dbt to look for the source in the "raw" database 46 | schema: tpch_sf1 47 | tables: 48 | - name: customer 49 | columns: 50 | - name: c_custkey 51 | tests: 52 | - unique 53 | - not_null 54 | - name: orders 55 | - name: sample2 56 | database: snowflake_sample_data 57 | schema: tpch_sf001 58 | tables: 59 | - name: customer 60 | -------------------------------------------------------------------------------- /learn-dbt/models/example/snowflake_customer_purchases.sql: -------------------------------------------------------------------------------- 1 | with sample_customer as ( 2 | SELECT * 3 | FROM {{ source('sample', 'customer') }} 4 | ), 5 | 6 | sample_orders as ( 7 | SELECT * 8 | FROM {{ source('sample', 'orders') }} 9 | ) 10 | 11 | SELECT 12 | c.c_custkey, 13 | c.c_name, 14 | c.c_nationkey as nation, 15 | sum(o.o_totalprice) as total_order_price 16 | from sample_customer c 17 | LEFT JOIN sample_orders o 18 | ON c.c_custkey = o.o_custkey 19 | 20 | {{group_by(3)}} 21 | -------------------------------------------------------------------------------- /learn-dbt/models/new/dates.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='incremental', unique_key = 'd_date') }} 2 | 3 | 4 | select * 5 | from "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF10TCL"."DATE_DIM" 6 | where d_date <= current_date 7 | 8 | {% if is_incremental() %} 9 | and d_date > (select max(d_date) from {{ this }}) 10 | {% endif %} 11 | -------------------------------------------------------------------------------- /learn-dbt/models/new/schema.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | models: 5 | - name: dates 6 | -------------------------------------------------------------------------------- /learn-dbt/snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremyholtzman/jrtests-learn-dbt/963ba7861013520d1bfb8f80cf6851683a00f109/learn-dbt/snapshots/.gitkeep -------------------------------------------------------------------------------- /learn-dbt/snapshots/first_model_snapshot.sql: -------------------------------------------------------------------------------- 1 | {% snapshot first_model_snapshot %} 2 | {{ 3 | config( 4 | target_database='analytics', 5 | target_schema='snapshots', 6 | unique_key='id', 7 | 8 | strategy='timestamp', 9 | updated_at='updated_at', 10 | ) 11 | }} 12 | 13 | -- Pro-Tip: Use sources in snapshots! 14 | select * from {{ ref('my_first_dbt_model') }} 15 | 16 | {% endsnapshot %} 17 | -------------------------------------------------------------------------------- /learn-dbt/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremyholtzman/jrtests-learn-dbt/963ba7861013520d1bfb8f80cf6851683a00f109/learn-dbt/tests/.gitkeep -------------------------------------------------------------------------------- /learn-dbt/tests/assert_under_100m.sql: -------------------------------------------------------------------------------- 1 | SELECT sum(c_acctbal) as total 2 | FROM {{ ref('playing_with_tests') }} 3 | 4 | HAVING sum(c_acctbal) >= 100000000 5 | -------------------------------------------------------------------------------- /learn-dbt/tests/assert_under_10_percent_null.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sum(case when id is null then 1 else 0 end) / count(*) as total_nulls 3 | 4 | FROM {{ ref('my_first_dbt_model') }} 5 | 6 | having sum(case when id is null then 1 else 0 end) / count(*) > .4 7 | --------------------------------------------------------------------------------