├── integration_tests ├── tmp │ └── materialized │ │ └── .gitkeep ├── .gitignore ├── packages.yml ├── package-lock.yml ├── models │ ├── last_ever │ │ ├── last_ever.yml │ │ └── dataset__last_ever_1.sql │ ├── last_after │ │ ├── last_after.yml │ │ └── dataset__last_after_1.sql │ ├── last_before │ │ ├── last_before.yml │ │ └── dataset__last_before_1.sql │ ├── first_before │ │ ├── first_before.yml │ │ └── dataset__first_before_1.sql │ ├── aggregate_after │ │ ├── aggregate_after.yml │ │ └── dataset__aggregate_after_1.sql │ ├── aggregate_before │ │ ├── aggregate_before.yml │ │ └── dataset__aggregate_before_1.sql │ ├── aggregate_all_ever │ │ ├── aggregate_all_ever.yml │ │ └── dataset__aggregate_all_ever_1.sql │ ├── aggregate_in_between │ │ ├── aggregate_in_between.yml │ │ └── dataset__aggregate_in_between_1.sql │ ├── nth_ever │ │ ├── nth_ever.yml │ │ └── dataset__nth_ever_1.sql │ ├── last_in_between │ │ ├── dataset__last_in_between_1.sql │ │ ├── dataset__last_in_between_3.sql │ │ ├── dataset__last_in_between_2.sql │ │ └── last_in_between.yml │ ├── first_after │ │ ├── dataset__first_after_1.sql │ │ ├── dataset__first_after_2.sql │ │ ├── first_after.yml │ │ └── dataset__first_after_3.sql │ ├── first_in_between │ │ ├── dataset__first_in_between_1.sql │ │ ├── dataset__first_in_between_2.sql │ │ ├── first_in_between.yml │ │ └── dataset__first_in_between_3.sql │ └── first_ever │ │ ├── first_ever.yml │ │ └── dataset__first_ever_1.sql ├── seeds │ ├── aggregate_after │ │ ├── output │ │ │ └── output__aggregate_after_1.csv │ │ └── input__aggregate_after.csv │ ├── aggregate_all_ever │ │ ├── output │ │ │ └── output__aggregate_all_ever_1.csv │ │ └── input__aggregate_all_ever.csv │ ├── nth_ever │ │ ├── output │ │ │ └── output__nth_ever_1.csv │ │ └── input__nth_ever.csv │ ├── aggregate_before │ │ ├── output │ │ │ └── output__aggregate_before_1.csv │ │ └── input__aggregate_before.csv │ ├── last_after │ │ ├── output │ │ │ └── output__last_after_1.csv │ │ └── input__last_after.csv │ ├── 
last_in_between │ │ ├── output │ │ │ ├── output__last_in_between_1.csv │ │ │ ├── output__last_in_between_2.csv │ │ │ └── output__last_in_between_3.csv │ │ └── input__last_in_between.csv │ ├── first_in_between │ │ ├── output │ │ │ ├── output__first_in_between_1.csv │ │ │ ├── output__first_in_between_2.csv │ │ │ └── output__first_in_between_3.csv │ │ └── input__first_in_between.csv │ ├── first_after │ │ ├── output │ │ │ ├── output__first_after_1.csv │ │ │ ├── output__first_after_3.csv │ │ │ └── output__first_after_2.csv │ │ └── input__first_after.csv │ ├── aggregate_in_between │ │ ├── output │ │ │ └── output__aggregate_in_between_1.csv │ │ └── input__aggregate_in_between.csv │ ├── last_ever │ │ └── output │ │ │ └── output__last_ever_1.csv │ ├── first_ever │ │ └── output │ │ │ └── output__first_ever_1.csv │ ├── last_before │ │ └── output │ │ │ └── output__last_before_1.csv │ ├── first_before │ │ └── output │ │ │ └── output__first_before_1.csv │ └── example__activity_stream.csv ├── profiles.yml └── dbt_project.yml ├── macros ├── utils │ ├── aliasing │ │ ├── alias_joined_activity.sql │ │ ├── alias_column.sql │ │ ├── alias_cte.sql │ │ └── alias_appended_activity.sql │ ├── aggregations │ │ ├── sum.sql │ │ ├── count.sql │ │ ├── max.sql │ │ ├── min.sql │ │ └── _min_or_max.sql │ ├── constants │ │ ├── appended.sql │ │ └── primary.sql │ ├── helpers │ │ ├── ltrim.sql │ │ ├── parse_column.sql │ │ └── json_unpack_key.sql │ └── columns.sql ├── relationships │ ├── all_ever.sql │ ├── append_only │ │ ├── aggregate_all_ever.sql │ │ ├── last_after.sql │ │ ├── first_after.sql │ │ ├── last_before.sql │ │ ├── aggregate_after.sql │ │ ├── aggregate_before.sql │ │ ├── first_before.sql │ │ ├── last_in_between.sql │ │ ├── first_in_between.sql │ │ └── aggregate_in_between.sql │ ├── first_ever.sql │ ├── last_ever.sql │ └── nth_ever.sql ├── activity.sql └── dataset.sql ├── .gitattributes ├── .gitignore ├── scripts └── ci.sh ├── .github ├── actions │ └── run_ci │ │ └── action.yml └── workflows │ 
└── ci.yml ├── pyproject.toml ├── dbt_project.yml ├── Dockerfile ├── .devcontainer └── devcontainer.json ├── README.md └── LICENSE /integration_tests/tmp/materialized/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /macros/utils/aliasing/alias_joined_activity.sql: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.sql linguist-detectable 2 | *.sql linguist-language=sql 3 | -------------------------------------------------------------------------------- /macros/utils/aggregations/sum.sql: -------------------------------------------------------------------------------- 1 | {% macro sum() %} 2 | sum({{ caller() }}) 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /macros/utils/aggregations/count.sql: -------------------------------------------------------------------------------- 1 | {% macro count() %} 2 | count({{ caller() }}) 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /integration_tests/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | dbt_packages/ 3 | logs/ 4 | tmp/materialized/*.csv 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /macros/utils/constants/appended.sql: -------------------------------------------------------------------------------- 1 | {% macro appended() %} 2 | {% do return("appended") %} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /macros/utils/constants/primary.sql: 
-------------------------------------------------------------------------------- 1 | {% macro primary() %} 2 | 3 | {% do return("stream") %} 4 | 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /integration_tests/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - local: ../ 3 | 4 | - package: dbt-labs/dbt_utils 5 | version: 1.0.0 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | dbt_modules/ 3 | logs/ 4 | .env 5 | .user.yml 6 | .vscode 7 | .dbt_env 8 | .venv 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /macros/utils/aggregations/max.sql: -------------------------------------------------------------------------------- 1 | {% macro max() %} 2 | 3 | {% do return(dbt_activity_schema._min_or_max("max", caller())) %} 4 | 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /macros/utils/aggregations/min.sql: -------------------------------------------------------------------------------- 1 | {% macro min() %} 2 | 3 | {% do return(dbt_activity_schema._min_or_max("min", caller())) %} 4 | 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /scripts/ci.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -ex 3 | 4 | main () { 5 | cd integration_tests 6 | dbt deps 7 | dbt build -x 8 | } 9 | 10 | main 11 | -------------------------------------------------------------------------------- /.github/actions/run_ci/action.yml: -------------------------------------------------------------------------------- 1 | name: 'Run CI' 2 | description: 'Run the CI using the main Dockerfile.' 
3 | runs: 4 | using: 'docker' 5 | image: '../../../Dockerfile' 6 | -------------------------------------------------------------------------------- /integration_tests/package-lock.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - local: ../ 3 | - package: dbt-labs/dbt_utils 4 | version: 1.0.0 5 | sha1_hash: d8201c61aaba8113c1c54aec72b97fc4ccc7fbd8 6 | -------------------------------------------------------------------------------- /integration_tests/models/last_ever/last_ever.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: dataset__last_ever_1 6 | tests: 7 | - dbt_utils.equality: 8 | compare_model: ref("output__last_ever_1") 9 | -------------------------------------------------------------------------------- /integration_tests/models/last_after/last_after.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: dataset__last_after_1 6 | tests: 7 | - dbt_utils.equality: 8 | compare_model: ref("output__last_after_1") 9 | -------------------------------------------------------------------------------- /integration_tests/seeds/aggregate_after/output/output__aggregate_after_1.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,revenue_impact,aggregate_after_visit_page_activity_id 2 | 2,1,2022-01-02 22:10:11,0,3 3 | 10,7,2022-01-08 22:10:11,0,3 4 | -------------------------------------------------------------------------------- /integration_tests/models/last_before/last_before.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: dataset__last_before_1 6 | tests: 7 | - dbt_utils.equality: 8 | compare_model: ref("output__last_before_1") 9 | 
-------------------------------------------------------------------------------- /integration_tests/models/first_before/first_before.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: dataset__first_before_1 6 | tests: 7 | - dbt_utils.equality: 8 | compare_model: ref("output__first_before_1") 9 | -------------------------------------------------------------------------------- /integration_tests/seeds/aggregate_all_ever/output/output__aggregate_all_ever_1.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,revenue_impact,aggregate_all_ever_visit_page_activity_id 2 | 2,1,2022-01-02 22:10:11,0,4 3 | 10,7,2022-01-08 22:10:11,0,4 4 | -------------------------------------------------------------------------------- /integration_tests/models/aggregate_after/aggregate_after.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: dataset__aggregate_after_1 6 | tests: 7 | - dbt_utils.equality: 8 | compare_model: ref("output__aggregate_after_1") 9 | -------------------------------------------------------------------------------- /integration_tests/models/aggregate_before/aggregate_before.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: dataset__aggregate_before_1 6 | tests: 7 | - dbt_utils.equality: 8 | compare_model: ref("output__aggregate_before_1") 9 | -------------------------------------------------------------------------------- /integration_tests/models/aggregate_all_ever/aggregate_all_ever.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: dataset__aggregate_all_ever_1 6 | tests: 7 | - dbt_utils.equality: 8 | compare_model: ref("output__aggregate_all_ever_1") 9 | 
-------------------------------------------------------------------------------- /integration_tests/seeds/nth_ever/output/output__nth_ever_1.csv: -------------------------------------------------------------------------------- 1 | activity_occurrence,nth_ever_3_visit_page_activity_occurrence,nth_ever_4_visit_page_activity_occurrence,nth_ever_5_visit_page_activity_occurrence 2 | 2,3.000000,4.000000, 3 | 2,3.000000,4.000000, 4 | -------------------------------------------------------------------------------- /integration_tests/models/aggregate_in_between/aggregate_in_between.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: dataset__aggregate_in_between_1 6 | tests: 7 | - dbt_utils.equality: 8 | compare_model: ref("output__aggregate_in_between_1") 9 | -------------------------------------------------------------------------------- /integration_tests/seeds/aggregate_before/output/output__aggregate_before_1.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,revenue_impact,aggregate_before_visit_page_activity_id,aggregate_before_added_to_cart_activity_id 2 | 7,1,2022-01-05 22:10:15,100,3,2 3 | 15,7,2022-01-11 22:10:15,100,3,2 4 | -------------------------------------------------------------------------------- /integration_tests/models/nth_ever/nth_ever.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: dataset__nth_ever_1 5 | 6 | description: A dataset model used to test basic functionality of the `nth_ever` relationship. 
7 | tests: 8 | - dbt_utils.equality: 9 | compare_model: ref("output__nth_ever_1") 10 | -------------------------------------------------------------------------------- /integration_tests/seeds/last_after/output/output__last_after_1.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,revenue_impact,last_after_visit_page_activity_id,last_after_visit_page_entity_uuid,last_after_visit_page_ts,last_after_visit_page_revenue_impact 2 | 3,1,2022-01-02 22:10:11,0,7,1,2022-01-06 22:10:11,0 3 | 9,7,2022-01-08 22:10:11,0,13,7,2022-01-12 22:10:11,0 4 | -------------------------------------------------------------------------------- /integration_tests/profiles.yml: -------------------------------------------------------------------------------- 1 | config: 2 | send_anonymous_usage_stats: False 3 | use_colors: True 4 | 5 | integration_tests: 6 | outputs: 7 | duckdb: 8 | type: duckdb 9 | path: ":memory:" 10 | database: memory 11 | external_root: tmp/materialized # Materialize Models as CSV for Inspection 12 | target: duckdb 13 | -------------------------------------------------------------------------------- /integration_tests/models/last_after/dataset__last_after_1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__last_after"), 4 | dbt_activity_schema.activity(dbt_activity_schema.nth_ever(1), "signed up"), 5 | [ 6 | dbt_activity_schema.activity(dbt_activity_schema.last_after(), "visit page") 7 | ] 8 | ) 9 | }} 10 | -------------------------------------------------------------------------------- /integration_tests/seeds/last_in_between/output/output__last_in_between_1.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at,last_in_between_visit_page_activity_id 2 
| 3,1,2022-01-02 22:10:11,signed up,,"[{""signed up"": 1}]",0,,1,,7 3 | 9,7,2022-01-08 22:10:11,signed up,,"[{""signed up"": 1}]",0,,1,,13 4 | -------------------------------------------------------------------------------- /integration_tests/seeds/last_in_between/output/output__last_in_between_2.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,revenue_impact,last_in_between_visit_page_feature_json,last_in_between_visit_page_activity_occurrence,last_in_between_visit_page_ts 2 | 3,1,2022-01-02 22:10:11,0,"[{""visited page"": 1}]",4,2022-01-06 22:10:11 3 | 9,7,2022-01-08 22:10:11,0,"[{""visited page"": 1}]",4,2022-01-12 22:10:11 4 | -------------------------------------------------------------------------------- /macros/utils/helpers/ltrim.sql: -------------------------------------------------------------------------------- 1 | {%- macro ltrim(col, characters=none) -%} 2 | {{ return(adapter.dispatch("ltrim", "dbt_activity_schema")(col, characters)) }} 3 | {%- endmacro -%} 4 | 5 | 6 | {%- macro default__ltrim(col, characters) -%} 7 | 8 | {% if characters %} 9 | ltrim({{ col }}, {{ characters }}) 10 | {% else %} 11 | ltrim({{ col }}) 12 | {% endif %} 13 | 14 | {% endmacro %} 15 | -------------------------------------------------------------------------------- /integration_tests/seeds/first_in_between/output/output__first_in_between_1.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,revenue_impact,first_in_between_bought_something_feature_json,first_in_between_bought_something_ts,first_in_between_bought_something_revenue_impact 2 | 22,1,2022-01-02 22:10:11,0,"{""type"": 1}",2022-01-05 22:10:11,100.000000 3 | 99,7,2022-01-08 22:10:11,0,"{""type"": 1}",2022-01-11 22:10:11,100.000000 4 | -------------------------------------------------------------------------------- /macros/relationships/all_ever.sql: 
-------------------------------------------------------------------------------- 1 | {% macro all_ever_join_clause() %} 2 | (true) 3 | {% endmacro %} 4 | 5 | {% macro all_ever() %} 6 | 7 | {% do return(namespace( 8 | name="all_ever", 9 | aggregation_func=dbt_activity_schema.min, 10 | join_clause=dbt_activity_schema.all_ever_join_clause, 11 | where_clause=dbt_activity_schema.all_ever_join_clause() 12 | )) %} 13 | 14 | {% endmacro %} 15 | -------------------------------------------------------------------------------- /macros/relationships/append_only/aggregate_all_ever.sql: -------------------------------------------------------------------------------- 1 | {% macro aggregate_all_ever_join_clause(i) %} 2 | (true) 3 | {% endmacro %} 4 | 5 | {% macro aggregate_all_ever(aggregation_func=dbt_activity_schema.count) %} 6 | 7 | {% do return(namespace( 8 | name="aggregate_all_ever", 9 | aggregation_func=aggregation_func, 10 | join_clause=dbt_activity_schema.aggregate_all_ever_join_clause 11 | )) %} 12 | 13 | {% endmacro %} 14 | -------------------------------------------------------------------------------- /integration_tests/seeds/first_after/output/output__first_after_1.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at,first_after_bought_something_feature_json,first_after_bought_something_ts 2 | 3,1,2022-01-02 22:10:11,signed up,,"{""type"": 1}",0,,1,,"{""type"": 1}",2022-01-05 22:10:11 3 | 9,7,2022-01-08 22:10:11,signed up,,"{""type"": 1}",0,,1,,"{""type"": 1}",2022-01-11 22:10:11 4 | -------------------------------------------------------------------------------- /macros/utils/helpers/parse_column.sql: -------------------------------------------------------------------------------- 1 | {% macro parse_column(table_alias, column) %} 2 | 3 | {% set columns = dbt_activity_schema.columns() %} 4 | {%- if 
column not in columns.values() -%} 5 | {%- set parsed_column = dbt_activity_schema.json_unpack_key(table_alias ~ '.' ~ columns.feature_json, column) -%} 6 | {%- else -%} 7 | {%- set parsed_column = table_alias ~ '.' ~ column -%} 8 | {%- endif -%} 9 | 10 | {% do return(parsed_column) %} 11 | {% endmacro %} 12 | -------------------------------------------------------------------------------- /integration_tests/models/aggregate_after/dataset__aggregate_after_1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__aggregate_after"), 4 | dbt_activity_schema.activity(dbt_activity_schema.all_ever(), "signed up"), 5 | [ 6 | dbt_activity_schema.activity( 7 | dbt_activity_schema.aggregate_after(), 8 | "visit page", 9 | ["activity_id"] 10 | ) 11 | ] 12 | ) 13 | }} 14 | -------------------------------------------------------------------------------- /integration_tests/models/last_ever/dataset__last_ever_1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("example__activity_stream"), 4 | dbt_activity_schema.activity(dbt_activity_schema.last_ever(),"visited page"), 5 | [ 6 | dbt_activity_schema.activity( 7 | dbt_activity_schema.last_ever(), 8 | "bought something", 9 | ["feature_json", "ts"] 10 | ) 11 | ] 12 | ) 13 | }} 14 | -------------------------------------------------------------------------------- /integration_tests/models/last_in_between/dataset__last_in_between_1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__last_in_between"), 4 | dbt_activity_schema.activity(dbt_activity_schema.all_ever(), "signed up"), 5 | [ 6 | dbt_activity_schema.activity( 7 | dbt_activity_schema.last_in_between(), 8 | "visit page", 9 | ["activity_id"] 10 | ) 11 | ] 12 | ) 13 | }} 14 | 
-------------------------------------------------------------------------------- /integration_tests/models/last_before/dataset__last_before_1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("example__activity_stream"), 4 | dbt_activity_schema.activity(dbt_activity_schema.all_ever(),"bought something"), 5 | [ 6 | dbt_activity_schema.activity( 7 | dbt_activity_schema.last_before(), 8 | "visited page", 9 | ["feature_json", "ts"] 10 | ) 11 | ] 12 | ) 13 | }} 14 | -------------------------------------------------------------------------------- /integration_tests/models/first_before/dataset__first_before_1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("example__activity_stream"), 4 | dbt_activity_schema.activity(dbt_activity_schema.all_ever(), "bought something"), 5 | [ 6 | dbt_activity_schema.activity( 7 | dbt_activity_schema.first_before(), 8 | "visited page", 9 | ["feature_json", "ts"] 10 | ) 11 | ] 12 | ) 13 | }} 14 | -------------------------------------------------------------------------------- /integration_tests/seeds/first_after/output/output__first_after_3.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at,first_after_visit_page_feature_json,first_after_visit_page_activity_occurrence,first_after_visit_page_ts 2 | 3,1,2022-01-02 22:10:11,signed up,,"{""type"": 1}",0,,1,,"{""type"": 1}",4,2022-01-06 22:10:11 3 | 9,7,2022-01-08 22:10:11,signed up,,"{""type"": 1}",0,,1,,"{""type"": 1}",4,2022-01-12 22:10:11 4 | -------------------------------------------------------------------------------- /integration_tests/models/aggregate_all_ever/dataset__aggregate_all_ever_1.sql: 
-------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__aggregate_all_ever"), 4 | dbt_activity_schema.activity(dbt_activity_schema.all_ever(), "signed up"), 5 | [ 6 | dbt_activity_schema.activity( 7 | dbt_activity_schema.aggregate_all_ever(), 8 | "visit page", 9 | ["activity_id"] 10 | ) 11 | ] 12 | ) 13 | }} 14 | -------------------------------------------------------------------------------- /integration_tests/seeds/last_in_between/output/output__last_in_between_3.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,revenue_impact,last_in_between_bought_something_activity_id,last_in_between_bought_something_revenue_impact 2 | 2,1,2022-01-01 22:10:11,0,, 3 | 4,1,2022-01-03 22:10:11,0,, 4 | 5,1,2022-01-04 22:10:11,0,61,100.000000 5 | 7,1,2022-01-06 22:10:11,0,, 6 | 8,7,2022-01-07 22:10:11,0,, 7 | 10,7,2022-01-09 22:10:11,0,, 8 | 11,7,2022-01-10 22:10:11,0,121,100.000000 9 | 13,7,2022-01-12 22:10:11,0,, 10 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | # Triggers the workflow on push or pull request events but only for the main branch 5 | pull_request: 6 | branches: [ main ] 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | env: 12 | DBT_PROFILES_DIR: . 
# Use integration_tests/profiles.yml 13 | 14 | steps: 15 | - name: Checkout Branch 16 | uses: actions/checkout@v2 17 | 18 | - name: CI in Dockerfile 19 | uses: ./.github/actions/run_ci 20 | -------------------------------------------------------------------------------- /integration_tests/models/last_in_between/dataset__last_in_between_3.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__last_in_between"), 4 | dbt_activity_schema.activity(dbt_activity_schema.all_ever(), "visit page"), 5 | [ 6 | dbt_activity_schema.activity( 7 | dbt_activity_schema.last_in_between(), 8 | "bought something", 9 | ["activity_id", "revenue_impact"] 10 | ) 11 | ] 12 | ) 13 | }} 14 | -------------------------------------------------------------------------------- /integration_tests/models/last_in_between/dataset__last_in_between_2.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__last_in_between"), 4 | dbt_activity_schema.activity(dbt_activity_schema.all_ever(), "signed up"), 5 | [ 6 | dbt_activity_schema.activity( 7 | dbt_activity_schema.last_in_between(), 8 | "visit page", 9 | ["feature_json", "activity_occurrence", "ts"] 10 | ) 11 | ] 12 | ) 13 | }} 14 | -------------------------------------------------------------------------------- /integration_tests/models/first_after/dataset__first_after_1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__first_after"), 4 | dbt_activity_schema.activity( 5 | dbt_activity_schema.all_ever(), 6 | "signed up" 7 | ), 8 | [ 9 | dbt_activity_schema.activity( 10 | dbt_activity_schema.first_after(), 11 | "bought something", 12 | ["feature_json", "ts"] 13 | ) 14 | ] 15 | ) 16 | }} 17 | -------------------------------------------------------------------------------- 
/integration_tests/models/first_after/dataset__first_after_2.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__first_after"), 4 | dbt_activity_schema.activity( 5 | dbt_activity_schema.all_ever(), 6 | "visit page" 7 | ), 8 | [ 9 | dbt_activity_schema.activity( 10 | dbt_activity_schema.first_after(), 11 | "bought something", 12 | ["feature_json", "ts"] 13 | ) 14 | ] 15 | ) 16 | }} 17 | -------------------------------------------------------------------------------- /integration_tests/models/first_after/first_after.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: dataset__first_after_1 6 | tests: 7 | - dbt_utils.equality: 8 | compare_model: ref("output__first_after_1") 9 | 10 | - name: dataset__first_after_2 11 | tests: 12 | - dbt_utils.equality: 13 | compare_model: ref("output__first_after_2") 14 | 15 | - name: dataset__first_after_3 16 | tests: 17 | - dbt_utils.equality: 18 | compare_model: ref("output__first_after_3") 19 | -------------------------------------------------------------------------------- /macros/relationships/first_ever.sql: -------------------------------------------------------------------------------- 1 | {% macro first_ever_join_clause(alias=dbt_activity_schema.appended()) %} 2 | ( 3 | {{ alias }}.{{ dbt_activity_schema.columns().activity_occurrence }} = 1 4 | ) 5 | {% endmacro %} 6 | 7 | {% macro first_ever() %} 8 | 9 | {% do return(namespace( 10 | name="first_ever", 11 | aggregation_func=dbt_activity_schema.min, 12 | join_clause=dbt_activity_schema.first_ever_join_clause, 13 | where_clause=dbt_activity_schema.first_ever_join_clause(dbt_activity_schema.primary()) 14 | )) %} 15 | 16 | {% endmacro %} 17 | -------------------------------------------------------------------------------- /macros/relationships/last_ever.sql: 
-------------------------------------------------------------------------------- 1 | {% macro last_ever_join_clause(alias=dbt_activity_schema.appended()) %} 2 | ( 3 | {{ alias }}.{{ dbt_activity_schema.columns().activity_repeated_at }} is null 4 | ) 5 | {% endmacro %} 6 | 7 | {% macro last_ever() %} 8 | 9 | {% do return(namespace( 10 | name="last_ever", 11 | aggregation_func=dbt_activity_schema.min, 12 | join_clause=dbt_activity_schema.last_ever_join_clause, 13 | where_clause=dbt_activity_schema.last_ever_join_clause(dbt_activity_schema.primary()) 14 | )) %} 15 | 16 | {% endmacro %} 17 | -------------------------------------------------------------------------------- /integration_tests/models/first_in_between/dataset__first_in_between_1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__first_in_between"), 4 | dbt_activity_schema.activity( 5 | dbt_activity_schema.all_ever(), 6 | "signed up" 7 | ), 8 | [ 9 | dbt_activity_schema.activity( 10 | dbt_activity_schema.first_in_between(), 11 | "bought something", 12 | ["feature_json", "ts", "revenue_impact"] 13 | ) 14 | ] 15 | ) 16 | }} 17 | -------------------------------------------------------------------------------- /integration_tests/models/first_in_between/dataset__first_in_between_2.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__first_in_between"), 4 | dbt_activity_schema.activity( 5 | dbt_activity_schema.all_ever(), 6 | "visit page" 7 | ), 8 | [ 9 | dbt_activity_schema.activity( 10 | dbt_activity_schema.first_in_between(), 11 | "bought something", 12 | ["feature_json", "ts", "revenue_impact"] 13 | ) 14 | ] 15 | ) 16 | }} 17 | -------------------------------------------------------------------------------- /integration_tests/seeds/aggregate_in_between/output/output__aggregate_in_between_1.csv: 
-------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,revenue_impact,aggregate_in_between_added_to_cart_activity_id,aggregate_in_between_bought_something_revenue_impact,aggregate_in_between_bought_something_activity_id 2 | 1,1,2022-01-01 22:10:11,0,0,,0 3 | 3,1,2022-01-03 22:10:11,0,2,,0 4 | 6,1,2022-01-04 22:10:14,0,0,100,1 5 | 8,1,2022-01-06 22:10:16,0,0,,0 6 | 9,7,2022-01-07 22:10:11,0,0,,0 7 | 11,7,2022-01-09 22:10:11,0,2,,0 8 | 14,7,2022-01-10 22:10:14,0,0,100,1 9 | 16,7,2022-01-12 22:10:16,0,0,,0 10 | -------------------------------------------------------------------------------- /integration_tests/models/first_in_between/first_in_between.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: dataset__first_in_between_1 6 | tests: 7 | - dbt_utils.equality: 8 | compare_model: ref("output__first_in_between_1") 9 | 10 | - name: dataset__first_in_between_2 11 | tests: 12 | - dbt_utils.equality: 13 | compare_model: ref("output__first_in_between_2") 14 | 15 | - name: dataset__first_in_between_3 16 | tests: 17 | - dbt_utils.equality: 18 | compare_model: ref("output__first_in_between_3") 19 | -------------------------------------------------------------------------------- /integration_tests/models/first_ever/first_ever.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: dataset__first_ever_1 6 | description: dataset used to validate that the `first_ever` relationship is implemented properly. Also selects a parsed json feature from both the primary and appended activity to validate that json parsing works as expected when developers specify columns packed in the `feature_json` in the `included_columns` argument of the `activity` macro. 
7 | tests: 8 | - dbt_utils.equality: 9 | compare_model: ref("output__first_ever_1") 10 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "dbt-activity-schema" 3 | version = "0.4.1" 4 | package-mode = false 5 | description = "A dbt package to create models within the Activity Schema data model framework." 6 | authors = ["Teghan Nightengale ", "Bryce Codell "] 7 | license = "GNU" 8 | readme = "README.md" 9 | 10 | [tool.poetry.dependencies] 11 | python = "^3.10" 12 | dbt-core = "^1.7.0" 13 | dbt-duckdb = "^1.7.3" 14 | 15 | [build-system] 16 | requires = ["poetry-core"] 17 | build-backend = "poetry.core.masonry.api" 18 | -------------------------------------------------------------------------------- /integration_tests/seeds/first_in_between/output/output__first_in_between_2.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,revenue_impact,first_in_between_bought_something_feature_json,first_in_between_bought_something_ts,first_in_between_bought_something_revenue_impact 2 | 11,1,2022-01-01 22:10:11,0,,, 3 | 33,1,2022-01-03 22:10:11,0,,, 4 | 44,1,2022-01-04 22:10:11,0,"{""type"": 1}",2022-01-05 22:10:11,100.000000 5 | 77,1,2022-01-06 22:10:11,0,,, 6 | 88,7,2022-01-07 22:10:11,0,,, 7 | 1010,7,2022-01-09 22:10:11,0,,, 8 | 1111,7,2022-01-10 22:10:11,0,"{""type"": 1}",2022-01-11 22:10:11,100.000000 9 | 1414,7,2022-01-12 22:10:11,0,,, 10 | -------------------------------------------------------------------------------- /macros/relationships/append_only/last_after.sql: -------------------------------------------------------------------------------- 1 | {% macro last_after_join_clause(i) %} 2 | 3 | {% set primary = dbt_activity_schema.primary %} 4 | {% set columns = dbt_activity_schema.columns() %} 5 | {% set appended = dbt_activity_schema.appended %} 6 | 
7 | ( 8 | {{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }} 9 | ) 10 | {% endmacro %} 11 | 12 | {% macro last_after() %} 13 | 14 | {% do return(namespace( 15 | name="last_after", 16 | aggregation_func=dbt_activity_schema.max, 17 | join_clause=dbt_activity_schema.last_after_join_clause 18 | )) %} 19 | 20 | {% endmacro %} 21 | -------------------------------------------------------------------------------- /macros/relationships/append_only/first_after.sql: -------------------------------------------------------------------------------- 1 | {% macro first_after_join_clause(i) %} 2 | 3 | {% set primary = dbt_activity_schema.primary %} 4 | {% set columns = dbt_activity_schema.columns() %} 5 | {% set appended = dbt_activity_schema.appended %} 6 | 7 | ( 8 | {{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }} 9 | ) 10 | {% endmacro %} 11 | 12 | {% macro first_after() %} 13 | 14 | {% do return(namespace( 15 | name="first_after", 16 | aggregation_func=dbt_activity_schema.min, 17 | join_clause=dbt_activity_schema.first_after_join_clause 18 | )) %} 19 | 20 | {% endmacro %} 21 | -------------------------------------------------------------------------------- /integration_tests/models/first_ever/dataset__first_ever_1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("example__activity_stream"), 4 | dbt_activity_schema.activity( 5 | dbt_activity_schema.all_ever(), 6 | "visited page", 7 | var("dbt_activity_schema").get("included_columns") + ["visited_page"] 8 | ), 9 | [ 10 | dbt_activity_schema.activity( 11 | dbt_activity_schema.first_ever(), 12 | "signed up", 13 | ["feature_json", "ts", "signed_up"] 14 | ) 15 | ] 16 | ) 17 | }} 18 | -------------------------------------------------------------------------------- /integration_tests/seeds/first_in_between/output/output__first_in_between_3.csv: 
-------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,revenue_impact,feature_json,first_in_between_visit_page_feature_json,first_in_between_visit_page_activity_occurrence,first_in_between_visit_page_ts,first_in_between_bought_something_revenue_impact,first_in_between_bought_something_activity_id,first_in_between_bought_something_ts 2 | 22,1,2022-01-02 22:10:11,0,"{""type"": 1}","{""type"": 1}",4.000000,2022-01-06 22:10:11,100.000000,55,2022-01-05 22:10:11 3 | 99,7,2022-01-08 22:10:11,0,"{""type"": 1}","{""type"": 1}",4.000000,2022-01-12 22:10:11,100.000000,1212,2022-01-11 22:10:11 4 | -------------------------------------------------------------------------------- /integration_tests/seeds/first_after/output/output__first_after_2.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,revenue_impact,first_after_bought_something_feature_json,first_after_bought_something_ts 2 | 2,1,2022-01-01 22:10:11,0,"{""type"": 1}",2022-01-05 22:10:11 3 | 4,1,2022-01-03 22:10:11,0,"{""type"": 1}",2022-01-05 22:10:11 4 | 5,1,2022-01-04 22:10:11,0,"{""type"": 1}",2022-01-05 22:10:11 5 | 8,7,2022-01-07 22:10:11,0,"{""type"": 1}",2022-01-11 22:10:11 6 | 10,7,2022-01-09 22:10:11,0,"{""type"": 1}",2022-01-11 22:10:11 7 | 11,7,2022-01-10 22:10:11,0,"{""type"": 1}",2022-01-11 22:10:11 8 | 7,1,2022-01-06 22:10:11,0,, 9 | 13,7,2022-01-12 22:10:11,0,, 10 | -------------------------------------------------------------------------------- /macros/relationships/nth_ever.sql: -------------------------------------------------------------------------------- 1 | {% macro nth_ever_join_clause(nth_occurance, alias=dbt_activity_schema.appended()) %} 2 | ( 3 | {{ alias }}.{{ dbt_activity_schema.columns().activity_occurrence }} = {{ nth_occurance }} 4 | ) 5 | {% endmacro %} 6 | 7 | {% macro nth_ever(nth_occurance) %} 8 | 9 | {% do return(namespace( 10 | name="nth_ever", 11 | 
aggregation_func=dbt_activity_schema.min, 12 | nth_occurance=nth_occurance, 13 | join_clause=dbt_activity_schema.nth_ever_join_clause, 14 | where_clause=dbt_activity_schema.nth_ever_join_clause(nth_occurance, dbt_activity_schema.primary()) 15 | )) %} 16 | 17 | {% endmacro %} 18 | -------------------------------------------------------------------------------- /macros/relationships/append_only/last_before.sql: -------------------------------------------------------------------------------- 1 | {% macro last_before_join_clause(i) %} 2 | 3 | {% set primary = dbt_activity_schema.primary %} 4 | {% set columns = dbt_activity_schema.columns() %} 5 | {% set appended = dbt_activity_schema.appended %} 6 | 7 | ( 8 | {{ appended() }}.{{- columns.ts }} <= coalesce({{ primary() }}.{{- columns.ts }}, '1900-01-01'::timestamp) 9 | ) 10 | {% endmacro %} 11 | 12 | {% macro last_before() %} 13 | 14 | {% do return(namespace( 15 | name="last_before", 16 | aggregation_func=dbt_activity_schema.max, 17 | join_clause=dbt_activity_schema.last_before_join_clause 18 | )) %} 19 | 20 | {% endmacro %} 21 | -------------------------------------------------------------------------------- /macros/relationships/append_only/aggregate_after.sql: -------------------------------------------------------------------------------- 1 | {% macro aggregate_after_join_clause(i) %} 2 | 3 | {% set primary = dbt_activity_schema.primary %} 4 | {% set columns = dbt_activity_schema.columns() %} 5 | {% set appended = dbt_activity_schema.appended %} 6 | 7 | ( 8 | {{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }} 9 | ) 10 | {% endmacro %} 11 | 12 | {% macro aggregate_after(aggregation_func=dbt_activity_schema.count) %} 13 | 14 | {% do return(namespace( 15 | name="aggregate_after", 16 | aggregation_func=aggregation_func, 17 | join_clause=dbt_activity_schema.aggregate_after_join_clause 18 | )) %} 19 | 20 | {% endmacro %} 21 | 
-------------------------------------------------------------------------------- /macros/relationships/append_only/aggregate_before.sql: -------------------------------------------------------------------------------- 1 | {% macro aggregate_before_join_clause(i) %} 2 | 3 | {% set primary = dbt_activity_schema.primary %} 4 | {% set columns = dbt_activity_schema.columns() %} 5 | {% set appended = dbt_activity_schema.appended %} 6 | 7 | ( 8 | {{ appended() }}.{{- columns.ts }} < {{ primary() }}.{{- columns.ts }} 9 | ) 10 | {% endmacro %} 11 | 12 | {% macro aggregate_before(aggregation_func=dbt_activity_schema.count) %} 13 | 14 | {% do return(namespace( 15 | name="aggregate_before", 16 | aggregation_func=aggregation_func, 17 | join_clause=dbt_activity_schema.aggregate_before_join_clause 18 | )) %} 19 | 20 | {% endmacro %} 21 | -------------------------------------------------------------------------------- /integration_tests/models/aggregate_before/dataset__aggregate_before_1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__aggregate_before"), 4 | dbt_activity_schema.activity(dbt_activity_schema.all_ever(), "bought something"), 5 | [ 6 | dbt_activity_schema.activity( 7 | dbt_activity_schema.aggregate_before(), 8 | "visit page", 9 | ["activity_id"] 10 | ), 11 | dbt_activity_schema.activity( 12 | dbt_activity_schema.aggregate_before(), 13 | "added to cart", 14 | ["activity_id"] 15 | ) 16 | ] 17 | ) 18 | }} 19 | -------------------------------------------------------------------------------- /macros/relationships/append_only/first_before.sql: -------------------------------------------------------------------------------- 1 | {% macro first_before_join_clause(i) %} 2 | 3 | {% set primary = dbt_activity_schema.primary %} 4 | {% set columns = dbt_activity_schema.columns() %} 5 | {% set appended = dbt_activity_schema.appended %} 6 | 7 | ( 8 | {{ appended() }}.{{ 
columns.activity_occurrence }} = 1 9 | and {{ appended() }}.{{- columns.ts }} <= coalesce({{ primary() }}.{{- columns.activity_repeated_at }}, '2100-01-01'::timestamp) 10 | ) 11 | {% endmacro %} 12 | 13 | {% macro first_before() %} 14 | 15 | {% do return(namespace( 16 | name="first_before", 17 | aggregation_func=dbt_activity_schema.min, 18 | join_clause=dbt_activity_schema.first_before_join_clause 19 | )) %} 20 | 21 | {% endmacro %} 22 | -------------------------------------------------------------------------------- /macros/relationships/append_only/last_in_between.sql: -------------------------------------------------------------------------------- 1 | {% macro last_in_between_join_clause(i) %} 2 | 3 | {% set primary = dbt_activity_schema.primary %} 4 | {% set columns = dbt_activity_schema.columns() %} 5 | {% set appended = dbt_activity_schema.appended %} 6 | 7 | ( 8 | {{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }} 9 | and ( 10 | {{ appended() }}.{{- columns.ts }} <= {{ primary() }}.{{- columns.activity_repeated_at }} 11 | or {{ primary() }}.{{- columns.activity_repeated_at }} is null 12 | ) 13 | ) 14 | {% endmacro %} 15 | 16 | {% macro last_in_between() %} 17 | 18 | {% do return(namespace( 19 | name="last_in_between", 20 | aggregation_func=dbt_activity_schema.max, 21 | join_clause=dbt_activity_schema.last_in_between_join_clause 22 | )) %} 23 | 24 | {% endmacro %} 25 | -------------------------------------------------------------------------------- /macros/relationships/append_only/first_in_between.sql: -------------------------------------------------------------------------------- 1 | {% macro first_in_between_join_clause(i) %} 2 | 3 | {% set primary = dbt_activity_schema.primary %} 4 | {% set columns = dbt_activity_schema.columns() %} 5 | {% set appended = dbt_activity_schema.appended %} 6 | 7 | ( 8 | {{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }} 9 | and ( 10 | {{ appended() }}.{{- columns.ts }} <= {{ primary() 
}}.{{- columns.activity_repeated_at }} 11 | or {{ primary() }}.{{- columns.activity_repeated_at }} is null 12 | ) 13 | ) 14 | {% endmacro %} 15 | 16 | {% macro first_in_between() %} 17 | 18 | {% do return(namespace( 19 | name="first_in_between", 20 | aggregation_func=dbt_activity_schema.min, 21 | join_clause=dbt_activity_schema.first_in_between_join_clause 22 | )) %} 23 | 24 | {% endmacro %} 25 | -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | # Project name. 3 | name: 'dbt_activity_schema' 4 | version: '0.4.1' 5 | config-version: 2 6 | require-dbt-version: [">=1.3.0", "<2.0.0"] 7 | 8 | # The "profile" dbt uses for this project. 9 | profile: 'dbt_activity_schema' 10 | 11 | # Configuration paths. 12 | model-paths: ["models"] 13 | analysis-paths: ["analysis"] 14 | test-paths: ["tests"] 15 | seed-paths: ["data"] 16 | macro-paths: ["macros"] 17 | snapshot-paths: ["snapshots"] 18 | 19 | target-path: "target" 20 | clean-targets: 21 | - "target" 22 | - "dbt_modules" 23 | 24 | vars: 25 | included_columns: 26 | - activity_id 27 | - ts 28 | - customer 29 | - anonymous_customer_id 30 | - activity 31 | - activity_occurrence 32 | - activity_repeated_at 33 | - feature_json 34 | - revenue_impact 35 | - link 36 | column_mappings: {} 37 | -------------------------------------------------------------------------------- /macros/utils/helpers/json_unpack_key.sql: -------------------------------------------------------------------------------- 1 | {% macro json_unpack_key(json_col, key) %} 2 | {{ return(adapter.dispatch("json_unpack_key", "dbt_activity_schema")(json_col, key))}} 3 | {% endmacro %} 4 | 5 | {# params 6 | 7 | key: str 8 | The name of the key to unpack from the activity schema feature_json column. 
9 | #} 10 | 11 | {% macro default__json_unpack_key(json_col, key) -%} 12 | 13 | {% if caller %} 14 | 15 | json_extract_path_text({{ caller }}) 16 | 17 | {% else %} 18 | 19 | json_extract_path_text({{ json_col }}, {{dbt.string_literal(key) }}) 20 | 21 | {% endif %} 22 | 23 | {%- endmacro %} 24 | 25 | {% macro bigquery__json_unpack_key(json_col, key) -%} 26 | 27 | {% if caller %} 28 | 29 | json_extract({{ caller }}) 30 | 31 | {% else %} 32 | 33 | json_extract({{ json_col }}, {{dbt.string_literal("$."~key) }}) 34 | 35 | {% endif %} 36 | 37 | {%- endmacro %} 38 | -------------------------------------------------------------------------------- /macros/utils/aliasing/alias_column.sql: -------------------------------------------------------------------------------- 1 | {% macro alias_column(column_name, i=none) %} 2 | {{ return(adapter.dispatch("alias_column", "dbt_activity_schema")(column_name, i))}} 3 | {% endmacro %} 4 | 5 | 6 | {%- macro default__alias_column(column_name, i) -%} 7 | 8 | {# Generate the alias for the stream and it's appended activities. 9 | 10 | params: 11 | 12 | column_name: str 13 | The name of the column that will be aliased. 14 | 15 | i: int 16 | The cardinality of the appended activity, and thus the self join of the 17 | Activity Schema. Used to rejoin the Activity Schema multiple times, for 18 | multiple appended activities, with each being given a unique alias. 
19 | 20 | #} 21 | 22 | {% set alias %} 23 | {{ dbt_activity_schema.appended() }}.{{ column_name }} 24 | {% endset %} 25 | 26 | {% do return(alias) %} 27 | 28 | {%- endmacro -%} 29 | -------------------------------------------------------------------------------- /macros/relationships/append_only/aggregate_in_between.sql: -------------------------------------------------------------------------------- 1 | {% macro aggregate_in_between_join_clause(i) %} 2 | 3 | {% set primary = dbt_activity_schema.primary %} 4 | {% set columns = dbt_activity_schema.columns() %} 5 | {% set appended = dbt_activity_schema.appended %} 6 | 7 | ( 8 | {{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }} 9 | and ( 10 | {{ appended() }}.{{- columns.ts }} <= {{ primary() }}.{{- columns.activity_repeated_at }} 11 | or {{ primary() }}.{{- columns.activity_repeated_at }} is null 12 | ) 13 | ) 14 | {% endmacro %} 15 | 16 | {% macro aggregate_in_between(aggregation_func=dbt_activity_schema.count) %} 17 | 18 | {% do return(namespace( 19 | name="aggregate_in_between", 20 | aggregation_func=aggregation_func, 21 | join_clause=dbt_activity_schema.aggregate_in_between_join_clause 22 | )) %} 23 | 24 | {% endmacro %} 25 | -------------------------------------------------------------------------------- /integration_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | # Project Name 3 | name: dbt_activity_schema_integration_tests 4 | version: '1.0.0' 5 | 6 | # This setting configures which "profile" dbt uses for this project. 
7 | profile: 'integration_tests' 8 | 9 | config-version: 2 10 | 11 | model-paths: ["models"] 12 | macro-paths: ["macros"] 13 | test-paths: ["tests"] 14 | 15 | target-path: "target" 16 | clean-targets: ["target", "dbt_packages"] 17 | 18 | models: 19 | +materialized: external 20 | +format: csv 21 | 22 | vars: 23 | dbt_activity_schema: 24 | included_columns: 25 | - activity_id 26 | - entity_uuid 27 | - ts 28 | - revenue_impact 29 | column_mappings: 30 | customer: entity_uuid 31 | anonymous_customer_id: anonymous_entity_uuid 32 | 33 | seeds: 34 | dbt_activity_schema_integration_tests: 35 | +column_types: 36 | ACTIVITY_REPEATED_AT: TIMESTAMP 37 | -------------------------------------------------------------------------------- /macros/utils/columns.sql: -------------------------------------------------------------------------------- 1 | {% macro columns() %} 2 | {{ return(adapter.dispatch("columns", "dbt_activity_schema")())}} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__columns() %} 7 | 8 | {% set column_names = 9 | dict( 10 | activity_id = "activity_id", 11 | ts = "ts", 12 | customer = "customer", 13 | anonymous_customer_id = "anonymous_customer_id", 14 | activity = "activity", 15 | activity_occurrence = "activity_occurrence", 16 | activity_repeated_at = "activity_repeated_at", 17 | feature_json = "feature_json", 18 | revenue_impact = "revenue_impact", 19 | link = "link" 20 | ) 21 | %} 22 | 23 | {# Update names using the `column_mappings` project var. 
#} 24 | {% do column_names.update(var("column_mappings", var("dbt_activity_schema", {}).get("column_mappings", {}))) %} 25 | 26 | {% do return(column_names) %} 27 | 28 | {% endmacro %} 29 | -------------------------------------------------------------------------------- /macros/utils/aliasing/alias_cte.sql: -------------------------------------------------------------------------------- 1 | {%- macro alias_cte(activity, i) -%} 2 | {{ return(adapter.dispatch("alias_cte", "dbt_activity_schema")(activity, i))}} 3 | {% endmacro %} 4 | 5 | 6 | {%- macro default__alias_cte(activity, i) -%} 7 | 8 | {# Generate the alias for the stream and it's appended activities. 9 | 10 | params: 11 | 12 | activity: activity (class) 13 | The activity used to create the alias with a meaningful name for the 14 | compiled dataset. 15 | 16 | i: int 17 | The cardinality of the appended activity, and thus the self join of the 18 | Activity Schema. Used to rejoin the Activity Schema multiple times, for 19 | multiple appended activities, with each being given a unique alias. 
20 | 21 | #} 22 | 23 | {% set alias %} 24 | append_and_aggregate__{{ i }}__{{ activity.relationship.name }} 25 | {% endset %} 26 | 27 | {% do return(alias) %} 28 | 29 | {%- endmacro -%} 30 | -------------------------------------------------------------------------------- /integration_tests/models/aggregate_in_between/dataset__aggregate_in_between_1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__aggregate_in_between"), 4 | dbt_activity_schema.activity(dbt_activity_schema.all_ever(), "visit page"), 5 | [ 6 | dbt_activity_schema.activity( 7 | dbt_activity_schema.aggregate_in_between(), 8 | "added to cart", 9 | ["activity_id"] 10 | ), 11 | dbt_activity_schema.activity( 12 | dbt_activity_schema.aggregate_in_between(dbt_activity_schema.sum), 13 | "bought something", 14 | ["revenue_impact"] 15 | ), 16 | dbt_activity_schema.activity( 17 | dbt_activity_schema.aggregate_in_between(), 18 | "bought something", 19 | ["activity_id"] 20 | ) 21 | ] 22 | ) 23 | }} 24 | -------------------------------------------------------------------------------- /integration_tests/models/nth_ever/dataset__nth_ever_1.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | dbt_activity_schema.dataset( 3 | ref("input__nth_ever"), 4 | dbt_activity_schema.activity( 5 | dbt_activity_schema.nth_ever(2), 6 | "visit page", 7 | ["activity_occurrence"] 8 | ), 9 | [ 10 | dbt_activity_schema.activity( 11 | dbt_activity_schema.nth_ever(3), 12 | "visit page", 13 | ["activity_occurrence"] 14 | ), 15 | dbt_activity_schema.activity( 16 | dbt_activity_schema.nth_ever(4), 17 | "visit page", 18 | ["activity_occurrence"] 19 | ), 20 | dbt_activity_schema.activity( 21 | dbt_activity_schema.nth_ever(5), 22 | "visit page", 23 | ["activity_occurrence"] 24 | ), 25 | ] 26 | ) 27 | }} 28 | -------------------------------------------------------------------------------- 
/integration_tests/models/last_in_between/last_in_between.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | 5 | - name: dataset__last_in_between_1 6 | description: A dataset model used to test basic functionality of `last_in_between` relationship. 7 | tests: 8 | - dbt_utils.equality: 9 | compare_model: ref("output__last_in_between_1") 10 | 11 | - name: dataset__last_in_between_2 12 | description: A dataset model used to test a case where the `last_in_between` value of a column from an appended activity is not the maximum value for that column. 13 | tests: 14 | - dbt_utils.equality: 15 | compare_model: ref("output__last_in_between_2") 16 | 17 | - name: dataset__last_in_between_3 18 | description: A dataset model used to test a case where the `last_in_between` value of a column from an appended activity is not the maximum value for that column. 19 | tests: 20 | - dbt_utils.equality: 21 | compare_model: ref("output__last_in_between_3") 22 | -------------------------------------------------------------------------------- /integration_tests/seeds/first_after/input__first_after.csv: -------------------------------------------------------------------------------- 1 | activity_id,ts,entity_uuid,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at 2 | 2,2022-01-01 22:10:11,1,visit page,,"{""type"": 1}",0,,1,2022-01-03 22:10:11 3 | 3,2022-01-02 22:10:11,1,signed up,,"{""type"": 1}",0,,1, 4 | 4,2022-01-03 22:10:11,1,visit page,,"{""type"": 2}",0,,2,2022-01-04 22:10:11 5 | 5,2022-01-04 22:10:11,1,visit page,,"{""type"": 2}",0,,3,2022-01-06 22:10:11 6 | 6,2022-01-05 22:10:11,1,bought something,,"{""type"": 1}",100,,1, 7 | 7,2022-01-06 22:10:11,1,visit page,,"{""type"": 1}",0,,4, 8 | 8,2022-01-07 22:10:11,7,visit page,,"{""type"": 1}",0,,1,2022-01-09 22:10:11 9 | 9,2022-01-08 22:10:11,7,signed up,,"{""type"": 1}",0,,1, 10 | 10,2022-01-09 22:10:11,7,visit 
page,,"{""type"": 2}",0,,2,2022-01-10 22:10:11 11 | 11,2022-01-10 22:10:11,7,visit page,,"{""type"": 2}",0,,3,2022-01-12 22:10:11 12 | 12,2022-01-11 22:10:11,7,bought something,,"{""type"": 1}",100,,1, 13 | 13,2022-01-12 22:10:11,7,visit page,,"{""type"": 1}",0,,4, 14 | -------------------------------------------------------------------------------- /integration_tests/models/first_after/dataset__first_after_3.sql: -------------------------------------------------------------------------------- 1 | {% set join_condition %} 2 | json_extract({{ dbt_activity_schema.primary() }}.feature_json, 'type') 3 | = json_extract({{ dbt_activity_schema.appended() }}.feature_json, 'type') 4 | {% endset %} 5 | 6 | {{ 7 | dbt_activity_schema.dataset( 8 | ref("input__first_after"), 9 | dbt_activity_schema.activity( 10 | dbt_activity_schema.all_ever(), 11 | "signed up", 12 | [ 13 | "activity_id", 14 | "entity_uuid", 15 | "ts", 16 | "revenue_impact", 17 | "feature_json" 18 | ] 19 | ), 20 | [ 21 | dbt_activity_schema.activity( 22 | dbt_activity_schema.first_after(), 23 | "visit page", 24 | [ 25 | "feature_json", 26 | "activity_occurrence", 27 | "ts" 28 | ], 29 | additional_join_condition=join_condition 30 | ) 31 | ] 32 | ) 33 | }} 34 | -------------------------------------------------------------------------------- /macros/utils/aliasing/alias_appended_activity.sql: -------------------------------------------------------------------------------- 1 | {% macro alias_appended_activity(activity, column_name) %} 2 | {{ return(adapter.dispatch("alias_appended_activity", "dbt_activity_schema")(activity, column_name))}} 3 | {% endmacro %} 4 | 5 | 6 | {% macro default__alias_appended_activity(activity, column_name) %} 7 | 8 | {# Generate the name of appended columns in `dataset.sql`. 9 | 10 | params: 11 | 12 | activity: activity (class) 13 | The appended activity object, containing the string attributes to be concatenated in the 14 | column alias prefix. 
15 | 16 | column_name: str 17 | The name of the column that will be aliased. 18 | #} 19 | 20 | {% set name = activity.relationship.name %} 21 | {% if activity.relationship.name == 'nth_ever' %} 22 | {% set name -%} 23 | {{ name }}_{{ activity.relationship.nth_occurance }} 24 | {%- endset %} 25 | {% endif %} 26 | 27 | {% set concatenated_activity_alias %} 28 | {{ name -}}_{{- activity.name | replace(" ", "_") -}}_{{- column_name -}} 29 | {% endset %} 30 | 31 | {% do return(concatenated_activity_alias) %} 32 | 33 | {% endmacro %} 34 | -------------------------------------------------------------------------------- /integration_tests/seeds/last_after/input__last_after.csv: -------------------------------------------------------------------------------- 1 | activity_id,ts,entity_uuid,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at 2 | 2,2022-01-01 22:10:11,1,visit page,,"[{""visited page"": 1}]",0,,1,2022-01-03 22:10:11 3 | 3,2022-01-02 22:10:11,1,signed up,,"[{""signed up"": 1}]",0,,1, 4 | 4,2022-01-03 22:10:11,1,visit page,,"[{""visited page"": 1}]",0,,2,2022-01-04 22:10:11 5 | 5,2022-01-04 22:10:11,1,visit page,,"[{""visited page"": 1}]",0,,3,2022-01-06 22:10:11 6 | 6,2022-01-05 22:10:11,1,bought something,,"[{""bought something"": 1}]",100,,1, 7 | 7,2022-01-06 22:10:11,1,visit page,,"[{""visited page"": 1}]",0,,4, 8 | 8,2022-01-07 22:10:11,7,visit page,,"[{""visited page"": 1}]",0,,1,2022-01-09 22:10:11 9 | 9,2022-01-08 22:10:11,7,signed up,,"[{""signed up"": 1}]",0,,1, 10 | 10,2022-01-09 22:10:11,7,visit page,,"[{""visited page"": 1}]",0,,2,2022-01-10 22:10:11 11 | 11,2022-01-10 22:10:11,7,visit page,,"[{""visited page"": 1}]",0,,3,2022-01-12 22:10:11 12 | 12,2022-01-11 22:10:11,7,bought something,,"[{""bought something"": 1}]",100,,1, 13 | 13,2022-01-12 22:10:11,7,visit page,,"[{""visited page"": 1}]",0,,4, 14 | -------------------------------------------------------------------------------- 
/integration_tests/seeds/first_in_between/input__first_in_between.csv: -------------------------------------------------------------------------------- 1 | activity_id,ts,entity_uuid,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at 2 | 11,2022-01-01 22:10:11,1,visit page,,"{""type"": 1}",0,,1,2022-01-03 22:10:11 3 | 22,2022-01-02 22:10:11,1,signed up,,"{""type"": 1}",0,,1, 4 | 33,2022-01-03 22:10:11,1,visit page,,"{""type"": 2}",0,,2,2022-01-04 22:10:11 5 | 44,2022-01-04 22:10:11,1,visit page,,"{""type"": 2}",0,,3,2022-01-06 22:10:11 6 | 55,2022-01-05 22:10:11,1,bought something,,"{""type"": 1}",100,,1,2022-01-05 22:10:12 7 | 66,2022-01-05 22:10:12,1,bought something,,"{""type"": 2}",99,,2, 8 | 77,2022-01-06 22:10:11,1,visit page,,"{""type"": 1}",0,,4, 9 | 88,2022-01-07 22:10:11,7,visit page,,"{""type"": 1}",0,,1,2022-01-09 22:10:11 10 | 99,2022-01-08 22:10:11,7,signed up,,"{""type"": 1}",0,,1, 11 | 1010,2022-01-09 22:10:11,7,visit page,,"{""type"": 2}",0,,2,2022-01-10 22:10:11 12 | 1111,2022-01-10 22:10:11,7,visit page,,"{""type"": 2}",0,,3,2022-01-12 22:10:11 13 | 1212,2022-01-11 22:10:11,7,bought something,,"{""type"": 1}",100,,1,2022-01-11 22:10:12 14 | 1313,2022-01-11 22:10:12,7,bought something,,"{""type"": 2}",99,,2, 15 | 1414,2022-01-12 22:10:11,7,visit page,,"{""type"": 1}",0,,4, 16 | -------------------------------------------------------------------------------- /integration_tests/seeds/last_ever/output/output__last_ever_1.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at,last_ever_bought_something_feature_json,last_ever_bought_something_ts 2 | 29,1,2022-01-28 22:10:11,visited page,,{"visited_page": 1},0,,2,,{"bought_something": 1},2022-01-30 22:10:11 3 | 32,4,2022-01-31 22:10:11,visited page,,{"visited_page": 
1},0,,2,,{"bought_something": 1},2022-02-02 22:10:11 4 | 35,7,2022-02-03 22:10:11,visited page,,{"visited_page": 1},0,,2,,{"bought_something": 1},2022-02-05 22:10:11 5 | 38,10,2022-02-06 22:10:11,visited page,,{"visited_page": 1},0,,2,,{"bought_something": 1},2022-02-08 22:10:11 6 | 41,13,2022-02-09 22:10:11,visited page,,{"visited_page": 1},0,,2,,{"bought_something": 1},2022-02-11 22:10:11 7 | 44,16,2022-02-12 22:10:11,visited page,,{"visited_page": 1},0,,2,,{"bought_something": 1},2022-02-14 22:10:11 8 | 47,19,2022-02-15 22:10:11,visited page,,{"visited_page": 1},0,,2,,{"bought_something": 1},2022-02-17 22:10:11 9 | 50,22,2022-02-18 22:10:11,visited page,,{"visited_page": 1},0,,2,,{"bought_something": 1},2022-02-20 22:10:11 10 | 53,25,2022-02-21 22:10:11,visited page,,{"visited_page": 1},0,,2,,{"bought_something": 1},2022-02-23 22:10:11 11 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.9-bullseye 2 | 3 | ENV POETRY_VERSION=1.8.2 \ 4 | POETRY_HOME=/usr/local \ 5 | POETRY_VIRTUALENVS_CREATE=false \ 6 | DBT_PROFILES_DIR=. \ 7 | PROJECT_DIR=/workspaces/dbt-activity-schema/ 8 | 9 | # Add BUILDARCH from Global Docker Build Args 10 | ARG BUILDARCH 11 | ARG DUCKDB_VERSION="v0.10.1" 12 | 13 | # Install DuckDB from Binary 14 | RUN export ARCH=$( \ 15 | if [ "$BUILDARCH" = "amd64" ]; then \ 16 | echo "amd64"; \ 17 | elif [ "$BUILDARCH" = "arm64" ]; then \ 18 | echo "aarch64"; \ 19 | else \ 20 | echo "Got BUILDARCH=${BUILDARCH}. 
Must be one of amd64, arm64" && exit 1; \ 21 | fi \ 22 | ) && wget "https://github.com/duckdb/duckdb/releases/download/${DUCKDB_VERSION}/duckdb_cli-linux-${ARCH}.zip" && \ 23 | unzip duckdb_cli-linux-${ARCH}.zip -d /usr/local/bin/ && \ 24 | chmod +x /usr/local/bin/duckdb && \ 25 | rm duckdb_cli-linux-${ARCH}.zip 26 | 27 | RUN apt-get update \ 28 | && apt-get install -y vim nano \ 29 | && curl -sSL https://install.python-poetry.org | python - \ 30 | && apt-get clean 31 | 32 | WORKDIR $PROJECT_DIR 33 | 34 | COPY ["*poetry.lock", "pyproject.toml", "$PROJECT_DIR"] 35 | RUN poetry install --no-interaction 36 | 37 | ENTRYPOINT ["./scripts/ci.sh"] 38 | -------------------------------------------------------------------------------- /integration_tests/seeds/nth_ever/input__nth_ever.csv: -------------------------------------------------------------------------------- 1 | activity_id,ts,entity_uuid,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at 2 | 1,2022-01-01 22:10:11,1,visit page,,"{""type"": 1}",0,,1,2022-01-03 22:10:11 3 | 2,2022-01-02 22:10:11,1,signed up,,"{""type"": 1}",0,,1, 4 | 3,2022-01-03 22:10:11,1,visit page,,"{""type"": 2}",0,,2,2022-01-04 22:10:14 5 | 4,2022-01-04 22:10:12,1,added to cart,,"{""type"": 4}",0,,1,2022-01-04 22:10:13 6 | 5,2022-01-04 22:10:13,1,added to cart,,"{""type"": 4}",0,,2, 7 | 6,2022-01-04 22:10:14,1,visit page,,"{""type"": 2}",0,,3,2022-01-06 22:10:16 8 | 7,2022-01-05 22:10:15,1,bought something,,"{""type"": 1}",100,,1, 9 | 8,2022-01-06 22:10:16,1,visit page,,"{""type"": 1}",0,,4, 10 | 9,2022-01-07 22:10:11,7,visit page,,"{""type"": 1}",0,,1,2022-01-09 22:10:11 11 | 10,2022-01-08 22:10:11,7,signed up,,"{""type"": 1}",0,,1, 12 | 11,2022-01-09 22:10:11,7,visit page,,"{""type"": 2}",0,,2,2022-01-10 22:10:14 13 | 12,2022-01-10 22:10:12,7,added to cart,,"{""type"": 4}",0,,1,2022-01-10 22:10:13 14 | 13,2022-01-10 22:10:13,7,added to cart,,"{""type"": 4}",0,,2, 15 | 14,2022-01-10 
22:10:14,7,visit page,,"{""type"": 2}",0,,3,2022-01-12 22:10:16 16 | 15,2022-01-11 22:10:15,7,bought something,,"{""type"": 1}",100,,1, 17 | 16,2022-01-12 22:10:16,7,visit page,,"{""type"": 1}",0,,4, 18 | -------------------------------------------------------------------------------- /integration_tests/seeds/aggregate_after/input__aggregate_after.csv: -------------------------------------------------------------------------------- 1 | activity_id,ts,entity_uuid,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at 2 | 1,2022-01-01 22:10:11,1,visit page,,"{""type"": 1}",0,,1,2022-01-03 22:10:11 3 | 2,2022-01-02 22:10:11,1,signed up,,"{""type"": 1}",0,,1, 4 | 3,2022-01-03 22:10:11,1,visit page,,"{""type"": 2}",0,,2,2022-01-04 22:10:14 5 | 4,2022-01-04 22:10:12,1,added to cart,,"{""type"": 4}",0,,1,2022-01-04 22:10:13 6 | 5,2022-01-04 22:10:13,1,added to cart,,"{""type"": 4}",0,,2, 7 | 6,2022-01-04 22:10:14,1,visit page,,"{""type"": 2}",0,,3,2022-01-06 22:10:16 8 | 7,2022-01-05 22:10:15,1,bought something,,"{""type"": 1}",100,,1, 9 | 8,2022-01-06 22:10:16,1,visit page,,"{""type"": 1}",0,,4, 10 | 9,2022-01-07 22:10:11,7,visit page,,"{""type"": 1}",0,,1,2022-01-09 22:10:11 11 | 10,2022-01-08 22:10:11,7,signed up,,"{""type"": 1}",0,,1, 12 | 11,2022-01-09 22:10:11,7,visit page,,"{""type"": 2}",0,,2,2022-01-10 22:10:14 13 | 12,2022-01-10 22:10:12,7,added to cart,,"{""type"": 4}",0,,1,2022-01-10 22:10:13 14 | 13,2022-01-10 22:10:13,7,added to cart,,"{""type"": 4}",0,,2, 15 | 14,2022-01-10 22:10:14,7,visit page,,"{""type"": 2}",0,,3,2022-01-12 22:10:16 16 | 15,2022-01-11 22:10:15,7,bought something,,"{""type"": 1}",100,,1, 17 | 16,2022-01-12 22:10:16,7,visit page,,"{""type"": 1}",0,,4, 18 | -------------------------------------------------------------------------------- /integration_tests/seeds/aggregate_all_ever/input__aggregate_all_ever.csv: -------------------------------------------------------------------------------- 
1 | activity_id,ts,entity_uuid,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at 2 | 1,2022-01-01 22:10:11,1,visit page,,"{""type"": 1}",0,,1,2022-01-03 22:10:11 3 | 2,2022-01-02 22:10:11,1,signed up,,"{""type"": 1}",0,,1, 4 | 3,2022-01-03 22:10:11,1,visit page,,"{""type"": 2}",0,,2,2022-01-04 22:10:14 5 | 4,2022-01-04 22:10:12,1,added to cart,,"{""type"": 4}",0,,1,2022-01-04 22:10:13 6 | 5,2022-01-04 22:10:13,1,added to cart,,"{""type"": 4}",0,,2, 7 | 6,2022-01-04 22:10:14,1,visit page,,"{""type"": 2}",0,,3,2022-01-06 22:10:16 8 | 7,2022-01-05 22:10:15,1,bought something,,"{""type"": 1}",100,,1, 9 | 8,2022-01-06 22:10:16,1,visit page,,"{""type"": 1}",0,,4, 10 | 9,2022-01-07 22:10:11,7,visit page,,"{""type"": 1}",0,,1,2022-01-09 22:10:11 11 | 10,2022-01-08 22:10:11,7,signed up,,"{""type"": 1}",0,,1, 12 | 11,2022-01-09 22:10:11,7,visit page,,"{""type"": 2}",0,,2,2022-01-10 22:10:14 13 | 12,2022-01-10 22:10:12,7,added to cart,,"{""type"": 4}",0,,1,2022-01-10 22:10:13 14 | 13,2022-01-10 22:10:13,7,added to cart,,"{""type"": 4}",0,,2, 15 | 14,2022-01-10 22:10:14,7,visit page,,"{""type"": 2}",0,,3,2022-01-12 22:10:16 16 | 15,2022-01-11 22:10:15,7,bought something,,"{""type"": 1}",100,,1, 17 | 16,2022-01-12 22:10:16,7,visit page,,"{""type"": 1}",0,,4, 18 | -------------------------------------------------------------------------------- /integration_tests/seeds/aggregate_before/input__aggregate_before.csv: -------------------------------------------------------------------------------- 1 | activity_id,ts,entity_uuid,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at 2 | 1,2022-01-01 22:10:11,1,visit page,,"{""type"": 1}",0,,1,2022-01-03 22:10:11 3 | 2,2022-01-02 22:10:11,1,signed up,,"{""type"": 1}",0,,1, 4 | 3,2022-01-03 22:10:11,1,visit page,,"{""type"": 2}",0,,2,2022-01-04 22:10:14 5 | 4,2022-01-04 22:10:12,1,added to cart,,"{""type"": 4}",0,,1,2022-01-04 
22:10:13 6 | 5,2022-01-04 22:10:13,1,added to cart,,"{""type"": 4}",0,,2, 7 | 6,2022-01-04 22:10:14,1,visit page,,"{""type"": 2}",0,,3,2022-01-06 22:10:16 8 | 7,2022-01-05 22:10:15,1,bought something,,"{""type"": 1}",100,,1, 9 | 8,2022-01-06 22:10:16,1,visit page,,"{""type"": 1}",0,,4, 10 | 9,2022-01-07 22:10:11,7,visit page,,"{""type"": 1}",0,,1,2022-01-09 22:10:11 11 | 10,2022-01-08 22:10:11,7,signed up,,"{""type"": 1}",0,,1, 12 | 11,2022-01-09 22:10:11,7,visit page,,"{""type"": 2}",0,,2,2022-01-10 22:10:14 13 | 12,2022-01-10 22:10:12,7,added to cart,,"{""type"": 4}",0,,1,2022-01-10 22:10:13 14 | 13,2022-01-10 22:10:13,7,added to cart,,"{""type"": 4}",0,,2, 15 | 14,2022-01-10 22:10:14,7,visit page,,"{""type"": 2}",0,,3,2022-01-12 22:10:16 16 | 15,2022-01-11 22:10:15,7,bought something,,"{""type"": 1}",100,,1, 17 | 16,2022-01-12 22:10:16,7,visit page,,"{""type"": 1}",0,,4, 18 | -------------------------------------------------------------------------------- /integration_tests/seeds/aggregate_in_between/input__aggregate_in_between.csv: -------------------------------------------------------------------------------- 1 | activity_id,ts,entity_uuid,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at 2 | 1,2022-01-01 22:10:11,1,visit page,,"{""type"": 1}",0,,1,2022-01-03 22:10:11 3 | 2,2022-01-02 22:10:11,1,signed up,,"{""type"": 1}",0,,1, 4 | 3,2022-01-03 22:10:11,1,visit page,,"{""type"": 2}",0,,2,2022-01-04 22:10:14 5 | 4,2022-01-04 22:10:12,1,added to cart,,"{""type"": 4}",0,,1,2022-01-04 22:10:13 6 | 5,2022-01-04 22:10:13,1,added to cart,,"{""type"": 4}",0,,2, 7 | 6,2022-01-04 22:10:14,1,visit page,,"{""type"": 2}",0,,3,2022-01-06 22:10:16 8 | 7,2022-01-05 22:10:15,1,bought something,,"{""type"": 1}",100,,1, 9 | 8,2022-01-06 22:10:16,1,visit page,,"{""type"": 1}",0,,4, 10 | 9,2022-01-07 22:10:11,7,visit page,,"{""type"": 1}",0,,1,2022-01-09 22:10:11 11 | 10,2022-01-08 22:10:11,7,signed up,,"{""type"": 1}",0,,1, 
12 | 11,2022-01-09 22:10:11,7,visit page,,"{""type"": 2}",0,,2,2022-01-10 22:10:14 13 | 12,2022-01-10 22:10:12,7,added to cart,,"{""type"": 4}",0,,1,2022-01-10 22:10:13 14 | 13,2022-01-10 22:10:13,7,added to cart,,"{""type"": 4}",0,,2, 15 | 14,2022-01-10 22:10:14,7,visit page,,"{""type"": 2}",0,,3,2022-01-12 22:10:16 16 | 15,2022-01-11 22:10:15,7,bought something,,"{""type"": 1}",100,,1, 17 | 16,2022-01-12 22:10:16,7,visit page,,"{""type"": 1}",0,,4, 18 | -------------------------------------------------------------------------------- /integration_tests/seeds/last_in_between/input__last_in_between.csv: -------------------------------------------------------------------------------- 1 | activity_id,ts,entity_uuid,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at 2 | 2,2022-01-01 22:10:11,1,visit page,,"[{""visited page"": 1}]",0,,1,2022-01-03 22:10:11 3 | 3,2022-01-02 22:10:11,1,signed up,,"[{""signed up"": 1}]",0,,1, 4 | 4,2022-01-03 22:10:11,1,visit page,,"[{""visited page"": 2}]",0,,2,2022-01-04 22:10:11 5 | 5,2022-01-04 22:10:11,1,visit page,,"[{""visited page"": 1}]",0,,3,2022-01-06 22:10:11 6 | 6,2022-01-05 22:10:11,1,bought something,,"[{""bought something"": 1}]",101,,1,2022-01-05 22:10:12 7 | 61,2022-01-05 22:10:12,1,bought something,,"[{""bought something"": 1}]",100,,2, 8 | 7,2022-01-06 22:10:11,1,visit page,,"[{""visited page"": 1}]",0,,4, 9 | 8,2022-01-07 22:10:11,7,visit page,,"[{""visited page"": 1}]",0,,1,2022-01-09 22:10:11 10 | 9,2022-01-08 22:10:11,7,signed up,,"[{""signed up"": 1}]",0,,1, 11 | 10,2022-01-09 22:10:11,7,visit page,,"[{""visited page"": 1}]",0,,2,2022-01-10 22:10:11 12 | 11,2022-01-10 22:10:11,7,visit page,,"[{""visited page"": 1}]",0,,3,2022-01-12 22:10:11 13 | 12,2022-01-11 22:10:11,7,bought something,,"[{""bought something"": 1}]",101,,1,2022-01-11 22:10:12 14 | 121,2022-01-11 22:10:12,7,bought something,,"[{""bought something"": 1}]",100,,2, 15 | 13,2022-01-12 
{# Integration-test model: a first_in_between dataset whose "visit page" join
   is additionally restricted to rows with a matching feature_json `type`. #}

{# SQL boolean injected as an extra join predicate; the primary()/appended()
   macros resolve to the correct CTE aliases at compile time. #}
{% set matching_feature_type %}
json_extract({{ dbt_activity_schema.primary() }}.feature_json, 'type')
= json_extract({{ dbt_activity_schema.appended() }}.feature_json, 'type')
{% endset %}

{# Primary activity: every "signed up" event, with the columns the outputs assert on. #}
{% set primary_activity = dbt_activity_schema.activity(
    dbt_activity_schema.all_ever(),
    "signed up",
    [
        "activity_id",
        "entity_uuid",
        "ts",
        "revenue_impact",
        "feature_json"
    ]
) %}

{# Appended activities: first "visit page" (feature-type constrained) and
   first "bought something" in between occurrences of the primary activity. #}
{% set appended_activities = [
    dbt_activity_schema.activity(
        dbt_activity_schema.first_in_between(),
        "visit page",
        [
            "feature_json",
            "activity_occurrence",
            "ts"
        ],
        additional_join_condition=matching_feature_type
    ),
    dbt_activity_schema.activity(
        dbt_activity_schema.first_in_between(),
        "bought something",
        [
            "revenue_impact",
            "activity_id",
            "ts"
        ]
    )
] %}

{{ dbt_activity_schema.dataset(
    ref("input__first_in_between"),
    primary_activity,
    appended_activities
) }}
{% macro _min_or_max(min_or_max, qualified_col) %}

{# Build a min()/max() SQL expression that returns `qualified_col` from the
   appended row with the smallest (min) or largest (max) timestamp.

   The value is selected by prepending the ts column to the target column,
   aggregating the concatenation, then trimming the aggregated ts prefix back
   off and casting to the column's original type.

   params:

        min_or_max: str
            Either "min" or "max". Any other value raises a compiler error.

        qualified_col: str
            Alias-qualified column to aggregate, eg "appended.feature_json".
#}

{# Fail fast on an invalid keyword instead of silently falling back to max(),
   which would produce wrong aggregations without any signal to the caller. #}
{% if min_or_max not in ("min", "max") %}
    {% do exceptions.raise_compiler_error(
        "_min_or_max: `min_or_max` must be 'min' or 'max', got '{}'.".format(min_or_max)
    ) %}
{% endif %}

{% set aggregation = min_or_max %}
{% set column_name = qualified_col.split(".")[-1].strip() %}
{% set qualified_ts_col = "{}.{}".format(dbt_activity_schema.appended(), dbt_activity_schema.columns().ts )%}
{% set columns = dbt_activity_schema.columns() %}


{# Set type to cast back to after aggregation. #}
{# TODO: Refactor column abstraction to contain types. #}
{% if column_name in [
    columns.ts,
    columns.activity_repeated_at
] %}
    {% set type = dbt.type_timestamp() %}
{% elif column_name in [
    columns.activity_occurrence,
    columns.revenue_impact
] %}
    {% set type = dbt.type_numeric() %}
{% else %}
    {% set type = dbt.type_string() %}
{% endif %}

{# Prepend ts column and aggregate. See here for details: https://tinyurl.com/mwfz6xm4 #}
{% set ts_concatenated_and_aggregated_col %}
{{ aggregation }}(
    {{ dbt.concat([
        dbt.safe_cast(qualified_ts_col, dbt.type_string()),
        dbt.safe_cast(qualified_col, dbt.type_string())
    ]) }}
)
{% endset %}

{# Aggregate ts column before trimming, so it is not required in GROUP BY. #}
{% set aggregated_ts_col %}
{{ aggregation }}( {{ dbt.safe_cast(qualified_ts_col, dbt.type_string()) }} )
{% endset %}

{# Calculate length of column without prepended & aggregated ts column. #}
{% set retain_n_rightmost_characters %}
{{ dbt.length(ts_concatenated_and_aggregated_col) }} - {{ dbt.length(aggregated_ts_col) }}
{% endset %}

{# Remove prepended & aggregated ts column. #}
{% set output %}
{{ dbt.safe_cast(
    dbt.right(
        ts_concatenated_and_aggregated_col,
        retain_n_rightmost_characters
    ), type) }}
{% endset %}

{% do return(output) %}

{% endmacro %}
32,4,2022-01-31 22:10:11,visited page,,{"visited_page": 1},0,1,,2,,{"signed_up": 1},2022-01-05 22:10:11,1 13 | 35,7,2022-02-03 22:10:11,visited page,,{"visited_page": 1},0,1,,2,,{"signed_up": 1},2022-01-08 22:10:11,1 14 | 38,10,2022-02-06 22:10:11,visited page,,{"visited_page": 1},0,1,,2,,{"signed_up": 1},2022-01-11 22:10:11,1 15 | 41,13,2022-02-09 22:10:11,visited page,,{"visited_page": 1},0,1,,2,,{"signed_up": 1},2022-01-14 22:10:11,1 16 | 44,16,2022-02-12 22:10:11,visited page,,{"visited_page": 1},0,1,,2,,{"signed_up": 1},2022-01-17 22:10:11,1 17 | 47,19,2022-02-15 22:10:11,visited page,,{"visited_page": 1},0,1,,2,,{"signed_up": 1},2022-01-20 22:10:11,1 18 | 50,22,2022-02-18 22:10:11,visited page,,{"visited_page": 1},0,1,,2,,{"signed_up": 1},2022-01-23 22:10:11,1 19 | 53,25,2022-02-21 22:10:11,visited page,,{"visited_page": 1},0,1,,2,,{"signed_up": 1},2022-01-26 22:10:11,1 20 | -------------------------------------------------------------------------------- /integration_tests/seeds/last_before/output/output__last_before_1.csv: -------------------------------------------------------------------------------- 1 | activity_id,entity_uuid,ts,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at,last_before_visited_page_feature_json,last_before_visited_page_ts 2 | 31,1,2022-01-30 22:10:11,bought something,,{"bought_something": 1},100,,2,,{"visited_page": 1},2022-01-28 22:10:11 3 | 34,4,2022-02-02 22:10:11,bought something,,{"bought_something": 1},100,,2,,{"visited_page": 1},2022-01-31 22:10:11 4 | 37,7,2022-02-05 22:10:11,bought something,,{"bought_something": 1},100,,2,,{"visited_page": 1},2022-02-03 22:10:11 5 | 40,10,2022-02-08 22:10:11,bought something,,{"bought_something": 1},100,,2,,{"visited_page": 1},2022-02-06 22:10:11 6 | 43,13,2022-02-11 22:10:11,bought something,,{"bought_something": 1},100,,2,,{"visited_page": 1},2022-02-09 22:10:11 7 | 46,16,2022-02-14 22:10:11,bought something,,{"bought_something": 
1},100,,2,,{"visited_page": 1},2022-02-12 22:10:11 8 | 49,19,2022-02-17 22:10:11,bought something,,{"bought_something": 1},100,,2,,{"visited_page": 1},2022-02-15 22:10:11 9 | 52,22,2022-02-20 22:10:11,bought something,,{"bought_something": 1},100,,2,,{"visited_page": 1},2022-02-18 22:10:11 10 | 55,25,2022-02-23 22:10:11,bought something,,{"bought_something": 1},100,,2,,{"visited_page": 1},2022-02-21 22:10:11 11 | 4,1,2022-01-03 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-01-30 22:10:11,{"visited_page": 1},2022-01-01 22:10:11 12 | 7,4,2022-01-06 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-02 22:10:11,{"visited_page": 1},2022-01-04 22:10:11 13 | 10,7,2022-01-09 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-05 22:10:11,{"visited_page": 1},2022-01-07 22:10:11 14 | 13,10,2022-01-12 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-08 22:10:11,{"visited_page": 1},2022-01-10 22:10:11 15 | 16,13,2022-01-15 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-11 22:10:11,{"visited_page": 1},2022-01-13 22:10:11 16 | 19,16,2022-01-18 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-14 22:10:11,{"visited_page": 1},2022-01-16 22:10:11 17 | 22,19,2022-01-21 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-17 22:10:11,{"visited_page": 1},2022-01-19 22:10:11 18 | 25,22,2022-01-24 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-20 22:10:11,{"visited_page": 1},2022-01-22 22:10:11 19 | 28,25,2022-01-27 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-23 22:10:11,{"visited_page": 1},2022-01-25 22:10:11 20 | -------------------------------------------------------------------------------- /integration_tests/seeds/first_before/output/output__first_before_1.csv: -------------------------------------------------------------------------------- 1 | 
activity_id,entity_uuid,ts,activity,anonymous_entity_uuid,feature_json,revenue_impact,link,activity_occurrence,activity_repeated_at,first_before_visited_page_feature_json,first_before_visited_page_ts 2 | 4,1,2022-01-03 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-01-30 22:10:11,{"visited_page": 1},2022-01-01 22:10:11 3 | 7,4,2022-01-06 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-02 22:10:11,{"visited_page": 1},2022-01-04 22:10:11 4 | 10,7,2022-01-09 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-05 22:10:11,{"visited_page": 1},2022-01-07 22:10:11 5 | 13,10,2022-01-12 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-08 22:10:11,{"visited_page": 1},2022-01-10 22:10:11 6 | 16,13,2022-01-15 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-11 22:10:11,{"visited_page": 1},2022-01-13 22:10:11 7 | 19,16,2022-01-18 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-14 22:10:11,{"visited_page": 1},2022-01-16 22:10:11 8 | 22,19,2022-01-21 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-17 22:10:11,{"visited_page": 1},2022-01-19 22:10:11 9 | 25,22,2022-01-24 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-20 22:10:11,{"visited_page": 1},2022-01-22 22:10:11 10 | 28,25,2022-01-27 22:10:11,bought something,,{"bought_something": 1},100,,1,2022-02-23 22:10:11,{"visited_page": 1},2022-01-25 22:10:11 11 | 31,1,2022-01-30 22:10:11,bought something,,{"bought_something": 1},100,,2,,{"visited_page": 1},2022-01-01 22:10:11 12 | 34,4,2022-02-02 22:10:11,bought something,,{"bought_something": 1},100,,2,,{"visited_page": 1},2022-01-04 22:10:11 13 | 37,7,2022-02-05 22:10:11,bought something,,{"bought_something": 1},100,,2,,{"visited_page": 1},2022-01-07 22:10:11 14 | 40,10,2022-02-08 22:10:11,bought something,,{"bought_something": 1},100,,2,,{"visited_page": 1},2022-01-10 22:10:11 15 | 43,13,2022-02-11 22:10:11,bought something,,{"bought_something": 
{# Dispatch wrapper: resolves the adapter-specific implementation of
   `activity`. Defaults for `included_columns` come from the
   `included_columns` var, then the `dbt_activity_schema.included_columns`
   var, then all columns from columns.sql. #}
{% macro activity(
    relationship,
    activity_name,
    included_columns=var("included_columns", var("dbt_activity_schema",
        {}).get("included_columns", dbt_activity_schema.columns().values() | list)),
    additional_join_condition="true"
) %}

{{ return(adapter.dispatch("activity", "dbt_activity_schema")(
    relationship,
    activity_name,
    included_columns,
    additional_join_condition
)) }}

{% endmacro %}

{% macro default__activity(
    relationship,
    activity_name,
    included_columns,
    additional_join_condition
) %}

{# An activity to include in the dataset.

params:

    relationship: relationship
        The relationship that defines how the appended activity is joined to
        the primary activity.

    activity_name: str
        The string identifier of the activity in the Activity Stream to join to
        the primary activity.

    included_columns: List[str]
        List of columns to join to the primary activity, defaults to the
        `included_columns` var if it is set, otherwise defaults to the columns
        defined in columns.sql.

    additional_join_condition: str
        A valid sql boolean to condition the join of the appended activity. Can
        optionally contain the python f-string placeholders "{primary}" and
        "{appended}" in the string; these will be compiled with the correct
        aliases.

        Eg:

            "json_extract({primary}.feature_json, 'dim1')
                = json_extract({appended}.feature_json, 'dim1')"

        The "{primary}" and "{appended}" placeholders are correctly compiled
        depending on the cardinality of the joined activity in the
        `appended_activities` list argument to `dataset.sql`.

        Compiled:

            "json_extract(stream.feature_json, 'dim1')
                = json_extract(stream_3.feature_json, 'dim1')"

        Given that the appended activity was 3rd in the `appended_activities`
        list argument.
#}

{% set columns = dbt_activity_schema.columns() %}

{# Required for the joins, but not necessarily included in the final result. #}
{% set required_columns = [
    columns.activity_id,
    columns.activity,
    columns.ts,
    columns.customer,
    columns.activity_occurrence,
    columns.activity_repeated_at
] %}

{# Drop any required column the caller already asked for, so each column is
   only selected once downstream (list is mutated in place). #}
{% for col in included_columns %}
    {% if col in required_columns %}
        {% do required_columns.remove(col) %}
    {% endif %}
{% endfor %}

{# Activities are passed to dataset.sql as attribute namespaces. #}
{% do return(namespace(
    name = activity_name,
    included_columns = included_columns,
    required_columns = required_columns,
    relationship = relationship,
    additional_join_condition = additional_join_condition
)) %}

{% endmacro %}
up,,{"signed_up": 1},0,,1,2022-01-29 22:10:11 4 | 4,2022-01-03 22:10:11,1,bought something,,{"bought_something": 1},100,,1,2022-01-30 22:10:11 5 | 5,2022-01-04 22:10:11,4,visited page,,{"visited_page": 1},0,,1,2022-01-31 22:10:11 6 | 6,2022-01-05 22:10:11,4,signed up,,{"signed_up": 1},0,,1,2022-02-01 22:10:11 7 | 7,2022-01-06 22:10:11,4,bought something,,{"bought_something": 1},100,,1,2022-02-02 22:10:11 8 | 8,2022-01-07 22:10:11,7,visited page,,{"visited_page": 1},0,,1,2022-02-03 22:10:11 9 | 9,2022-01-08 22:10:11,7,signed up,,{"signed_up": 1},0,,1,2022-02-04 22:10:11 10 | 10,2022-01-09 22:10:11,7,bought something,,{"bought_something": 1},100,,1,2022-02-05 22:10:11 11 | 11,2022-01-10 22:10:11,10,visited page,,{"visited_page": 1},0,,1,2022-02-06 22:10:11 12 | 12,2022-01-11 22:10:11,10,signed up,,{"signed_up": 1},0,,1,2022-02-07 22:10:11 13 | 13,2022-01-12 22:10:11,10,bought something,,{"bought_something": 1},100,,1,2022-02-08 22:10:11 14 | 14,2022-01-13 22:10:11,13,visited page,,{"visited_page": 1},0,,1,2022-02-09 22:10:11 15 | 15,2022-01-14 22:10:11,13,signed up,,{"signed_up": 1},0,,1,2022-02-10 22:10:11 16 | 16,2022-01-15 22:10:11,13,bought something,,{"bought_something": 1},100,,1,2022-02-11 22:10:11 17 | 17,2022-01-16 22:10:11,16,visited page,,{"visited_page": 1},0,,1,2022-02-12 22:10:11 18 | 18,2022-01-17 22:10:11,16,signed up,,{"signed_up": 1},0,,1,2022-02-13 22:10:11 19 | 19,2022-01-18 22:10:11,16,bought something,,{"bought_something": 1},100,,1,2022-02-14 22:10:11 20 | 20,2022-01-19 22:10:11,19,visited page,,{"visited_page": 1},0,,1,2022-02-15 22:10:11 21 | 21,2022-01-20 22:10:11,19,signed up,,{"signed_up": 1},0,,1,2022-02-16 22:10:11 22 | 22,2022-01-21 22:10:11,19,bought something,,{"bought_something": 1},100,,1,2022-02-17 22:10:11 23 | 23,2022-01-22 22:10:11,22,visited page,,{"visited_page": 1},0,,1,2022-02-18 22:10:11 24 | 24,2022-01-23 22:10:11,22,signed up,,{"signed_up": 1},0,,1,2022-02-19 22:10:11 25 | 25,2022-01-24 22:10:11,22,bought 
something,,{"bought_something": 1},100,,1,2022-02-20 22:10:11 26 | 26,2022-01-25 22:10:11,25,visited page,,{"visited_page": 1},0,,1,2022-02-21 22:10:11 27 | 27,2022-01-26 22:10:11,25,signed up,,{"signed_up": 1},0,,1,2022-02-22 22:10:11 28 | 28,2022-01-27 22:10:11,25,bought something,,{"bought_something": 1},100,,1,2022-02-23 22:10:11 29 | 29,2022-01-28 22:10:11,1,visited page,,{"visited_page": 1},0,,2, 30 | 30,2022-01-29 22:10:11,1,signed up,,{"signed_up": 1},0,,2, 31 | 31,2022-01-30 22:10:11,1,bought something,,{"bought_something": 1},100,,2, 32 | 32,2022-01-31 22:10:11,4,visited page,,{"visited_page": 1},0,,2, 33 | 33,2022-02-01 22:10:11,4,signed up,,{"signed_up": 1},0,,2, 34 | 34,2022-02-02 22:10:11,4,bought something,,{"bought_something": 1},100,,2, 35 | 35,2022-02-03 22:10:11,7,visited page,,{"visited_page": 1},0,,2, 36 | 36,2022-02-04 22:10:11,7,signed up,,{"signed_up": 1},0,,2, 37 | 37,2022-02-05 22:10:11,7,bought something,,{"bought_something": 1},100,,2, 38 | 38,2022-02-06 22:10:11,10,visited page,,{"visited_page": 1},0,,2, 39 | 39,2022-02-07 22:10:11,10,signed up,,{"signed_up": 1},0,,2, 40 | 40,2022-02-08 22:10:11,10,bought something,,{"bought_something": 1},100,,2, 41 | 41,2022-02-09 22:10:11,13,visited page,,{"visited_page": 1},0,,2, 42 | 42,2022-02-10 22:10:11,13,signed up,,{"signed_up": 1},0,,2, 43 | 43,2022-02-11 22:10:11,13,bought something,,{"bought_something": 1},100,,2, 44 | 44,2022-02-12 22:10:11,16,visited page,,{"visited_page": 1},0,,2, 45 | 45,2022-02-13 22:10:11,16,signed up,,{"signed_up": 1},0,,2, 46 | 46,2022-02-14 22:10:11,16,bought something,,{"bought_something": 1},100,,2, 47 | 47,2022-02-15 22:10:11,19,visited page,,{"visited_page": 1},0,,2, 48 | 48,2022-02-16 22:10:11,19,signed up,,{"signed_up": 1},0,,2, 49 | 49,2022-02-17 22:10:11,19,bought something,,{"bought_something": 1},100,,2, 50 | 50,2022-02-18 22:10:11,22,visited page,,{"visited_page": 1},0,,2, 51 | 51,2022-02-19 22:10:11,22,signed up,,{"signed_up": 1},0,,2, 52 | 
{# Dispatch wrapper: resolves the adapter-specific implementation of `dataset`. #}
{% macro dataset(
    activity_stream,
    primary_activity,
    appended_activities=[]
) %} {{ return(adapter.dispatch("dataset", "dbt_activity_schema")(
    activity_stream,
    primary_activity,
    appended_activities
)) }} {% endmacro %}

{% macro default__dataset(
    activity_stream,
    primary_activity,
    appended_activities
) %}

{# Create a derived dataset using self-joins from an Activity Stream model.

params:

    activity_stream: ref() | str
        The dbt `ref()` or a CTE name that contains the required columns.

    primary_activity: activity (class)
        The primary activity of the derived dataset.

    appended_activities: List[ activity (class) ]
        The list of appended activities to self-join to the primary activity.
#}

{# Local aliases for the helper macros used below. #}
{% set columns = dbt_activity_schema.columns() %}
{% set primary = dbt_activity_schema.primary %}
{% set appended = dbt_activity_schema.appended %}
{% set alias_cte = dbt_activity_schema.alias_cte %}
{% set alias_appended_activity = dbt_activity_schema.alias_appended_activity %}

with

{# 1. Restrict the stream to rows of the primary activity that satisfy the
      relationship's where clause (eg only the first occurrence). #}
filter_activity_stream_using_primary_activity as (
    select
        {% for col in primary_activity.included_columns + primary_activity.required_columns %}
        {{ dbt_activity_schema.parse_column(primary(), col) }} as {{ col }}{%- if not loop.last -%},{%- endif %}
        {% endfor %}

    from {{ activity_stream }} as {{ primary() }}

    where {{ primary() }}.{{ columns.activity }} = {{ dbt.string_literal(primary_activity.name) }}
        and {{ primary_activity.relationship.where_clause }}
),

{# 2. One CTE per appended activity: self-join it onto the filtered primary
      rows and aggregate its columns down to one row per primary row. #}
{% for activity in appended_activities %}{% set i = loop.index %}

{{ alias_cte(activity, i) }} as (
    select

        -- Primary Activity Columns
        {% for col in primary_activity.included_columns + primary_activity.required_columns %}
        {{ primary() }}.{{- col }},
        {% endfor %}

        {% for col in activity.included_columns %}
        {%- set parsed_col = dbt_activity_schema.parse_column(appended(), col) -%}
        {% call activity.relationship.aggregation_func() %}
            {{ parsed_col }}
        {% endcall %} as {{ dbt_activity_schema.alias_appended_activity(activity, col) }}
        {% if not loop.last %},{% endif %}
        {% endfor %}

    from filter_activity_stream_using_primary_activity as {{ primary() }}

    left join {{ activity_stream }} as {{ appended() }}
        on (
            -- Join on Customer UUID Column
            {{ appended() }}.{{ columns.customer }} = {{ primary() }}.{{ columns.customer }}

            -- Join the Correct Activity
            and {{ appended() }}.{{- columns.activity }} = {{ dbt.string_literal(activity.name) }}

            -- Relationship Specific Join Conditions
            and (
                {# nth_ever_join_clause relies on instantiated nth_occurance arg, in
                addition to the i passed to the join #}
                {% if activity.relationship.name == "nth_ever" %}
                {{ activity.relationship.join_clause(activity.relationship.nth_occurance) }}
                {% else %}
                {{ activity.relationship.join_clause() }}
                {% endif %}
            )
            -- Additional Join Condition
            and ( {{ activity.additional_join_condition }} )
        )

    group by
        {% for col in primary_activity.included_columns + primary_activity.required_columns %}
        {{ primary() }}.{{ col }}{%- if not loop.last -%},{%- endif %}
        {% endfor %}
),

{% endfor %}

{# 3. Stitch every aggregated activity back onto the primary rows, joining on
      the primary activity_id (selected in step 1's required columns). #}
rejoin_aggregated_activities as (
    select

        {% for col in primary_activity.included_columns %}
        {{ primary() }}.{{ col }},
        {% endfor %}

        {% for activity in appended_activities %}{% set i = loop.index %}{% set last_outer_loop = loop.last %}
        {% for col in activity.included_columns %}
        {{ alias_cte(activity, i) }}.{{ alias_appended_activity(activity, col) }}{% if not (last_outer_loop and loop.last) %},{% endif %}
        {% endfor %}
        {% endfor %}

    from filter_activity_stream_using_primary_activity as {{ primary() }}

    {% for activity in appended_activities %}{% set i = loop.index %}

    left join {{ alias_cte(activity, i) }}
        on {{ alias_cte(activity, i) }}.{{ columns.activity_id }} = {{ primary() }}.{{ columns.activity_id }}

    {% endfor %}
)

select * from rejoin_aggregated_activities

{% endmacro %}
-------------------------------------------------------------------------------- 1 | # dbt-activity-schema 2 | 3 | A [dbt](https://docs.getdbt.com/docs/introduction) 4 | [package](https://docs.getdbt.com/docs/build/packages#what-is-a-package) to 5 | query the [Activity 6 | Schema](https://github.com/ActivitySchema/ActivitySchema/blob/main/2.0.md) data 7 | modelling framework, based on the 8 | [relationships](https://github.com/ActivitySchema/ActivitySchema/blob/main/2.0.md#relationships). 9 | 10 | ## Table of Contents 11 | - [Overview](#overview) 12 | - [Install](#install) 13 | - [Usage](#usage) 14 | - [Create a Dataset](#create-a-dataset) 15 | - [Required Columns](#required-columns) 16 | - [Vars](#vars) 17 | - [Column Mappings (optional)](#column-mappings-optional) 18 | - [Included Columns (optional)](#included-columns-optional) 19 | - [Macros](#macros) 20 | - [Dataset (source)](#dataset-source) 21 | - [Activity (source)](#activity-source) 22 | - [Relationships](#relationships) 23 | - [All Ever (source) (*Custom*)](#all-ever-source-custom) 24 | - [Nth Ever (source) (*Custom*)](#nth-ever-source-custom) 25 | - [First Ever (source)](#first-ever-source) 26 | - [Last Ever (source)](#last-ever-source) 27 | - [First Before (source)](#first-before-source) 28 | - [Last Before (source)](#last-before-source) 29 | - [First After (source)](#first-after-source) 30 | - [Last After (source)](#last-after-source) 31 | - [First In Between (source)](#first-in-between-source) 32 | - [Last In Between (source)](#last-in-between-source) 33 | - [Aggregate All Ever (source) (*Custom*)](#aggregate-all-ever-source-custom) 34 | - [Aggregate After (source) (*Custom*)](#aggregate-after-source-custom) 35 | - [Aggregate Before (source)](#aggregate-before-source) 36 | - [Aggregate In Between (source)](#aggregate-in-between-source) 37 | - [Aggregations](#aggregations) 38 | - [Custom Aggregations](#custom-aggregations) 39 | - [Warehouses](#warehouses) 40 | - [Contributions](#contributions) 41 | 42 | 
## Overview 43 | This [dbt](https://docs.getdbt.com/docs/introduction) package includes 44 | [macros](https://docs.getdbt.com/docs/build/jinja-macros) to simplify the 45 | querying of an [Activity 46 | Stream](https://github.com/ActivitySchema/ActivitySchema/blob/main/2.0.md#activity-stream), 47 | the primary table in the Activity Schema data modelling framework. 48 | 49 | > **Note:** Use this package to query an Activity Stream model that is _already 50 | > defined_ in a dbt project. **It is not intended to _create_ an Activity Stream 51 | > model in a dbt project.** 52 | 53 | It relies on the [Activity Schema V2 54 | Specification](https://github.com/ActivitySchema/ActivitySchema/blob/main/2.0.md). 55 | 56 | It leverages and extends the 57 | [relationships](https://github.com/ActivitySchema/ActivitySchema/blob/main/2.0.md#relationships) 58 | defined in that spec to self-join activities in the Activity Stream. 59 | 60 | ## Install 61 | Include in `packages.yml`: 62 | 63 | ```yaml 64 | packages: 65 | - package: tnightengale/dbt_activity_schema 66 | version: 0.4.1 67 | ``` 68 | For latest release, see 69 | https://github.com/tnightengale/dbt-activity-schema/releases. 70 | 71 | ## Usage 72 | 73 | ### Create a Dataset 74 | Use the [dataset macro](#dataset-source) to self-join an Activity Stream using 75 | [relationships](#relationships). 76 | 77 | The [dataset macro](#dataset-source) will compile based on the provided 78 | [activity macros](#activity-source) and the [relationship 79 | macros](#relationships). It can then be nested in a CTE in a dbt-Core model. 
Eg: 80 | ```sql 81 | // my_first_dataset.sql 82 | 83 | with 84 | 85 | dataset_cte as ( 86 | {{ dbt_activity_schema.dataset( 87 | activity_stream = ref("example__activity_stream"), 88 | 89 | primary_activity = dbt_activity_schema.activity( 90 | dbt_activity_schema.all_ever(), "bought something"), 91 | 92 | appended_activities = [ 93 | dbt_activity_schema.activity( 94 | dbt_activity_schema.first_before(), "visited page"), 95 | dbt_activity_schema.activity( 96 | dbt_activity_schema.first_after(), "bought item"), 97 | ] 98 | ) }} 99 | ) 100 | 101 | select * from dataset_cte 102 | 103 | ``` 104 | > Note: This package does not contain macros to create the Activity Stream 105 | > model. It generates the SQL to self-join an existing Activity Stream model. 106 | 107 | ### Required Columns 108 | This package conforms to the [Activity Schema V2 109 | Specification](https://github.com/ActivitySchema/ActivitySchema/blob/main/2.0.md#entity-table) 110 | and requires the following columns to function: 111 | - **`activity`**: A string or ID that identifies the action or fact 112 | attributable to the `customer`. 113 | - **`customer`**: The UUID of the entity or customer. Must be used across 114 | activities. 115 | - **`ts`**: The timestamp at which the activity occurred. 116 | - **`activity_repeated_at`**: The timestamp of the next activity, per 117 | customer. Create using a lead window function, partitioned by activity and 118 | customer. 119 | - **`activity_occurrence`**: The running count of the activity per customer. 120 | Create using a rank window function, partitioned by activity and customer. 121 | 122 | ## Vars 123 | This package can be configured with the following project variables. All project 124 | vars can be scoped globally or to the `dbt_activity_schema` package. 125 | 126 | ### Column Mappings (optional) 127 | The `column_mappings` project variable can be used to alias columns in Activity 128 | Stream. 
If the [required columns](#required-columns) exist conceptually under 129 | different names, they can be mapped to their names in the [V2 130 | Specification](https://github.com/ActivitySchema/ActivitySchema/blob/main/2.0.md#entity-table). 131 | Eg: 132 | 133 | ```yml 134 | # dbt_project.yml 135 | ... 136 | 137 | vars: 138 | dbt_activity_schema: 139 | column_mappings: 140 | # Activity Stream with required column names that 141 | # differ from the V2 spec, mapped from their spec name. 142 | customer: entity_uuid 143 | ts: activity_occurred_at 144 | 145 | ... 146 | ``` 147 | 148 | ### Included Columns (optional) 149 | The `included_columns` project variable can be set to indicate the default 150 | columns to be included in each [activity](#activity-source) passed to 151 | [dataset](#dataset-source). Eg: 152 | ```yml 153 | # dbt_project.yml 154 | ... 155 | 156 | vars: 157 | dbt_activity_schema: 158 | # List columns from the Activity Schema to include in the Dataset 159 | included_columns: 160 | - activity_id 161 | - entity_uuid 162 | - activity_occurred_at 163 | - revenue_impact 164 | 165 | ... 166 | ``` 167 | 168 | If it is not set, all the columns from the [V2 169 | Specification](https://github.com/ActivitySchema/ActivitySchema/blob/main/2.0.md#entity-table) 170 | will be included, based on the [columns macro](./macros/utils/columns.sql). 171 | 172 | These defaults can be overridden on a per-activity basis by passing a list of 173 | column names to the `included_columns` argument in the [activity 174 | macro](#activity-source). 175 | 176 | ## Macros 177 | 178 | ### Dataset ([source](macros/dataset.sql)) 179 | Generate the SQL for self-joining the Activity Stream. 180 | 181 | **args:** 182 | - **`activity_stream (required)`** : 183 | [ref](https://docs.getdbt.com/reference/dbt-jinja-functions/ref) | str 184 | 185 | The dbt `ref()` or a CTE name that contains the [required 186 | columns](#required-columns). 
187 | 188 | - **`primary_activity (required)`** : [activity](#activity-source) 189 | 190 | The primary activity of the derived dataset. 191 | 192 | - **`appended_activities (optional)`** : List [ [activity](#activity-source) ] 193 | 194 | The list of appended activities to self-join to the primary activity. All 195 | appended activities and their relationship are with respect to the primary 196 | activity. 197 | 198 | ### Activity ([source](macros/activity.sql)) 199 | Represents either the primary activity or one of the appended activities in a 200 | dataset. 201 | 202 | **args:** 203 | - **`relationship (required)`** : [relationship](#relationships) 204 | 205 | The relationship that defines how the activity is filtered or joined, 206 | depending on if it is provided to the `primary_activity` or 207 | `appended_activities` argument in the dataset macro. 208 | 209 | - **`activity_name (required)`** : str 210 | 211 | The string identifier of the activity in the Activity Stream. Should match the 212 | value in the `activity` column. 213 | 214 | - **`included_columns (optional)`** : List [ str ] 215 | 216 | List of columns to include for the activity. Setting this overrides the 217 | defaults configured by the `default_dataset_columns` project var. If a column 218 | specified is not identified as any of the columns (or their project-specific 219 | aliases) from the Activity Schema spec, the column is assumed to be contained in 220 | the corresponding activity's `feature_json` and will be extracted. 221 | 222 | - **`additional_join_condition (optional)`** : str 223 | 224 | A valid sql boolean expression that is added to the join condition of the 225 | appended activity. The expression is an `and` with the condition created by 226 | the [relationship](#relationships). 227 | 228 | The expression can optionally contain either or both of the `{{ primary() }}` 229 | and `{{ appended() }}` macros, which are used to alias the primary and 230 | appended activities respectively. 
If using these aliases in the expression, it 231 | must be first assigned to a set block. Eg: 232 | 233 | ```sql 234 | // my_second_dataset.sql 235 | 236 | {% set join_condition %} 237 | json_extract({{ dbt_activity_schema.primary() }}.feature_json, 'type') 238 | = json_extract({{ dbt_activity_schema.appended() }}.feature_json, 'type') 239 | {% endset %} 240 | 241 | 242 | {{ 243 | dbt_activity_schema.dataset( 244 | ref("activity_schema"), 245 | dbt_activity_schema.activity( 246 | dbt_activity_schema.all_ever(), 247 | "signed up" 248 | ), 249 | [ 250 | dbt_activity_schema.activity( 251 | dbt_activity_schema.first_after(), 252 | "visit page", 253 | additional_join_condition=join_condition 254 | ) 255 | ] 256 | ) 257 | }} 258 | 259 | ``` 260 | 261 | The `{{ primary() }}` and `{{ appended() }}` placeholders are constants for 262 | the aliases used in the joins of the [dataset macro](#dataset-source). Columns 263 | used in the expression must be fully qualified with these aliases. 264 | 265 | In the above example, the value of the `type` key in the `feature_json` of the 266 | primary activity `feature_json` must match the value of the `type` key in the 267 | `feature_json` of the appended activity, in addition to the relationship join 268 | conditions. 269 | 270 | ## Relationships 271 | In the Activity Schema framework, 272 | [relationships](https://github.com/ActivitySchema/ActivitySchema/blob/main/2.0.md#relationships) 273 | define how an activity is joined/appended to the primary activity in a 274 | self-joining query of the Activity Stream. 275 | 276 | This package contains [relationship macros](./macros/relationships/) for each 277 | relationship defined in the Activity Schema. 278 | 279 | In the Activity Schema framework, a relationship encapsulates the logic for 280 | self-joining an activity. 
281 |  282 | This package extends the relationships defined in the [Activity Schema V2 283 | Specification](https://github.com/ActivitySchema/ActivitySchema/blob/main/2.0.md) 284 | in two ways: 285 | 1. Some relationships can be applied to the Primary Activity *and* Appended 286 | Activities, whereas others can only be applied to the Appended Activities. 287 |    - These are denoted with ✅, ❌ in the **Dataset Usage** section of each 288 |    relationship below. 289 | 2. Relationships that are not in the spec can be defined and contributed to this 290 | project. These are denoted below with the (*Custom*) tag. 291 |  292 | ### All Ever ([source](./macros/relationships/all_ever.sql)) (*Custom*) 293 | Include all occurrences of the activity in the dataset. 294 |  295 | **Dataset Usage:** 296 | - `primary_activity:` ✅ 297 | - `appended_activity:` ✅ 298 |  299 | **Example Usage:** 300 |  301 | For every 'visited_website' append **All Ever** 'called_us'. This will result in 302 | a cross join of the activities. Therefore, this relationship, while it can be 303 | used for an *Appended Activity* is usually applied to a *Primary Activity*. 304 |  305 | ### Nth Ever ([source](./macros/relationships/nth_ever.sql)) (*Custom*) 306 | Include the nth occurrence of the activity in the dataset. 307 |  308 | **args:** 309 | - **`nth_occurrence (required)`** : int 310 |  311 | The occurrence of the activity to include. 312 |  313 | **Dataset Usage:** 314 | - `primary_activity:` ✅ 315 | - `appended_activity:` ✅ 316 |  317 | **Example Usage:** 318 |  319 | For every 'visited_website' append **Nth Ever** 'called_us'. This will add the 320 | customer's Nth time calling on every row, regardless of when it happened. 321 |  322 | ### First Ever ([source](./macros/relationships/first_ever.sql)) 323 | Include the first ever occurrence of the activity in the dataset.
324 |  325 | **Dataset Usage:** 326 | - `primary_activity:` ✅ 327 | - `appended_activity:` ✅ 328 |  329 | **Example Usage:** 330 |  331 | For every 'visited_website' append **First Ever** 'called_us'. This will add the 332 | customer's first time calling to every row, regardless of whether it happened 333 | before or after visiting the website. 334 |  335 | ### Last Ever ([source](./macros/relationships/last_ever.sql)) 336 | Include the last ever occurrence of the activity in the dataset. 337 |  338 | **Dataset Usage:** 339 | - `primary_activity:` ✅ 340 | - `appended_activity:` ✅ 341 |  342 | **Example Usage:** 343 |  344 | For every 'visited_website' append **Last Ever** 'called_us'. This will add the 345 | customer's last time calling on every row, regardless of when it happened. 346 |  347 | ### First Before ([source](./macros/relationships/append_only/first_before.sql)) 348 | Append the first activity to occur before the primary activity. 349 |  350 | **Dataset Usage:** 351 | - `primary_activity:` ❌ 352 | - `appended_activity:` ✅ 353 |  354 | **Example Usage:** 355 |  356 | For every 'visited_website' append **First Before** 'opened_email'. This will 357 | add the first email that the customer opened before their first visit. 358 |  359 | ### Last Before ([source](./macros/relationships/append_only/last_before.sql)) 360 | Append the last activity to occur before the primary activity. 361 |  362 | **Dataset Usage:** 363 | - `primary_activity:` ❌ 364 | - `appended_activity:` ✅ 365 |  366 | **Example Usage:** 367 |  368 | For every 'visited_website' append **Last Before** 'updated_opportunity_stage'. 369 | This will add the stage of the customer at the moment they visited the website. 370 | (ideal for slowly changing dimensions) 371 |  372 | ### First After ([source](./macros/relationships/append_only/first_after.sql)) 373 | Append the first activity to occur after the primary activity.
374 |  375 | **Dataset Usage:** 376 | - `primary_activity:` ❌ 377 | - `appended_activity:` ✅ 378 |  379 | **Example Usage:** 380 |  381 | For the **First Ever** 'visited_website' append **First After** 'signed_up'. For 382 | each customer add whether or not they converted any time after their first visit 383 | to the site. 384 |  385 | > **Note:** Be cautious when using this with **All Ever** for the primary 386 | > activity. It will result in adding the same **First After** activity to 387 | > multiple primary activity records, if the appended activity occurred after 388 | > multiple primary activities. Consider using **First In Between** instead. 389 |  390 | ### Last After ([source](./macros/relationships/append_only/last_after.sql)) 391 | Append the last activity to occur after the primary activity. 392 |  393 | **Dataset Usage:** 394 | - `primary_activity:` ❌ 395 | - `appended_activity:` ✅ 396 |  397 | **Example Usage:** 398 |  399 | For the **First Ever** 'visited_website' append **Last After** 'returned_item'. 400 | The most recent time a customer returned an item after their first visit. 401 |  402 | > **Note:** Be cautious when using this with **All Ever** for the primary 403 | > activity. It will result in adding the same **Last After** activity to 404 | > multiple primary activity records, if the appended activity occurred after 405 | > multiple primary activities. Consider using **Last In Between** instead. 406 |  407 | ### First In Between ([source](./macros/relationships/append_only/first_in_between.sql)) 408 | Append the first activity to occur after each occurrence of the primary 409 | activity, but before the next occurrence of the primary activity. 410 |  411 | **Dataset Usage:** 412 | - `primary_activity:` ❌ 413 | - `appended_activity:` ✅ 414 |  415 | **Example Usage:** 416 |  417 | For **All Ever** 'visited_website' append **First In Between** 418 | 'completed_order'. On every website visit, did the customer order before the 419 | next visit.
(generally used for event-based conversion) 420 | 421 | > **Note:** The appended activity *will also be added to the row of the final 422 | > occurrence of the primary activity*, even though it is not technically _in 423 | > between_ another occurrence of the primary activity. The generated SQL for the 424 | > dataset can be filtered further if desired, to remove those rows. 425 | 426 | ### Last In Between ([source](./macros/relationships/append_only/last_in_between.sql)) 427 | Append the last activity that occurred after each occurrence of the primary 428 | activity and before the next occurrence of the primary activity. 429 | 430 | **Dataset Usage:** 431 | - `primary_activity:` ❌ 432 | - `appended_activity:` ✅ 433 | 434 | **Example Usage:** 435 | 436 | For **All Ever** 'visited_website' append **Last In Between** 'viewed_page'. On 437 | every website visit, what was the last page that they viewed before leaving. 438 | 439 | > **Note:** The appended activity *will also be added to the row of the final 440 | > occurrence of the primary activity*, even though it is not technically _in 441 | > between_ another occurrence of the primary activity. The generated SQL for the 442 | > dataset can be filtered further if desired, to remove those rows. 443 | 444 | ### Aggregate All Ever ([source](./macros/relationships/append_only/aggregate_all_ever.sql)) (*Custom*) 445 | Append a count of all activities that occurred that can be linked by the 446 | `customer` [required column](#required-columns) to a primary activity. 447 | 448 | **args:** 449 | - **`aggregation_func (optional)`** : [aggregation](#aggregations) 450 | 451 | The aggregation macro to use on the columns passed to the 452 | [activity](#activity-source). See [aggregations](#aggregations) for details on 453 | how to create a custom aggregation to pass here. 
454 |  455 | **Dataset Usage:** 456 | - `primary_activity:` ❌ 457 | - `appended_activity:` ✅ 458 |  459 | **Example Usage:** 460 |  461 | For every 'sign up' append **Aggregate All Ever** **Completed Order**. On every 462 | verified/signed up account, get the count of total orders placed. 463 |  464 | ### Aggregate After ([source](./macros/relationships/append_only/aggregate_after.sql)) (*Custom*) 465 | Append a count of all activities that occurred after each occurrence of the 466 | primary activity. 467 |  468 | **args:** 469 | - **`aggregation_func (optional)`** : [aggregation](#aggregations) 470 |  471 | The aggregation macro to use on the columns passed to the 472 | [activity](#activity-source). See [aggregations](#aggregations) for details on 473 | how to create a custom aggregation to pass here. 474 |  475 | **Dataset Usage:** 476 | - `primary_activity:` ❌ 477 | - `appended_activity:` ✅ 478 |  479 | **Example Usage:** 480 |  481 | For every 'visited_website' append **Aggregate After** **Completed Order**. On 482 | every website visit, sum the revenue that was spent on completed orders after 483 | this visit. 484 |  485 | ### Aggregate Before ([source](./macros/relationships/append_only/aggregate_before.sql)) 486 | Append a count of all activities that occurred before each occurrence of the 487 | primary activity. 488 |  489 | **args:** 490 | - **`aggregation_func (optional)`** : [aggregation](#aggregations) 491 |  492 | The aggregation macro to use on the columns passed to the 493 | [activity](#activity-source). See [aggregations](#aggregations) for details on 494 | how to create a custom aggregation to pass here. 495 |  496 | **Dataset Usage:** 497 | - `primary_activity:` ❌ 498 | - `appended_activity:` ✅ 499 |  500 | **Example Usage:** 501 |  502 | For every 'visited_website' append **Aggregate Before** **Completed Order**. On 503 | every website visit, sum the revenue that was spent on completed orders before 504 | this visit.
505 |  506 | ### Aggregate In Between ([source](./macros/relationships/append_only/aggregate_in_between.sql)) 507 | Append a count of all activities that occurred after each occurrence of the 508 | primary activity, but before the next occurrence of the primary activity. 509 |  510 | **args:** 511 | - **`aggregation_func (optional)`** : [aggregation](#aggregations) 512 |  513 | The aggregation macro to use on the columns passed to the 514 | [activity](#activity-source). See [aggregations](#aggregations) for details on 515 | how to create a custom aggregation to pass here. 516 |  517 | **Dataset Usage:** 518 | - `primary_activity:` ❌ 519 | - `appended_activity:` ✅ 520 |  521 | **Example Usage:** 522 |  523 | For every 'visited_website' append **Aggregate In Between** 'viewed_page'. On 524 | every website visit, count the number of pages before the next visit. 525 |  526 | ## Aggregations 527 | Each of the [relationships](#relationships) relies on an aggregation function, 528 | located [here](./macros/utils/aggregations/). 529 |  530 | These aggregations use the 531 | [call](https://jinja.palletsprojects.com/en/3.1.x/templates/#call) function of 532 | Jinja Templates, to allow complex expressions to be passed into the function. 533 |  534 | ### Custom Aggregations 535 | To create a custom aggregation, simply define a valid SQL aggregation function 536 | in a macro and pass it to any of the aggregation relationships. Eg: 537 | ```jinja 538 | {% macro custom_agg() %} 539 |  540 | sum( 541 | case 542 | when {{ caller() }} is not null 543 | then 10 544 | else 1 545 | end 546 | ) 547 |  548 | {% endmacro %} 549 | ``` 550 |  551 | In the example above `{{ caller() }}` will be replaced with each of the columns 552 | passed to the [activity](#activity-source). 553 |  554 | ## Warehouses 555 | To the best of the author's knowledge, this package is compatible with all dbt 556 | adapters. 557 |  558 | ## Contributions 559 | Contributions and feedback are welcome.
Please create an issue if you'd like to 560 | contribute. 561 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 
33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. 
To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 
133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. 
You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | --------------------------------------------------------------------------------