├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── question.md ├── labels.yml ├── pull_request_template.md └── workflows │ ├── no-response.yml │ ├── pr_tests.yml │ ├── publish-gh-pages.yml │ ├── release.yml │ ├── set-labels.yml │ └── triage-labeler.yml ├── .gitignore ├── CHANGELOG ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── assets └── snowplow_logo.png ├── custom_example ├── .gitignore ├── README.md ├── dbt_project.yml ├── models │ └── snowplow_web_custom_modules │ │ ├── page_views │ │ ├── page_view_channel_engagement │ │ │ ├── bigquery │ │ │ │ └── snowplow_web_pv_channel_engagement.sql │ │ │ ├── channel_engagement.yml │ │ │ ├── databricks │ │ │ │ └── snowplow_web_pv_channel_engagement.sql │ │ │ ├── default │ │ │ │ └── snowplow_web_pv_channel_engagement.sql │ │ │ └── snowflake │ │ │ │ └── snowplow_web_pv_channel_engagement.sql │ │ └── snowplow_web_page_views_custom.sql │ │ ├── sessions │ │ ├── sessions_conversion │ │ │ ├── sessions_conversion.yml │ │ │ └── snowplow_web_sessions_conversion_this_run.sql │ │ └── snowplow_web_sessions_custom.sql │ │ ├── snowplow_sources.yml │ │ └── users │ │ └── snowplow_web_users_custom.sql └── packages.yml ├── dbt_project.yml ├── docs ├── .nojekyll ├── index.html └── markdown │ ├── snowplow_web_atomic_docs.md │ ├── snowplow_web_base_docs.md │ ├── snowplow_web_common_cols.md │ ├── snowplow_web_macros_docs.md │ ├── snowplow_web_overview.md │ ├── snowplow_web_page_views_docs.md │ ├── snowplow_web_sessions_docs.md │ └── snowplow_web_users_docs.md ├── integration_tests ├── .gitignore ├── .scripts │ ├── integration_test.sh │ └── integration_test_w_custom_module.sh ├── README.md ├── ci │ └── profiles.yml ├── data │ ├── expected │ │ ├── bigquery │ │ │ ├── snowplow_web_sessions_expected.csv │ │ │ └── snowplow_web_vital_measurements_expected.csv │ │ ├── databricks │ │ │ ├── snowplow_web_sessions_expected.csv │ │ │ └── snowplow_web_vital_measurements_expected.csv │ │ ├── default │ │ │ ├── 
snowplow_web_sessions_expected.csv │ │ │ └── snowplow_web_vital_measurements_expected.csv │ │ ├── snowflake │ │ │ ├── snowplow_web_sessions_expected.csv │ │ │ └── snowplow_web_vital_measurements_expected.csv │ │ ├── snowplow_web_base_quarantined_sessions_expected.csv │ │ ├── snowplow_web_consent_cmp_stats_expected.csv │ │ ├── snowplow_web_consent_log_expected.csv │ │ ├── snowplow_web_consent_scope_status_expected.csv │ │ ├── snowplow_web_consent_totals_expected.csv │ │ ├── snowplow_web_consent_users_expected.csv │ │ ├── snowplow_web_consent_versions_expected.csv │ │ ├── snowplow_web_page_views_expected.csv │ │ └── snowplow_web_users_expected.csv │ └── source │ │ ├── snowplow_web_consent_cmp_visible.csv │ │ ├── snowplow_web_consent_preferences.csv │ │ ├── snowplow_web_cwv_context.csv │ │ ├── snowplow_web_events.csv │ │ ├── snowplow_web_spider_context.csv │ │ ├── snowplow_web_ua_context.csv │ │ ├── snowplow_web_vital_events.csv │ │ └── snowplow_web_yauaa_context.csv ├── dbt_project.yml ├── macros │ ├── content_group_overwrite.sql │ ├── engaged_session_overwrite.sql │ ├── equality.sql │ └── get_batch_size.sql ├── models │ ├── actual │ │ ├── actual_vs_expected.yml │ │ ├── snowplow_web_base_quarantined_sessions_actual.sql │ │ ├── snowplow_web_consent_cmp_stats_actual.sql │ │ ├── snowplow_web_consent_log_actual.sql │ │ ├── snowplow_web_consent_scope_status_actual.sql │ │ ├── snowplow_web_consent_totals_actual.sql │ │ ├── snowplow_web_consent_users_actual.sql │ │ ├── snowplow_web_consent_versions_actual.sql │ │ ├── snowplow_web_page_views_actual.sql │ │ ├── snowplow_web_sessions_actual.sql │ │ ├── snowplow_web_users_actual.sql │ │ └── snowplow_web_vital_measurements_actual.sql │ ├── dummy_custom_module │ │ └── snowplow_web_pv_channels.sql │ ├── expected │ │ ├── snowplow_web_base_quarantined_sessions_expected_stg.sql │ │ ├── snowplow_web_consent_cmp_stats_expected_stg.sql │ │ ├── snowplow_web_consent_log_expected_stg.sql │ │ ├── 
snowplow_web_consent_scope_status_expected_stg.sql │ │ ├── snowplow_web_consent_totals_expected_stg.sql │ │ ├── snowplow_web_consent_users_expected_stg.sql │ │ ├── snowplow_web_consent_versions_expected_stg.sql │ │ ├── snowplow_web_page_views_expected_stg.sql │ │ ├── snowplow_web_sessions_expected_stg.sql │ │ ├── snowplow_web_users_expected_stg.sql │ │ └── snowplow_web_vital_measurements_expected_stg.sql │ └── source │ │ ├── bigquery │ │ └── snowplow_web_events_stg.sql │ │ ├── databricks │ │ └── snowplow_web_events_stg.sql │ │ ├── default │ │ ├── snowplow_web_consent_cmp_visible_stg.sql │ │ ├── snowplow_web_consent_preferences_stg.sql │ │ ├── snowplow_web_cwv_context_stg.sql │ │ ├── snowplow_web_events_stg.sql │ │ ├── snowplow_web_page_view_context_stg.sql │ │ ├── snowplow_web_spider_context_stg.sql │ │ ├── snowplow_web_ua_context_stg.sql │ │ └── snowplow_web_yauaa_context_stg.sql │ │ └── snowflake │ │ └── snowplow_web_events_stg.sql └── packages.yml ├── macros ├── allow_refresh.sql ├── bigquery │ ├── consent_fields.sql │ └── page_view_contexts.sql ├── channel_group_query.sql ├── cluster_by_fields.sql ├── content_group_query.sql ├── core_web_vital_page_groups.sql ├── core_web_vital_pass_query.sql ├── core_web_vital_results_query.sql ├── engaged_session.sql ├── filter_bots.sql ├── get_context_fields.sql ├── get_conversion_columns.sql ├── macros.yml └── stitch_user_identifiers.sql ├── models ├── base │ ├── manifest │ │ ├── base_manifest.yml │ │ ├── snowplow_web_base_quarantined_sessions.sql │ │ ├── snowplow_web_base_sessions_lifecycle_manifest.sql │ │ └── snowplow_web_incremental_manifest.sql │ ├── scratch │ │ ├── base_scratch.yml │ │ ├── bigquery │ │ │ └── snowplow_web_base_events_this_run.sql │ │ ├── databricks │ │ │ └── snowplow_web_base_events_this_run.sql │ │ ├── default │ │ │ └── snowplow_web_base_events_this_run.sql │ │ ├── snowflake │ │ │ └── snowplow_web_base_events_this_run.sql │ │ ├── snowplow_web_base_new_event_limits.sql │ │ └── 
snowplow_web_base_sessions_this_run.sql │ └── src_base.yml ├── optional_modules │ ├── consent │ │ ├── consent.yml │ │ ├── scratch │ │ │ ├── bigquery │ │ │ │ └── snowplow_web_consent_events_this_run.sql │ │ │ ├── databricks │ │ │ │ └── snowplow_web_consent_events_this_run.sql │ │ │ ├── default │ │ │ │ └── snowplow_web_consent_events_this_run.sql │ │ │ └── snowflake │ │ │ │ └── snowplow_web_consent_events_this_run.sql │ │ ├── snowplow_web_consent_cmp_stats.sql │ │ ├── snowplow_web_consent_log.sql │ │ ├── snowplow_web_consent_scope_status.sql │ │ ├── snowplow_web_consent_totals.sql │ │ ├── snowplow_web_consent_users.sql │ │ └── snowplow_web_consent_versions.sql │ └── core_web_vitals │ │ ├── bigquery │ │ └── snowplow_web_vital_measurements.sql │ │ ├── core_web_vitals.yml │ │ ├── databricks │ │ └── snowplow_web_vital_measurements.sql │ │ ├── default │ │ └── snowplow_web_vital_measurements.sql │ │ ├── scratch │ │ ├── bigquery │ │ │ └── snowplow_web_vital_events_this_run.sql │ │ ├── databricks │ │ │ └── snowplow_web_vital_events_this_run.sql │ │ ├── default │ │ │ └── snowplow_web_vital_events_this_run.sql │ │ ├── snowflake │ │ │ └── snowplow_web_vital_events_this_run.sql │ │ └── snowplow_web_vitals_this_run.sql │ │ ├── snowflake │ │ └── snowplow_web_vital_measurements.sql │ │ └── snowplow_web_vitals.sql ├── page_views │ ├── page_views.yml │ ├── scratch │ │ ├── bigquery │ │ │ └── snowplow_web_page_views_this_run.sql │ │ ├── databricks │ │ │ └── snowplow_web_page_views_this_run.sql │ │ ├── default │ │ │ └── snowplow_web_page_views_this_run.sql │ │ ├── page_views_scratch.yml │ │ ├── snowflake │ │ │ └── snowplow_web_page_views_this_run.sql │ │ ├── snowplow_web_pv_engaged_time.sql │ │ └── snowplow_web_pv_scroll_depth.sql │ └── snowplow_web_page_views.sql ├── sessions │ ├── scratch │ │ ├── bigquery │ │ │ └── snowplow_web_sessions_this_run.sql │ │ ├── databricks │ │ │ └── snowplow_web_sessions_this_run.sql │ │ ├── default │ │ │ └── snowplow_web_sessions_this_run.sql │ │ ├── 
sessions_scratch.yml │ │ └── snowflake │ │ │ └── snowplow_web_sessions_this_run.sql │ ├── sessions.yml │ └── snowplow_web_sessions.sql ├── user_mapping │ ├── snowplow_web_user_mapping.sql │ └── user_mapping.yml └── users │ ├── scratch │ ├── snowplow_web_users_aggs.sql │ ├── snowplow_web_users_lasts.sql │ ├── snowplow_web_users_sessions_this_run.sql │ ├── snowplow_web_users_this_run.sql │ └── users_scratch.yml │ ├── snowplow_web_users.sql │ └── users.yml ├── packages.yml ├── seeds ├── seeds.yml ├── snowplow_web_dim_ga4_source_categories.csv ├── snowplow_web_dim_geo_country_mapping.csv └── snowplow_web_dim_rfc_5646_language_mapping.csv ├── selectors.yml └── tests ├── page_views └── snowplow_tests_page_view_in_session_values.sql └── snowplow_tests_consent_versions.sql /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @snowplow/com-snowplowanalytics-engineering-digital_analytics_ae 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report a bug or an issue you've found with this package 4 | title: '' 5 | labels: type:bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Describe the bug 11 | 14 | 15 | ## Steps to reproduce 16 | 19 | 20 | ## Expected results 21 | 24 | 25 | ## Actual results 26 | 29 | 30 | ## Screenshots and log output 31 | 34 | 35 | ## System information 36 | **The contents of your `packages.yml` file:** 37 | ```yml 38 | # contents goes here 39 | ``` 40 | 41 | **Which database are you using dbt with?** 42 | - [ ] postgres 43 | - [ ] redshift 44 | - [ ] bigquery 45 | - [ ] snowflake 46 | - [ ] databricks 47 | - [ ] other (specify: ____________) 48 | 49 | **The output of `dbt --version`:** 50 | ``` 51 | 52 | ``` 53 | 54 | **The operating system you're using:** 55 | 56 | 57 | **The output of `python --version`:** 58 
| 59 | 60 | ## Additional context 61 | 64 | 65 | ## Are you interested in contributing towards the fix? 66 | 69 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this package 4 | title: '' 5 | labels: type:enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Is your feature request related to a problem? Please describe. 11 | 12 | 13 | ## Describe the solution you'd like 14 | 15 | 16 | ## Describe alternatives you've considered 17 | 18 | 19 | ## Additional context 20 | 21 | 22 | ## Are you interested in contributing towards this feature? 23 | 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: A more general question about the package. Consider using discourse if more applicable. 4 | title: '' 5 | labels: type:question 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | 12 | ### Question 13 | 14 | 15 | ### Additional context 16 | 17 | -------------------------------------------------------------------------------- /.github/labels.yml: -------------------------------------------------------------------------------- 1 | - name: "category:breaking_change" 2 | color: '1e1037' 3 | description: "A breaking change will be introduced if this issue is completed." 4 | - name: "category:documentation" 5 | color: '3849b8' 6 | description: "About the documentation." 7 | - name: "cla:no" 8 | color: '313131' 9 | description: "[Auto generated] Snowplow Contributor License Agreement has not been signed." 10 | - name: "cla:yes" 11 | color: 'c1c1c1' 12 | description: "[Auto generated] Snowplow Contributor License Agreement has been signed." 
13 | - name: "good first issue" 14 | color: '69f6ff' 15 | description: "Good issue for a first time contributor." 16 | - name: "priority:high" 17 | color: 'f1ff1d' 18 | description: "To fix as soon as possible." 19 | - name: "priority:low" 20 | color: 'ffd0d4' 21 | description: "Not on the roadmap." 22 | - name: "priority:medium" 23 | color: 'ff6372' 24 | description: "On the roadmap." 25 | - name: "status:blocked" 26 | color: '042e31' 27 | description: "Maintainers can't work on this right now." 28 | - name: "status:completed" 29 | color: '0ea417' 30 | description: "Completed - but might not be released yet." 31 | - name: "status:duplicate" 32 | color: 'b6e1e3' 33 | description: "Duplicates another issue." 34 | - name: "status:has_pr" 35 | color: '0ea462' 36 | description: "A PR exists for this issue." 37 | - name: "status:in_progress" 38 | color: '0e9ba4' 39 | description: "Maintainers are working on this." 40 | - name: "status:needs_triage" 41 | color: 'b6e3cf' 42 | description: "Needs maintainer triage." 43 | - name: "status:wont_fix" 44 | color: '04311d' 45 | description: "Can't or won't be fixed." 46 | - name: "status:info_needed" 47 | color: '56B9BF' 48 | description: "More information is needed from filer for issue to be actionable" 49 | - name: "status:do_not_merge" 50 | color: 'D93F0B' 51 | description: "Flag to denote a Issue or PR which should not yet be merged (usually pending a release)" 52 | - name: "status:help_wanted" 53 | color: '69f6ff' 54 | description: "Help is wanted to help solve this issue." 55 | - name: "type:admin" 56 | color: 'fdffd2' 57 | description: 'About code or repo management.' 58 | - name: "type:bug" 59 | color: '4c381f' 60 | description: 'Bugs or weaknesses. The issue has to contain steps to reproduce.' 61 | - name: "type:enhancement" 62 | color: 'ffbd69' 63 | description: 'New features or improvements to existing features.' 64 | - name: "type:question" 65 | color: 'ffebd2' 66 | description: "Questions to the maintainers." 
67 | - name: "category:models" 68 | color: '6738b8' 69 | description: "Related to the models in the package." 70 | - name: "category:macros" 71 | color: 'f1ff1d' 72 | description: "Related to the macros in the package." 73 | - name: "category:tests" 74 | color: 'a738b8' 75 | description: "Related to the tests in the package." 76 | - name: "category:other" 77 | color: '6372ff' 78 | description: "Related to other parts of the package or repo itself." 79 | #- name: "category:4" 80 | # color: '9e62dd' 81 | # description: "Desc" 82 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 5 | 6 | ## Description & motivation 7 | 10 | 11 | ## Checklist 12 | - [ ] I have verified that these changes work locally 13 | - [ ] I have updated the README.md (if applicable) 14 | - [ ] I have added tests & descriptions to my models (and macros if applicable) 15 | - [ ] I have raised a [documentation](https://github.com/snowplow/documentation) PR if applicable (Link here if required) 16 | - [ ] Is your change a breaking change? 17 | 18 | 23 | -------------------------------------------------------------------------------- /.github/workflows/no-response.yml: -------------------------------------------------------------------------------- 1 | name: No Response 2 | 3 | # Both `issue_comment` and `scheduled` event types are required for this Action 4 | # to work properly. 
5 | on: 6 | issue_comment: 7 | types: [created] 8 | schedule: 9 | # Schedule for 08:00 every day 10 | - cron: '0 8 * * *' 11 | 12 | jobs: 13 | noResponse: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: lee-dohm/no-response@v0.5.0 17 | with: 18 | token: ${{ github.token }} 19 | daysUntilClose: 14 20 | responseRequiredLabel: "status:info_needed" 21 | closeComment: > 22 | This issue has been automatically closed because there has been no response 23 | to our request for more information from the original author within 14 days. With only what 24 | is currently in the issue, we don't have enough information 25 | to take action. Please comment with this information if you have it to reopen the issue so 26 | that we can investigate further. 27 | -------------------------------------------------------------------------------- /.github/workflows/set-labels.yml: -------------------------------------------------------------------------------- 1 | name: github 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | paths: 8 | - '.github/labels.yml' 9 | - '.github/workflows/set-labels.yml' # this workflow file itself, so edits to it also re-sync labels 10 | 11 | jobs: 12 | labeler: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - 16 | name: Checkout 17 | uses: actions/checkout@v3 18 | - 19 | name: Run Labeler 20 | if: success() 21 | uses: crazy-max/ghaction-github-labeler@v4 22 | with: 23 | github-token: ${{ secrets.GITHUB_TOKEN }} 24 | yaml-file: .github/labels.yml 25 | skip-delete: false 26 | dry-run: false -------------------------------------------------------------------------------- /.github/workflows/triage-labeler.yml: -------------------------------------------------------------------------------- 1 | name: issue-automation 2 | 3 | on: 4 | issues: 5 | types: [opened] 6 | 7 | jobs: 8 | automate-issues-labels: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: initial labeling 12 | uses: andymckay/labeler@master 13 | with: 14 | add-labels: "status:needs_triage" 15 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | dbt_packages/ 5 | logs/ 6 | .DS_Store 7 | dbt-service-account.json 8 | .vscode/settings.json 9 | -------------------------------------------------------------------------------- /assets/snowplow_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowplow/dbt-snowplow-web/0e4f868f6bc7c35ec154506fb48f00584c15f95a/assets/snowplow_logo.png -------------------------------------------------------------------------------- /custom_example/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /custom_example/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'snowplow_custom_example' 2 | version: '0.16.0' 3 | config-version: 2 4 | 5 | dispatch: 6 | - macro_namespace: dbt 7 | search_order: ['snowplow_utils', 'dbt'] 8 | 9 | profile: 'default' 10 | 11 | model-paths: ["models"] 12 | analysis-paths: ["analysis"] 13 | test-paths: ["tests"] 14 | seed-paths: ["data"] 15 | macro-paths: ["macros"] 16 | snapshot-paths: ["snapshots"] 17 | 18 | target-path: "target" 19 | clean-targets: 20 | - "target" 21 | - "dbt_modules" 22 | 23 | vars: 24 | snowplow__sessions_table: "{{ ref('snowplow_web_sessions_custom') }}" # Redirect references to sessions table to your custom version. 
25 | 26 | models: 27 | snowplow_custom_example: 28 | snowplow_web_custom_modules: 29 | +tags: snowplow_web_incremental #Adds tag to all models in the 'snowplow_web_custom_modules' directory 30 | snowplow_web: # Only applies to models provided by the Snowplow Web dbt package 31 | sessions: 32 | snowplow_web_sessions: 33 | +enabled: false # Disable the snowplow_web_sessions model as we have our custom version, snowplow_web_sessions_custom 34 | -------------------------------------------------------------------------------- /custom_example/models/snowplow_web_custom_modules/page_views/page_view_channel_engagement/bigquery/snowplow_web_pv_channel_engagement.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | --Using `snowplow_optimize` config to reduce table scans. Could also use the standard `incremental` materialization. 
9 | 10 | {{ 11 | config( 12 | materialized='incremental', 13 | unique_key='page_view_id', 14 | upsert_date_key='start_tstamp', 15 | partition_by = snowplow_utils.get_value_by_target_type(bigquery_val = { 16 | "field": "start_tstamp", 17 | "data_type": "timestamp" 18 | }), 19 | snowplow_optimize=true 20 | ) 21 | }} 22 | 23 | with link_clicks as ( 24 | select distinct 25 | ev.page_view_id, 26 | 27 | count(ev.event_id) 28 | over(partition by ev.page_view_id 29 | order by ev.derived_tstamp desc 30 | rows between unbounded preceding and unbounded following) 31 | as link_clicks, 32 | 33 | first_value(ev.unstruct_event_com_snowplowanalytics_snowplow_link_click_1_0_1.target_url) 34 | over(partition by ev.page_view_id 35 | order by ev.derived_tstamp -- ascending, so first_value returns the earliest link click on the page view 36 | rows between unbounded preceding and unbounded following) 37 | as first_link_target 38 | 39 | from {{ ref('snowplow_web_base_events_this_run' ) }} ev -- Select events from base_events_this_run rather than raw events table 40 | 41 | where 42 | {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 
43 | and ev.unstruct_event_com_snowplowanalytics_snowplow_link_click_1_0_1.target_url is not null -- only include link click events 44 | ) 45 | 46 | , engagement as ( 47 | select 48 | pv.page_view_id, 49 | pv.start_tstamp, 50 | case 51 | when pv.refr_medium = 'search' 52 | and (regexp_contains(lower(pv.mkt_medium), '(cpc|ppc|sem|paidsearch)') 53 | or regexp_contains(lower(pv.mkt_source), '(cpc|ppc|sem|paidsearch)')) then 'paidsearch' 54 | when lower(pv.mkt_medium) like '%paidsearch%' 55 | or lower(pv.mkt_source) like '%paidsearch%' then 'paidsearch' 56 | when regexp_contains(lower(pv.mkt_source), '(adwords|google_paid|googleads)') 57 | or regexp_contains(lower(pv.mkt_medium), '(adwords|google_paid|googleads)') then 'paidsearch' 58 | when lower(pv.mkt_source) like '%google%' 59 | and lower(pv.mkt_medium) like '%ads%' then 'paidsearch' 60 | when pv.refr_urlhost in ('www.googleadservices.com','googleads.g.doubleclick.net') then 'paidsearch' 61 | when regexp_contains(lower(pv.mkt_medium), '(cpv|cpa|cpp|content-text|advertising|ads)') then 'advertising' 62 | when regexp_contains(lower(pv.mkt_medium), '(display|cpm|banner)') then 'display' 63 | when pv.refr_medium is null and lower(pv.page_url) not like '%utm_%' then 'direct' 64 | when (lower(pv.refr_medium) = 'search' and pv.mkt_medium is null) 65 | or (lower(pv.refr_medium) = 'search' and lower(pv.mkt_medium) = 'organic') then 'organicsearch' 66 | when pv.refr_medium = 'social' 67 | or regexp_contains(lower(pv.mkt_source), '^((.*(facebook|linkedin|instagram|insta|slideshare|social|tweet|twitter|youtube|lnkd|pinterest|googleplus|instagram|plus.google.com|quora|reddit|t.co|twitch|viadeo|xing|youtube).*)|(yt|fb|li))$') 68 | or regexp_contains(lower(pv.mkt_medium), '^(.*)(social|facebook|linkedin|twitter|instagram|tweet)(.*)$') then 'social' 69 | when pv.refr_medium = 'email' 70 | or lower(pv.mkt_medium) like '_mail' then 'email' -- '_' is a single-character LIKE wildcard (e.g. matches 'email'); '=' would compare against a literal underscore 71 | when lower(pv.mkt_medium) = 'affiliate' then 'affiliate' 72 | when pv.refr_medium = 
'unknown' or lower(pv.mkt_medium) = 'referral' or lower(pv.mkt_medium) = 'referal' then 'referral' 73 | when pv.refr_medium = 'internal' then 'internal' 74 | else 'others' 75 | end as channel, 76 | case 77 | when pv.engaged_time_in_s = 0 then true 78 | else false 79 | end as is_bounced_page_view, 80 | (pv.vertical_percentage_scrolled / 100) * 0.3 + (pv.engaged_time_in_s / 600) * 0.7 as engagement_score 81 | 82 | from {{ ref('snowplow_web_page_views_this_run' ) }} pv --select from page_views_this_run rather than derived page_views table 83 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 84 | ) 85 | 86 | select 87 | eng.page_view_id, 88 | eng.start_tstamp, 89 | lc.link_clicks, 90 | lc.first_link_target, 91 | eng.is_bounced_page_view, 92 | eng.engagement_score, 93 | eng.channel 94 | 95 | from engagement eng 96 | left join link_clicks lc 97 | on eng.page_view_id = lc.page_view_id 98 | -------------------------------------------------------------------------------- /custom_example/models/snowplow_web_custom_modules/page_views/page_view_channel_engagement/channel_engagement.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: snowplow_web_pv_channel_engagement 5 | description: Page view engagement grouped by channel 6 | columns: 7 | - name: page_view_id 8 | tests: 9 | - unique 10 | - not_null 11 | - name: start_tstamp 12 | description: The timestamp when the page view started, based on `derived_tstamp` 13 | - name: link_clicks 14 | description: The number of links clicked on the page 15 | - name: first_link_target 16 | description: The url of the first link clicked on the page 17 | - name: is_bounced_page_view 18 | description: Was the page view a bounce visit 19 | - name: engagement_score 20 | description: Engagement score based on time engaged and vertical scrolling 21 | - name: channel 22 | description: Rule based groupings 
of traffic sources and mediums 23 | tests: 24 | - not_null 25 | - accepted_values: 26 | values: ['search', 'paidsearch', 'advertising', 'display', 'direct', 27 | 'organicsearch', 'social', 'email', 'affiliate', 'referral', 'internal', 28 | 'others'] 29 | -------------------------------------------------------------------------------- /custom_example/models/snowplow_web_custom_modules/page_views/page_view_channel_engagement/databricks/snowplow_web_pv_channel_engagement.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | --Using `snowplow_optimize` config to reduce table scans. Could also use the standard `incremental` materialization. 
9 | 10 | {{ 11 | config( 12 | materialized='incremental', 13 | unique_key='page_view_id', 14 | upsert_date_key='start_tstamp', 15 | partition_by = snowplow_utils.get_value_by_target_type(databricks_val='start_tstamp_date'), 16 | snowplow_optimize=true 17 | ) 18 | }} 19 | 20 | with link_clicks as ( 21 | select distinct 22 | ev.page_view_id, 23 | 24 | count(ev.event_id) 25 | over(partition by ev.page_view_id 26 | order by ev.derived_tstamp desc 27 | rows between unbounded preceding and unbounded following) 28 | as link_clicks, 29 | 30 | first_value(ev.unstruct_event_com_snowplowanalytics_snowplow_link_click_1.target_url) 31 | over(partition by ev.page_view_id 32 | order by ev.derived_tstamp -- ascending, so first_value returns the earliest link click on the page view 33 | rows between unbounded preceding and unbounded following) 34 | as first_link_target 35 | 36 | from {{ ref('snowplow_web_base_events_this_run' ) }} ev -- Select events from base_events_this_run rather than raw events table 37 | 38 | where 39 | {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 
40 | and ev.unstruct_event_com_snowplowanalytics_snowplow_link_click_1 is not null -- only include link click events 41 | ) 42 | 43 | , engagement as ( 44 | select 45 | pv.page_view_id, 46 | pv.start_tstamp, 47 | case 48 | when pv.refr_medium = 'search' 49 | and (rlike(lower(pv.mkt_medium), '(cpc|ppc|sem|paidsearch)') 50 | or rlike(lower(pv.mkt_source), '(cpc|ppc|sem|paidsearch)')) then 'paidsearch' 51 | when pv.mkt_medium ilike '%paidsearch%' 52 | or pv.mkt_source ilike '%paidsearch%' then 'paidsearch' 53 | when rlike(lower(pv.mkt_source), '(adwords|google_paid|googleads)') 54 | or rlike(lower(pv.mkt_medium), '(adwords|google_paid|googleads)') then 'paidsearch' 55 | when pv.mkt_source ilike '%google%' 56 | and pv.mkt_medium ilike '%ads%' then 'paidsearch' 57 | when pv.refr_urlhost in ('www.googleadservices.com','googleads.g.doubleclick.net') then 'paidsearch' 58 | when rlike(lower(pv.mkt_medium), '(cpv|cpa|cpp|content-text|advertising|ads)') then 'advertising' 59 | when rlike(lower(pv.mkt_medium), '(display|cpm|banner)') then 'display' 60 | when pv.refr_medium is null and pv.page_url not ilike '%utm_%' then 'direct' 61 | when (lower(pv.refr_medium) = 'search' and pv.mkt_medium is null) 62 | or (lower(pv.refr_medium) = 'search' and lower(pv.mkt_medium) = 'organic') then 'organicsearch' 63 | when pv.refr_medium = 'social' 64 | or rlike(lower(pv.mkt_source),'^((.*(facebook|linkedin|instagram|insta|slideshare|social|tweet|twitter|youtube|lnkd|pinterest|googleplus|instagram|plus.google.com|quora|reddit|t.co|twitch|viadeo|xing|youtube).*)|(yt|fb|li))$') 65 | or rlike(lower(pv.mkt_medium),'^(.*)(social|facebook|linkedin|twitter|instagram|tweet)(.*)$') then 'social' 66 | when pv.refr_medium = 'email' 67 | or pv.mkt_medium ilike '_mail' then 'email' 68 | when pv.mkt_medium ilike 'affiliate' then 'affiliate' 69 | when pv.refr_medium = 'unknown' or pv.mkt_medium ilike 'referral' or pv.mkt_medium ilike 'referal' then 'referral' 70 | when pv.refr_medium = 'internal' then 
'internal' 71 | else 'others' 72 | end as channel, 73 | case 74 | when pv.engaged_time_in_s = 0 then true 75 | else false 76 | end as is_bounced_page_view, 77 | (pv.vertical_percentage_scrolled / 100) * 0.3 + (pv.engaged_time_in_s / 600) * 0.7 as engagement_score 78 | 79 | from {{ ref('snowplow_web_page_views_this_run' ) }} pv --select from page_views_this_run rather than derived page_views table 80 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 81 | ) 82 | 83 | select 84 | eng.page_view_id, 85 | eng.start_tstamp, 86 | DATE(eng.start_tstamp) as start_tstamp_date, 87 | lc.link_clicks, 88 | lc.first_link_target, 89 | eng.is_bounced_page_view, 90 | eng.engagement_score, 91 | eng.channel 92 | 93 | from engagement eng 94 | left join link_clicks lc 95 | on eng.page_view_id = lc.page_view_id 96 | -------------------------------------------------------------------------------- /custom_example/models/snowplow_web_custom_modules/page_views/page_view_channel_engagement/snowflake/snowplow_web_pv_channel_engagement.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | --Using `snowplow_optimize` config to reduce table scans. Could also use the standard `incremental` materialization. 
9 | 10 | {{ 11 | config( 12 | materialized='incremental', 13 | unique_key='page_view_id', 14 | upsert_date_key='start_tstamp', 15 | cluster_by=snowplow_web.web_cluster_by_fields_page_views(), 16 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), 17 | snowplow_optimize=true 18 | ) 19 | }} 20 | 21 | with link_clicks as ( 22 | select distinct 23 | ev.page_view_id, 24 | 25 | count(ev.event_id) 26 | over(partition by ev.page_view_id 27 | order by ev.derived_tstamp desc 28 | rows between unbounded preceding and unbounded following) 29 | as link_clicks, 30 | 31 | first_value(ev.unstruct_event_com_snowplowanalytics_snowplow_link_click_1:targetUrl::varchar) 32 | over(partition by ev.page_view_id 33 | order by ev.derived_tstamp -- ascending, so first_value returns the earliest link click on the page view 34 | rows between unbounded preceding and unbounded following) 35 | as first_link_target 36 | 37 | from {{ ref('snowplow_web_base_events_this_run' ) }} ev -- Select events from base_events_this_run rather than raw events table 38 | 39 | where 40 | {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 
41 | and ev.unstruct_event_com_snowplowanalytics_snowplow_link_click_1 is not null -- only include link click events 42 | ) 43 | 44 | , engagement as ( 45 | select 46 | pv.page_view_id, 47 | pv.start_tstamp, 48 | case 49 | when pv.refr_medium = 'search' 50 | and (rlike(lower(pv.mkt_medium), '(cpc|ppc|sem|paidsearch)') 51 | or rlike(lower(pv.mkt_source), '(cpc|ppc|sem|paidsearch)')) then 'paidsearch' 52 | when pv.mkt_medium ilike '%paidsearch%' 53 | or pv.mkt_source ilike '%paidsearch%' then 'paidsearch' 54 | when rlike(lower(pv.mkt_source), '(adwords|google_paid|googleads)') 55 | or rlike(lower(pv.mkt_medium), '(adwords|google_paid|googleads)') then 'paidsearch' 56 | when pv.mkt_source ilike '%google%' 57 | and pv.mkt_medium ilike '%ads%' then 'paidsearch' 58 | when pv.refr_urlhost in ('www.googleadservices.com','googleads.g.doubleclick.net') then 'paidsearch' 59 | when rlike(lower(pv.mkt_medium), '(cpv|cpa|cpp|content-text|advertising|ads)') then 'advertising' 60 | when rlike(lower(pv.mkt_medium), '(display|cpm|banner)') then 'display' 61 | when pv.refr_medium is null and pv.page_url not ilike '%utm_%' then 'direct' 62 | when (lower(pv.refr_medium) = 'search' and pv.mkt_medium is null) 63 | or (lower(pv.refr_medium) = 'search' and lower(pv.mkt_medium) = 'organic') then 'organicsearch' 64 | when pv.refr_medium = 'social' 65 | or regexp_count(lower(pv.mkt_source),'^((.*(facebook|linkedin|instagram|insta|slideshare|social|tweet|twitter|youtube|lnkd|pinterest|googleplus|instagram|plus.google.com|quora|reddit|t.co|twitch|viadeo|xing|youtube).*)|(yt|fb|li))$')>0 66 | or regexp_count(lower(pv.mkt_medium),'^(.*)(social|facebook|linkedin|twitter|instagram|tweet)(.*)$')>0 then 'social' 67 | when pv.refr_medium = 'email' 68 | or pv.mkt_medium ilike '_mail' then 'email' -- '_' is a single-character wildcard here (matches email, gmail, ...); a plain '=' would only match the literal text '_mail' 69 | when lower(pv.mkt_medium) = 'affiliate' then 'affiliate' 70 | when pv.refr_medium = 'unknown' or lower(pv.mkt_medium) = 'referral' or lower(pv.mkt_medium) = 'referal' then 'referral' 71 | when 
pv.refr_medium = 'internal' then 'internal' 72 | else 'others' 73 | end as channel, 74 | case 75 | when pv.engaged_time_in_s = 0 then true 76 | else false 77 | end as is_bounced_page_view, 78 | (pv.vertical_percentage_scrolled / 100) * 0.3 + (pv.engaged_time_in_s / 600) * 0.7 as engagement_score 79 | 80 | from {{ ref('snowplow_web_page_views_this_run' ) }} pv --select from page_views_this_run rather than derived page_views table 81 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 82 | ) 83 | 84 | select 85 | eng.page_view_id, 86 | eng.start_tstamp, 87 | lc.link_clicks, 88 | lc.first_link_target, 89 | eng.is_bounced_page_view, 90 | eng.engagement_score, 91 | eng.channel 92 | 93 | from engagement eng 94 | left join link_clicks lc 95 | on eng.page_view_id = lc.page_view_id 96 | -------------------------------------------------------------------------------- /custom_example/models/snowplow_web_custom_modules/page_views/snowplow_web_page_views_custom.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | -- materialized as a view since we are just joining two production tables. 
9 | {{ 10 | config( 11 | materialized='view', 12 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 13 | ) 14 | }} 15 | 16 | 17 | select 18 | pv.*, 19 | ce.link_clicks, 20 | ce.first_link_target, 21 | ce.is_bounced_page_view, 22 | ce.engagement_score, 23 | ce.channel 24 | 25 | from {{ ref('snowplow_web_page_views') }} pv -- Join together the two incremental production tables 26 | left join {{ ref('snowplow_web_pv_channel_engagement')}} ce 27 | on pv.page_view_id = ce.page_view_id 28 | -------------------------------------------------------------------------------- /custom_example/models/snowplow_web_custom_modules/sessions/sessions_conversion/sessions_conversion.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: snowplow_web_sessions_conversion_this_run 5 | description: Table that calculates intent to convert and conversion 6 | columns: 7 | - name: domain_sessionid 8 | tests: 9 | - unique 10 | - not_null 11 | - name: is_session_w_intent 12 | description: Has the user shown intent to convert 13 | - name: is_session_w_conversion 14 | description: Has the user converted 15 | -------------------------------------------------------------------------------- /custom_example/models/snowplow_web_custom_modules/sessions/sessions_conversion/snowplow_web_sessions_conversion_this_run.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | -- `this_run` table so calc in drop and recompute fashion. 
This will be joined into the `snowplow_web_sessions_custom` incremental table 9 | {{ 10 | config( 11 | sort='domain_sessionid', 12 | dist='domain_sessionid', 13 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 14 | ) 15 | }} 16 | 17 | select 18 | domain_sessionid, -- the patterns below are full URLs, so they are matched against page_url; page_urlpath holds only the path component and would never match 19 | cast(sum(case when page_url like 'https://www.mysite.com/products%' then 1 else 0 end) as boolean) as is_session_w_intent, 20 | cast(sum(case when page_url like 'https://www.mysite.com/order_complete%' then 1 else 0 end) as boolean) as is_session_w_conversion 21 | 22 | from {{ ref('snowplow_web_page_views_this_run') }} pv 23 | group by 1 24 | -------------------------------------------------------------------------------- /custom_example/models/snowplow_web_custom_modules/sessions/snowplow_web_sessions_custom.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='incremental', 11 | unique_key='domain_sessionid', 12 | upsert_date_key='start_tstamp', 13 | sort='start_tstamp', 14 | dist='domain_sessionid', 15 | partition_by = snowplow_utils.get_value_by_target_type(bigquery_val = { 16 | "field": "start_tstamp", 17 | "data_type": "timestamp" 18 | }, databricks_val='start_tstamp_date'), 19 | cluster_by=snowplow_web.web_cluster_by_fields_sessions(), 20 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), 21 | snowplow_optimize= true 22 | ) 23 | }} 24 | 25 | 26 | select 27 | s.*, -- already ends with a comma, so the optional column below uses a trailing (not leading) comma 28 | {% if target.type in ['databricks', 'spark'] -%} 29 | DATE(s.start_tstamp) as start_tstamp_date, 30 | {%- endif %} 31 | c.is_session_w_intent, 32 | c.is_session_w_conversion 33 | 34 | from {{ ref('snowplow_web_sessions_this_run') }} s -- join sessions_this_run to sessions_conversion_this_run to produce complete sessions table 35 | left join {{ ref('snowplow_web_sessions_conversion_this_run')}} c 36 | on s.domain_sessionid = c.domain_sessionid 37 | 38 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 
39 | -------------------------------------------------------------------------------- /custom_example/models/snowplow_web_custom_modules/snowplow_sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: atomic 5 | schema: "{{ var('snowplow__atomic_schema', 'atomic') }}" 6 | database: "{{ var('snowplow__database', target.database) }}" 7 | tables: 8 | - name: com_snowplowanalytics_snowplow_link_click_1 9 | -------------------------------------------------------------------------------- /custom_example/models/snowplow_web_custom_modules/users/snowplow_web_users_custom.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='incremental', 11 | unique_key='user_primary_key', 12 | upsert_date_key='start_tstamp', 13 | sort='start_tstamp', 14 | dist='user_primary_key', 15 | partition_by = snowplow_utils.get_value_by_target_type(bigquery_val = { 16 | "field": "start_tstamp", 17 | "data_type": "timestamp" 18 | }, databricks_val='start_tstamp_date'), 19 | cluster_by=snowplow_web.web_cluster_by_fields_users(), 20 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), 21 | snowplow_optimize=true 22 | ) 23 | }} 24 | 25 | 26 | select distinct 27 | {{ dbt.concat(["u.domain_userid", "'-'", "s.user_ipaddress"]) }} as user_primary_key, 28 | u.* -- start_tstamp below is qualified with u. because the joined sessions table (s) has a column of the same name 29 | {% if target.type in ['databricks', 'spark'] -%} 30 | , DATE(u.start_tstamp) as start_tstamp_date 31 | {%- endif %} 32 | 33 | from {{ ref('snowplow_web_users_this_run') }} u -- join users_this_run to the derived sessions table to bring in user_ipaddress for the primary key 34 | left join {{ ref('snowplow_web_sessions')}} s on u.domain_userid = s.domain_userid 35 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 
36 | -------------------------------------------------------------------------------- /custom_example/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - local: ../ 3 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowplow/dbt-snowplow-web/0e4f868f6bc7c35ec154506fb48f00584c15f95a/docs/.nojekyll -------------------------------------------------------------------------------- /docs/markdown/snowplow_web_atomic_docs.md: -------------------------------------------------------------------------------- 1 | {% docs table_page_view_context %} 2 | 3 | This context table contains the `page_view_id` associated with a given page view. 4 | 5 | {% enddocs %} 6 | 7 | {% docs table_iab_context %} 8 | 9 | This context table contains the data generated by the IAB Spiders & Robots enrichment. 10 | 11 | The IAB Spiders & Robots enrichment uses the [IAB/ABC International Spiders and Bots List](https://iabtechlab.com/software/iababc-international-spiders-and-bots-list/) to determine whether an event was produced by a user or a robot/spider based on its IP address and user agent. 12 | 13 | {% enddocs %} 14 | 15 | 16 | {% docs table_ua_parser_context %} 17 | 18 | This context table contains the data generated by the [UA parser enrichment](https://docs.snowplow.io/docs/enriching-your-data/available-enrichments/ua-parser-enrichment/). 19 | 20 | {% enddocs %} 21 | 22 | 23 | {% docs table_yauaa_context %} 24 | 25 | This context table contains the data generated by the [YAUAA enrichment](https://docs.snowplow.io/docs/enriching-your-data/available-enrichments/yauaa-enrichment/). 
26 | 27 | {% enddocs %} 28 | 29 | 30 | {% docs table_events %} 31 | 32 | The `events` table contains all canonical events generated by [Snowplow's](https://snowplow.io/) trackers, including web, mobile and server side events. 33 | 34 | {% enddocs %} 35 | 36 | {% docs table_cmp_visible %} 37 | 38 | The cmp visible events table stores events related to the Consent Management Platform becoming available for users. 39 | 40 | {% enddocs %} 41 | 42 | {% docs table_consent_preferences %} 43 | 44 | The consent preferences table is for storing consent preference selection events. 45 | 46 | {% enddocs %} 47 | -------------------------------------------------------------------------------- /docs/markdown/snowplow_web_base_docs.md: -------------------------------------------------------------------------------- 1 | {% docs table_base_sessions_lifecycle_manifest %} 2 | 3 | This incremental table is a manifest of all sessions that have been processed by the Snowplow dbt web model. For each session, the start and end timestamps are recorded. 4 | 5 | By knowing the lifecycle of a session the model is able to determine which sessions and thus events to process for a given timeframe, as well as the complete date range required to reprocess all events of each session. 6 | 7 | {% enddocs %} 8 | 9 | {% docs table_base_incremental_manifest %} 10 | 11 | This incremental table is a manifest of the timestamp of the latest event consumed per model within the `snowplow-web` package as well as any models leveraging the incremental framework provided by the package. The latest event's timestamp is based off `collector_tstamp`. This table is used to determine what events should be processed in the next run of the model. 12 | {% enddocs %} 13 | 14 | {% docs table_base_new_event_limits %} 15 | 16 | This table contains the lower and upper timestamp limits for the given run of the web model. These limits are used to select new events from the events table. 
17 | 18 | {% enddocs %} 19 | 20 | 21 | {% docs table_base_events_this_run %} 22 | 23 | For any given run, this table contains all required events to be consumed by subsequent nodes in the Snowplow dbt web package. This is a cleaned, deduped dataset, containing all columns from the raw events table as well as having the `page_view_id` joined in from the page view context. 24 | 25 | **Note: This table should be used as the input to any custom modules that require event level data, rather than selecting straight from `atomic.events`** 26 | 27 | {% enddocs %} 28 | 29 | 30 | {% docs table_base_sessions_this_run %} 31 | 32 | For any given run, this table contains all the required sessions. 33 | 34 | {% enddocs %} 35 | 36 | 37 | {% docs table_base_quarantined_sessions %} 38 | 39 | This table contains any sessions that have been quarantined. Sessions are quarantined once they exceed the maximum allowed session length, defined by `snowplow__max_session_days`. 40 | Once quarantined, no further events from these sessions will be processed. Events up until the point of quarantine remain in your derived tables. 41 | The reason for removing long sessions is to reduce table scans on both the events table and all derived tables. This improves performance greatly. 42 | 43 | {% enddocs %} 44 | -------------------------------------------------------------------------------- /docs/markdown/snowplow_web_page_views_docs.md: -------------------------------------------------------------------------------- 1 | {% docs table_page_views_this_run %} 2 | 3 | This staging table contains all the page views for the given run of the Web model. It possesses all the same columns as `snowplow_web_page_views`. If building a custom module that requires page view events, this is the table you should reference. 4 | 5 | {% enddocs %} 6 | 7 | 8 | {% docs table_page_views %} 9 | 10 | This derived incremental table contains all historic page views and should be the end point for any analysis or BI tools. 
11 | 12 | {% enddocs %} 13 | 14 | 15 | {% docs table_pv_engaged_time %} 16 | 17 | This model calculates the time a visitor spent engaged on a given page view. This is calculated using the number of page ping events received for that page view. 18 | 19 | {% enddocs %} 20 | 21 | {% docs table_scroll_depth %} 22 | 23 | This model calculates the horizontal and vertical scroll depth of the visitor on a given page view. Such metrics are useful when assessing engagement on a page view. 24 | 25 | {% enddocs %} 26 | -------------------------------------------------------------------------------- /docs/markdown/snowplow_web_sessions_docs.md: -------------------------------------------------------------------------------- 1 | {% docs table_sessions_this_run %} 2 | 3 | This staging table contains all the sessions for the given run of the Web model. It possesses all the same columns as `snowplow_web_sessions`. If building a custom module that requires session level data, this is the table you should reference. 4 | 5 | {% enddocs %} 6 | 7 | 8 | {% docs table_sessions %} 9 | 10 | This derived incremental table contains all historic sessions and should be the end point for any analysis or BI tools. 11 | 12 | {% enddocs %} 13 | -------------------------------------------------------------------------------- /docs/markdown/snowplow_web_users_docs.md: -------------------------------------------------------------------------------- 1 | 2 | {% docs table_users_this_run %} 3 | 4 | This staging table contains all the users for the given run of the Web model. It possesses all the same columns as `snowplow_web_users`. If building a custom module that requires user level data, this is the table you should reference. 5 | 6 | {% enddocs %} 7 | 8 | 9 | {% docs table_users %} 10 | 11 | This derived incremental table contains all historic user data and should be the end point for any analysis or BI tools. 
12 | 13 | {% enddocs %} 14 | 15 | 16 | {% docs table_users_aggs %} 17 | 18 | This model aggregates various metrics derived from sessions to a users level. 19 | 20 | {% enddocs %} 21 | 22 | 23 | {% docs table_users_lasts %} 24 | 25 | This model identifies the last page view for a user and returns various dimensions associated with that page view. 26 | 27 | {% enddocs %} 28 | 29 | 30 | {% docs table_users_sessions_this_run %} 31 | 32 | This model contains all sessions data related to users contained in the given run of the Web model 33 | 34 | {% enddocs %} 35 | -------------------------------------------------------------------------------- /integration_tests/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /integration_tests/.scripts/integration_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Expected input: 4 | # -d (database) target database for dbt 5 | 6 | while getopts 'd:' opt 7 | do 8 | case $opt in 9 | d) DATABASE=$OPTARG 10 | esac 11 | done 12 | 13 | declare -a SUPPORTED_DATABASES=("bigquery" "postgres" "databricks" "redshift" "snowflake") 14 | 15 | # set to lower case 16 | DATABASE="$(echo $DATABASE | tr '[:upper:]' '[:lower:]')" 17 | 18 | if [[ $DATABASE == "all" ]]; then 19 | DATABASES=( "${SUPPORTED_DATABASES[@]}" ) 20 | else 21 | DATABASES=$DATABASE 22 | fi 23 | 24 | for db in ${DATABASES[@]}; do 25 | 26 | echo "Snowplow web integration tests: Seeding data" 27 | 28 | eval "dbt seed --full-refresh --target $db" || exit 1; 29 | 30 | echo "Snowplow web integration tests: Execute models (no contexts, no conversions)" 31 | 32 | eval "dbt run --full-refresh --vars '{snowplow__allow_refresh: true, snowplow__backfill_limit_days: 243, snowplow__enable_iab: false, snowplow__enable_ua: false, snowplow__enable_yauaa: false, 
snowplow__conversion_events: , snowplow__total_all_conversions: false, snowplow__list_event_counts: false, snowplow__enable_cwv: false, snowplow__enable_consent: false}' --target $db" || exit 1; 33 | 34 | echo "Snowplow web integration tests: Execute models - run 1/4" 35 | 36 | eval "dbt run --full-refresh --vars '{snowplow__allow_refresh: true, snowplow__backfill_limit_days: 243, snowplow__enable_cwv: false}' --target $db" || exit 1; 37 | 38 | for i in {2..4} 39 | do 40 | echo "Snowplow web integration tests: Execute models - run $i/4" 41 | 42 | eval "dbt run --vars '{snowplow__enable_cwv: false}' --target $db" || exit 1; 43 | done 44 | 45 | echo "Snowplow web integration tests: Test models" 46 | 47 | eval "dbt test --exclude snowplow_web_vital_measurements snowplow_web_vital_measurements_actual snowplow_web_vital_events_this_run --store-failures --target $db" || exit 1; 48 | 49 | echo "Snowplow web integration tests: All non-CWV tests passed" 50 | 51 | echo "Snowplow web integration tests - Core Web Vitals: Execute models" 52 | 53 | eval "dbt run --select +snowplow_web_vital_measurements_actual snowplow_web_vital_measurements_expected_stg source --full-refresh --vars '{snowplow__allow_refresh: true, snowplow__start_date: '2023-03-01', snowplow__backfill_limit_days: 50, snowplow__cwv_days_to_measure: 999}' --target $db" || exit 1; 54 | 55 | eval "dbt test --select snowplow_web_vital_measurements_actual --store-failures --target $db" || exit 1; 56 | 57 | echo "Snowplow web integration tests: All CWV tests passed" 58 | 59 | done 60 | -------------------------------------------------------------------------------- /integration_tests/.scripts/integration_test_w_custom_module.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Expected input: 4 | # -d (database) target database for dbt 5 | 6 | while getopts 'd:' opt 7 | do 8 | case $opt in 9 | d) DATABASE=$OPTARG 10 | esac 11 | done 12 | 13 | declare -a 
SUPPORTED_DATABASES=("bigquery" "databricks" "postgres" "redshift" "snowflake") 14 | 15 | # set to lower case 16 | DATABASE="$(echo $DATABASE | tr '[:upper:]' '[:lower:]')" 17 | 18 | if [[ $DATABASE == "all" ]]; then 19 | DATABASES=( "${SUPPORTED_DATABASES[@]}" ) 20 | else 21 | DATABASES=$DATABASE 22 | fi 23 | 24 | for db in ${DATABASES[@]}; do 25 | 26 | echo "Snowplow web integration tests: Seeding data" 27 | 28 | eval "dbt seed --target $db --full-refresh" || exit 1; 29 | 30 | echo "Snowplow web integration tests: Run 1: standard modules" 31 | 32 | eval "dbt run --target $db --full-refresh --vars '{snowplow__allow_refresh: true, snowplow__backfill_limit_days: 243}'" || exit 1; 33 | 34 | echo "Snowplow web integration tests: Run 2: standard modules" 35 | 36 | eval "dbt run --target $db" || exit 1; 37 | 38 | echo "Snowplow web integration tests: Run 3: Partial backfill of custom module + standard modules" 39 | # This tests the functionality of the snowplow_utils.is_run_with_new_events() macro 40 | # Could be a scenario when a new custom module is added where: 41 | # - the main scheduled snowplow job runs i.e. all modules + custom backfill 42 | # - then the user manually runs a job in dbt cloud to just backfill new custom module. 
43 | # This results in the derived tables being partially backfilled 44 | 45 | eval "dbt run --target $db --vars '{snowplow__enable_custom_example: true, snowplow__backfill_limit_days: 243}'" || exit 1; 46 | 47 | echo "Snowplow web integration tests: Run 4: Partial backfill of custom module only" 48 | 49 | eval "dbt run --models +snowplow_web_pv_channels --target $db --vars 'snowplow__enable_custom_example: true'" || exit 1; 50 | 51 | for i in {5..6} 52 | do 53 | echo "Snowplow web integration tests: Run $i/6: Standard increment - all modules" 54 | 55 | eval "dbt run --target $db --vars 'snowplow__enable_custom_example: true'" || exit 1; 56 | done 57 | 58 | echo "Snowplow web integration tests: Test models" 59 | 60 | eval "dbt test --target $db --store-failures" || exit 1; 61 | 62 | echo "Snowplow web integration tests: All tests passed" 63 | 64 | done 65 | -------------------------------------------------------------------------------- /integration_tests/README.md: -------------------------------------------------------------------------------- 1 | # snowplow-web-integration-tests 2 | 3 | Integration test suite for the snowplow-web dbt package. 4 | 5 | The `./.scripts` directory contains two scripts: 6 | 7 | - `integration_test.sh`: This tests the standard modules of the snowplow-web package. It runs the Snowplow web package 4 times to replicate incremental loading of events, then performs an equality test between the actual vs expected output. 8 | - `integration_test_w_custom_module.sh`: This tests the standard modules of the snowplow-web package as well as the back-filling of custom modules. In total the package is run 6 times, with runs 1-2 being the standard modules, runs 3-4 being the back-filling of the newly introduced custom module, and runs 5-6 being both the standard and custom module. Once complete, equality checks are performed on the actual vs expected output of the standard modules. 
9 | 10 | Run the scripts using: 11 | 12 | ```bash 13 | bash integration_test.sh -d {warehouse} 14 | ``` 15 | 16 | Supported warehouses: 17 | 18 | - redshift 19 | - bigquery 20 | - snowflake 21 | - postgres 22 | - databricks 23 | - all (iterates through all supported warehouses) 24 | -------------------------------------------------------------------------------- /integration_tests/ci/profiles.yml: -------------------------------------------------------------------------------- 1 | 2 | # HEY! This file is used in the Snowplow dbt Web integration tests. 3 | # You should __NEVER__ check credentials into version control. Thanks for reading :) 4 | 5 | config: 6 | send_anonymous_usage_stats: False 7 | use_colors: True 8 | 9 | integration_tests: 10 | target: "{{ env_var('DEFAULT_TARGET') }}" 11 | outputs: 12 | postgres: 13 | type: postgres 14 | host: "{{ env_var('POSTGRES_TEST_HOST') }}" 15 | user: "{{ env_var('POSTGRES_TEST_USER') }}" 16 | pass: "{{ env_var('POSTGRES_TEST_PASS') }}" 17 | port: "{{ env_var('POSTGRES_TEST_PORT') | as_number }}" 18 | dbname: "{{ env_var('POSTGRES_TEST_DBNAME') }}" 19 | schema: "gh_sp_web_dbt_{{ env_var('SCHEMA_SUFFIX') }}" 20 | threads: 4 21 | 22 | redshift: 23 | type: redshift 24 | host: "{{ env_var('REDSHIFT_TEST_HOST') }}" 25 | user: "{{ env_var('REDSHIFT_TEST_USER') }}" 26 | pass: "{{ env_var('REDSHIFT_TEST_PASS') }}" 27 | dbname: "{{ env_var('REDSHIFT_TEST_DBNAME') }}" 28 | port: "{{ env_var('REDSHIFT_TEST_PORT') | as_number }}" 29 | schema: "gh_sp_web_dbt_{{ env_var('SCHEMA_SUFFIX') }}" 30 | threads: 4 31 | 32 | bigquery: 33 | type: bigquery 34 | method: service-account-json 35 | project: "{{ env_var('BIGQUERY_TEST_DATABASE') }}" 36 | location: "{{ env_var('BIGQUERY_LOCATION') }}" 37 | schema: "gh_sp_web_dbt_{{ env_var('SCHEMA_SUFFIX') }}" 38 | threads: 4 39 | keyfile_json: 40 | type: "{{ env_var('BIGQUERY_SERVICE_TYPE') }}" 41 | project_id: "{{ env_var('BIGQUERY_SERVICE_PROJECT_ID') }}" 42 | private_key_id: "{{ 
env_var('BIGQUERY_SERVICE_PRIVATE_KEY_ID') }}" 43 | private_key: "{{ env_var('BIGQUERY_SERVICE_PRIVATE_KEY') }}" 44 | client_email: "{{ env_var('BIGQUERY_SERVICE_CLIENT_EMAIL') }}" 45 | client_id: "{{ env_var('BIGQUERY_SERVICE_CLIENT_ID') }}" 46 | auth_uri: "{{ env_var('BIGQUERY_SERVICE_AUTH_URI') }}" 47 | token_uri: "{{ env_var('BIGQUERY_SERVICE_TOKEN_URI') }}" 48 | auth_provider_x509_cert_url: "{{ env_var('BIGQUERY_SERVICE_AUTH_PROVIDER_X509_CERT_URL') }}" 49 | client_x509_cert_url: "{{ env_var('BIGQUERY_SERVICE_CLIENT_X509_CERT_URL') }}" 50 | 51 | snowflake: 52 | type: snowflake 53 | account: "{{ env_var('SNOWFLAKE_TEST_ACCOUNT') }}" 54 | user: "{{ env_var('SNOWFLAKE_TEST_USER') }}" 55 | password: "{{ env_var('SNOWFLAKE_TEST_PASSWORD') }}" 56 | role: "{{ env_var('SNOWFLAKE_TEST_ROLE') }}" 57 | database: "{{ env_var('SNOWFLAKE_TEST_DATABASE') }}" 58 | warehouse: "{{ env_var('SNOWFLAKE_TEST_WAREHOUSE') }}" 59 | schema: "gh_sp_web_dbt_{{ env_var('SCHEMA_SUFFIX') }}" 60 | threads: 4 61 | 62 | databricks: 63 | type: databricks 64 | schema: "gh_sp_web_dbt_{{ env_var('SCHEMA_SUFFIX') }}" 65 | host: "{{ env_var('DATABRICKS_TEST_HOST') }}" 66 | http_path: "{{ env_var('DATABRICKS_TEST_HTTP_PATH') }}" 67 | token: "{{ env_var('DATABRICKS_TEST_TOKEN') }}" 68 | threads: 4 69 | 70 | spark: 71 | type: spark 72 | method: odbc 73 | driver: "{{ env_var('DATABRICKS_TEST_HTTP_PATH') }}" 74 | schema: "gh_sp_web_dbt_{{ env_var('SCHEMA_SUFFIX') }}" 75 | host: "{{ env_var('DATABRICKS_TEST_HOST') }}" 76 | token: "{{ env_var('DATABRICKS_TEST_TOKEN') }}" 77 | endpoint: "{{ env_var('DATABRICKS_TEST_ENDPOINT') }}" 78 | threads: 4 79 | -------------------------------------------------------------------------------- /integration_tests/data/expected/snowplow_web_base_quarantined_sessions_expected.csv: -------------------------------------------------------------------------------- 1 | session_identifier 2 | 2bfc2c760c3ac434953eefd7903789429c8a5f8171c4b8b26aee93108b77bd35 3 | 
-------------------------------------------------------------------------------- /integration_tests/data/expected/snowplow_web_consent_scope_status_expected.csv: -------------------------------------------------------------------------------- 1 | scope,total_consent 2 | statistics,8 3 | preferences,6 4 | necessary,13 5 | marketing,7 6 | -------------------------------------------------------------------------------- /integration_tests/data/expected/snowplow_web_consent_totals_expected.csv: -------------------------------------------------------------------------------- 1 | consent_version,version_start_tstamp,consent_scopes,consent_url,domains_applied,is_latest_version,last_allow_all_event,total_visitors,allow_all,allow_selected,allow,pending,denied,expired,withdrawn,implicit_consent,expires_in_six_months 2 | "2.0",2021-02-26 09:12:23.161,"necessary, preferences, statistics, marketing",https://www.example.com/,https://www.example.com/,true,2021-03-03 09:14:01.599,13,4,6,10,0,3,0,0,0,0 3 | "1.0",2021-02-26 09:08:42.110,"necessary, preferences, statistics, marketing",https://www.example.com/,https://www.example.com/,false,2021-02-26 09:11:18.490,12,3,5,8,0,4,0,0,0,0 4 | -------------------------------------------------------------------------------- /integration_tests/data/expected/snowplow_web_consent_versions_expected.csv: -------------------------------------------------------------------------------- 1 | consent_version,version_start_tstamp,consent_scopes,consent_url,domains_applied,is_latest_version,last_allow_all_event 2 | "1.0",2021-02-26 09:08:42.110,"necessary, preferences, statistics, marketing",https://www.example.com/,https://www.example.com/,false,2021-02-26 09:11:18.490 3 | "2.0",2021-02-26 09:12:23.161,"necessary, preferences, statistics, marketing",https://www.example.com/,https://www.example.com/,true,2021-03-03 09:14:01.599 4 | -------------------------------------------------------------------------------- 
/integration_tests/data/source/snowplow_web_consent_cmp_visible.csv: -------------------------------------------------------------------------------- 1 | root_id,root_tstamp,elapsed_time 2 | a08502a3-1b4b-4532-b7bb-53fb176e2f65, 2021-02-26 09:10:59.572, 1.5 3 | c286d671-bbbb-4e9c-a3ad-8c990e04e8ab, 2021-02-26 09:11:08.413, 1.5 4 | 058259e6-8eac-4d81-9bf1-7d8b52b10da7, 2021-02-26 09:11:16.598, 1.5 5 | 9f040a27-7c98-4356-aab7-c56ecbc4d028, 2021-02-26 09:11:39.201, 1.5 6 | 3dfa7079-824a-429b-a7cc-5d4fa4c3bf35, 2021-02-26 09:12:02.380, 1.5 7 | 43c0fc20-f595-4dc3-bdd2-e14abf2f85d3, 2021-02-26 09:12:13.298, 1.5 8 | 5bb37626-5408-479c-b588-749e237a54e9, 2021-02-26 09:12:22.256, 1.5 9 | 121f04f6-b803-41c0-b327-1ca79c4aa88a, 2021-02-26 09:12:30.142, 1.5 10 | 8c4c6e71-bc62-4dc9-825c-c0ddfb1c52ec, 2021-02-26 09:12:39.067, 1.5 11 | aaea38e6-e19a-440c-a55b-d2bd24e9a274, 2021-02-26 09:12:46.564, 1.5 12 | 582b5e55-25ae-42b3-8f52-42c2e152ead3, 2021-02-26 09:13:01.552, 1.5 13 | bd17e5fc-065a-485f-a827-39c6a345a187, 2021-02-26 09:13:11.723, 1.5 14 | 282f4d87-3448-4e38-b310-e906f2c6362b, 2021-02-26 09:13:21.563, 1.5 15 | cdfb2bdf-e3fd-4494-82b1-9a87ee7c3734, 2021-02-26 09:13:28.584, 1.5 16 | 9e1b4293-097b-4ccc-baba-4a0c9be3b4d3, 2021-02-26 09:13:39.082, 1.5 17 | 737e0e0b-9829-494a-955b-14efd571e41b, 2021-02-26 09:08:40.269, 1.5 18 | 929dc773-b1b5-4f5e-84c9-c032a8ce8a22, 2021-02-26 09:09:13.509, 1.5 19 | 6f30fd58-900e-43d0-9108-e2531b75576e, 2021-02-26 09:09:22.656, 1.5 20 | 2cff26a7-7e80-42cb-8fb6-e2fe8a6553ee, 2021-02-26 09:09:31.304, 1.5 21 | 17e41e48-14c2-4a2f-936b-e076b10b05b8, 2021-02-26 09:09:52.031, 1.5 22 | dc105931-b062-4182-8838-9f366d09c758, 2021-02-26 09:08:28.324, 7 23 | de2a9fcf-4383-4714-8458-488d26c10739, 2021-02-26 09:08:53.689, 7 24 | db9cd5aa-f0f6-46f2-acb3-9d59a05b7489, 2021-02-26 09:09:03.413, 7 25 | d72bb29c-606d-46e8-9788-dc21ebf925b8, 2021-02-26 09:10:00.913, 7 26 | d1d3b778-8971-4fb0-97f3-53161d5a4a0f, 2021-02-26 09:10:10.249, 7 27 | 
0c0cfa41-de83-48be-8d63-4da344f1fd42, 2021-02-26 09:10:32.282, 1.5 28 | fea8f9c4-64c8-42c0-a372-c18166975d9a, 2021-02-26 09:13:49.634, 1.5 29 | 877ee833-626d-4e31-b16f-826d5b35e668, 2021-02-26 09:10:41.307, 20 30 | 9f7e5d5d-968d-4efb-b6b8-9660db53c7db, 2021-02-26 09:10:49.990, 20 31 | e8ceca75-7774-477d-80f9-f539884c5fec, 2021-02-26 09:11:49.893, 20 32 | 8ea8042f-77aa-41e7-ab97-0523b8533d1b, 2021-02-26 09:08:44.838, 20 33 | 4901c314-ba82-4f42-a416-f8181821e26c, 2021-02-26 09:09:40.737, 20 34 | fb0cdd73-bc4b-4d03-a180-63a19c00df21, 2021-03-03 09:14:00.224, 20 35 | fb0cdd73-bc4b-4d03-a180-63a19c00df21, 2021-03-03 09:14:00.224, 20 36 | -------------------------------------------------------------------------------- /integration_tests/macros/content_group_overwrite.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {# Test out the overwrite works by taking a false to a true #} 9 | 10 | {% macro default__content_group_query() %} 11 | case when ev.page_view_id = 'ff8cc048-afe8-4913-843d-37de6b7d87d0' then 'Look no further, I am the test subject!' 
12 | when ev.page_url like '%/product%' then 'PDP' 13 | when ev.page_url like '%/list%' then 'PLP' 14 | when ev.page_url like '%/checkout%' then 'checkout' 15 | when ev.page_url like '%/home%' then 'homepage' 16 | else 'other' 17 | end 18 | {% endmacro %} 19 | -------------------------------------------------------------------------------- /integration_tests/macros/engaged_session_overwrite.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {# Test out the overwrite works by taking a false to a true #} 9 | 10 | {% macro default__engaged_session() %} 11 | case when a.domain_sessionid = '0b0c7bb589ebd041177514f3e43446ca5d4343328936d2f8f12a42b41bf9140e' then true 12 | else 13 | page_views >= 2 or engaged_time_in_s / {{ var('snowplow__heartbeat', 10) }} >= 2 14 | {%- if var('snowplow__conversion_events', none) %} 15 | {%- for conv_def in var('snowplow__conversion_events') %} 16 | or cv_{{ conv_def['name'] }}_converted 17 | {%- endfor %} 18 | {%- endif %} end 19 | {% endmacro %} 20 | -------------------------------------------------------------------------------- /integration_tests/macros/get_batch_size.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {% macro get_batch_size() %} 9 | {{ return(100) }} 10 | {% endmacro %} 11 | -------------------------------------------------------------------------------- /integration_tests/models/actual/actual_vs_expected.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: snowplow_web_base_quarantined_sessions_actual 5 | tests: 6 | - dbt_utils.equal_rowcount: 7 | compare_model: ref('snowplow_web_base_quarantined_sessions_expected_stg') 8 | - dbt_utils.equality: 9 | compare_model: ref('snowplow_web_base_quarantined_sessions_expected_stg') 10 | - name: snowplow_web_page_views_actual 11 | tests: 12 | - dbt_utils.equal_rowcount: 13 | compare_model: ref('snowplow_web_page_views_expected_stg') 14 | - dbt_utils.equality: 15 | compare_model: ref('snowplow_web_page_views_expected_stg') 16 | - name: snowplow_web_sessions_actual 17 | tests: 18 | - dbt_utils.equal_rowcount: 19 | compare_model: ref('snowplow_web_sessions_expected_stg') 20 | - dbt_utils.equality: 21 | compare_model: ref('snowplow_web_sessions_expected_stg') 22 | - name: snowplow_web_users_actual 23 | tests: 24 | - dbt_utils.equal_rowcount: 25 | compare_model: ref('snowplow_web_users_expected_stg') 26 | - dbt_utils.equality: 27 | compare_model: ref('snowplow_web_users_expected_stg') 28 | - name: snowplow_web_consent_cmp_stats_actual 29 | tests: 30 | - dbt_utils.equal_rowcount: 31 | compare_model: ref('snowplow_web_consent_cmp_stats_expected_stg') 32 | - dbt_utils.equality: 33 | compare_model: ref('snowplow_web_consent_cmp_stats_expected_stg') 34 | - name: snowplow_web_consent_log_actual 35 | tests: 36 | - dbt_utils.equal_rowcount: 37 | compare_model: ref('snowplow_web_consent_log_expected_stg') 38 | - dbt_utils.equality: 39 | compare_model: ref('snowplow_web_consent_log_expected_stg') 40 | - name: 
snowplow_web_consent_scope_status_actual 41 | tests: 42 | - dbt_utils.equal_rowcount: 43 | compare_model: ref('snowplow_web_consent_scope_status_expected_stg') 44 | - dbt_utils.equality: 45 | compare_model: ref('snowplow_web_consent_scope_status_expected_stg') 46 | - name: snowplow_web_consent_totals_actual 47 | tests: 48 | - dbt_utils.equal_rowcount: 49 | compare_model: ref('snowplow_web_consent_totals_expected_stg') 50 | - dbt_utils.equality: 51 | compare_model: ref('snowplow_web_consent_totals_expected_stg') 52 | - name: snowplow_web_consent_users_actual 53 | tests: 54 | - dbt_utils.equal_rowcount: 55 | compare_model: ref('snowplow_web_consent_users_expected_stg') 56 | - dbt_utils.equality: 57 | compare_model: ref('snowplow_web_consent_users_expected_stg') 58 | - name: snowplow_web_consent_versions_actual 59 | tests: 60 | - dbt_utils.equal_rowcount: 61 | compare_model: ref('snowplow_web_consent_versions_expected_stg') 62 | - dbt_utils.equality: 63 | compare_model: ref('snowplow_web_consent_versions_expected_stg') 64 | - name: snowplow_web_vital_measurements_actual 65 | tests: 66 | - dbt_utils.equal_rowcount: 67 | compare_model: ref('snowplow_web_vital_measurements_expected_stg') 68 | - snowplow_web_integration_tests.equality: 69 | compare_model: ref('snowplow_web_vital_measurements_expected_stg') 70 | precision: 3 71 | -------------------------------------------------------------------------------- /integration_tests/models/actual/snowplow_web_base_quarantined_sessions_actual.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | 9 | select * 10 | 11 | from {{ ref('snowplow_web_base_quarantined_sessions') }} 12 | -------------------------------------------------------------------------------- /integration_tests/models/actual/snowplow_web_consent_cmp_stats_actual.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_consent", false) 11 | ) 12 | }} 13 | 14 | select * 15 | 16 | from {{ ref('snowplow_web_consent_cmp_stats') }} 17 | -------------------------------------------------------------------------------- /integration_tests/models/actual/snowplow_web_consent_log_actual.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_consent", false) 11 | ) 12 | }} 13 | 14 | select 15 | 16 | event_id, 17 | domain_userid, 18 | user_id, 19 | geo_country, 20 | page_view_id, 21 | domain_sessionid, 22 | derived_tstamp, 23 | load_tstamp, 24 | event_name, 25 | event_type, 26 | basis_for_processing, 27 | consent_url, 28 | consent_version, 29 | consent_scopes, 30 | domains_applied, 31 | gdpr_applies, 32 | cmp_load_time 33 | 34 | from {{ ref('snowplow_web_consent_log') }} 35 | -------------------------------------------------------------------------------- /integration_tests/models/actual/snowplow_web_consent_scope_status_actual.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_consent", false) 11 | ) 12 | }} 13 | 14 | select * 15 | 16 | from {{ ref('snowplow_web_consent_scope_status') }} 17 | -------------------------------------------------------------------------------- /integration_tests/models/actual/snowplow_web_consent_totals_actual.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_consent", false) 11 | ) 12 | }} 13 | 14 | select * 15 | 16 | from {{ ref('snowplow_web_consent_totals') }} 17 | -------------------------------------------------------------------------------- /integration_tests/models/actual/snowplow_web_consent_users_actual.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_consent", false) 11 | ) 12 | }} 13 | 14 | select * 15 | 16 | from {{ ref('snowplow_web_consent_users') }} 17 | -------------------------------------------------------------------------------- /integration_tests/models/actual/snowplow_web_consent_versions_actual.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_consent", false) 11 | ) 12 | }} 13 | 14 | select * 15 | 16 | from {{ ref('snowplow_web_consent_versions') }} 17 | -------------------------------------------------------------------------------- /integration_tests/models/actual/snowplow_web_page_views_actual.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | -- Removing model_tstamp 9 | 10 | select 11 | page_view_id, 12 | event_id, 13 | 14 | -- hard-coding due to non-deterministic outcome from row_number for Redshift/Postgres/databricks 15 | {% if target.type in ['redshift', 'postgres', 'databricks'] -%} 16 | case when event_id = '1b4b3b57-3cb7-4df2-a7fd-526afa9e3c76' then 'true base' else app_id end as app_id, 17 | {% else %} 18 | app_id, 19 | {% endif %} 20 | platform, 21 | -- user fields 22 | user_id, 23 | domain_userid, 24 | original_domain_userid, 25 | stitched_user_id, 26 | network_userid, 27 | 28 | -- session fields 29 | domain_sessionid, 30 | original_domain_sessionid, 31 | domain_sessionidx, 32 | 33 | page_view_in_session_index, 34 | page_views_in_session, 35 | 36 | -- timestamp fields 37 | 38 | -- hard-coding due to non-deterministic outcome from row_number for Redshift/Postgres/databricks 39 | {% if target.type in ['redshift', 'postgres', 'databricks'] -%} 40 | case when event_id = '1b4b3b57-3cb7-4df2-a7fd-526afa9e3c76' then '2021-03-01 20:56:33.286' else dvce_created_tstamp end as 
dvce_created_tstamp, 41 | {% else %} 42 | dvce_created_tstamp, 43 | {% endif %} 44 | 45 | collector_tstamp, 46 | 47 | -- hard-coding due to non-deterministic outcome from row_number for Redshift/Postgres/databricks 48 | {% if target.type in ['redshift', 'postgres', 'databricks'] -%} 49 | case when event_id = '1b4b3b57-3cb7-4df2-a7fd-526afa9e3c76' then '2021-03-01 20:56:39.192' else derived_tstamp end as derived_tstamp, 50 | {% else %} 51 | derived_tstamp, 52 | {% endif %} 53 | 54 | -- hard-coding due to non-deterministic outcome from row_number for Redshift/Postgres/databricks 55 | {% if target.type in ['redshift', 'postgres', 'databricks'] -%} 56 | case when event_id = '1b4b3b57-3cb7-4df2-a7fd-526afa9e3c76' then '2021-03-01 20:56:39.192' else start_tstamp end as start_tstamp, 57 | {% else %} 58 | start_tstamp, 59 | {% endif %} 60 | 61 | -- hard-coding due to non-deterministic outcome from row_number for Redshift/Postgres/databricks 62 | {% if target.type in ['redshift', 'postgres', 'databricks'] -%} 63 | case when event_id = '1b4b3b57-3cb7-4df2-a7fd-526afa9e3c76' then '2021-03-01 20:56:39.192' else end_tstamp end as end_tstamp, 64 | {% else %} 65 | end_tstamp, 66 | {% endif %} 67 | 68 | 69 | engaged_time_in_s, 70 | absolute_time_in_s, 71 | 72 | horizontal_pixels_scrolled, 73 | vertical_pixels_scrolled, 74 | 75 | horizontal_percentage_scrolled, 76 | vertical_percentage_scrolled, 77 | 78 | doc_width, 79 | doc_height, 80 | content_group, 81 | 82 | page_title, 83 | page_url, 84 | page_urlscheme, 85 | page_urlhost, 86 | page_urlpath, 87 | page_urlquery, 88 | page_urlfragment, 89 | 90 | mkt_medium, 91 | mkt_source, 92 | mkt_term, 93 | mkt_content, 94 | mkt_campaign, 95 | mkt_clickid, 96 | mkt_network, 97 | default_channel_group, 98 | 99 | page_referrer, 100 | refr_urlscheme, 101 | refr_urlhost, 102 | refr_urlpath, 103 | refr_urlquery, 104 | refr_urlfragment, 105 | refr_medium, 106 | refr_source, 107 | refr_term, 108 | 109 | geo_country, 110 | geo_region, 111 | 
geo_region_name, 112 | geo_city, 113 | geo_zipcode, 114 | geo_latitude, 115 | geo_longitude, 116 | geo_timezone, 117 | 118 | user_ipaddress, 119 | 120 | useragent, 121 | 122 | br_lang, 123 | br_viewwidth, 124 | br_viewheight, 125 | br_colordepth, 126 | br_renderengine, 127 | 128 | os_timezone, 129 | 130 | category, 131 | primary_impact, 132 | reason, 133 | spider_or_robot, 134 | 135 | useragent_family, 136 | useragent_major, 137 | useragent_minor, 138 | useragent_patch, 139 | useragent_version, 140 | os_family, 141 | os_major, 142 | os_minor, 143 | os_patch, 144 | os_patch_minor, 145 | os_version, 146 | device_family, 147 | device_class, 148 | device_category, 149 | screen_resolution, 150 | agent_class, 151 | agent_name, 152 | agent_name_version, 153 | agent_name_version_major, 154 | agent_version, 155 | agent_version_major, 156 | device_brand, 157 | device_name, 158 | device_version, 159 | layout_engine_class, 160 | layout_engine_name, 161 | layout_engine_name_version, 162 | layout_engine_name_version_major, 163 | layout_engine_version, 164 | layout_engine_version_major, 165 | operating_system_class, 166 | operating_system_name, 167 | operating_system_name_version, 168 | operating_system_version, 169 | event_id2, 170 | v_collector 171 | 172 | from {{ ref('snowplow_web_page_views') }} 173 | -------------------------------------------------------------------------------- /integration_tests/models/actual/snowplow_web_sessions_actual.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | -- Removing model_tstamp 9 | 10 | select 11 | app_id 12 | ,platform 13 | ,domain_sessionid 14 | ,original_domain_sessionid 15 | ,domain_sessionidx 16 | ,start_tstamp 17 | ,end_tstamp 18 | ,user_id 19 | ,domain_userid 20 | ,original_domain_userid 21 | ,stitched_user_id 22 | ,network_userid 23 | ,page_views 24 | ,engaged_time_in_s 25 | {%- if var('snowplow__list_event_counts', false) %} 26 | -- just compare the string version for simplicity... 27 | {% if target.type == 'bigquery' %} 28 | ,to_json_string(event_counts) as event_counts 29 | {% elif target.type =='redshift' %} 30 | ,json_serialize(event_counts) as event_counts 31 | {% else %} 32 | ,cast(event_counts as {{snowplow_utils.type_max_string() }}) as event_counts 33 | {% endif %} 34 | {%- endif %} 35 | ,total_events 36 | ,is_engaged 37 | ,absolute_time_in_s 38 | ,first_page_title 39 | ,first_page_url 40 | ,first_page_urlscheme 41 | ,first_page_urlhost 42 | ,first_page_urlpath 43 | ,first_page_urlquery 44 | ,first_page_urlfragment 45 | ,last_page_title 46 | ,last_page_url 47 | ,last_page_urlscheme 48 | ,last_page_urlhost 49 | ,last_page_urlpath 50 | ,last_page_urlquery 51 | ,last_page_urlfragment 52 | ,referrer 53 | ,refr_urlscheme 54 | ,refr_urlhost 55 | ,refr_urlpath 56 | ,refr_urlquery 57 | ,refr_urlfragment 58 | ,refr_medium 59 | ,refr_source 60 | ,refr_term 61 | ,mkt_medium 62 | ,mkt_source 63 | ,mkt_term 64 | ,mkt_content 65 | ,mkt_campaign 66 | ,mkt_clickid 67 | ,mkt_network 68 | ,mkt_source_platform 69 | ,default_channel_group 70 | ,geo_country 71 | ,geo_region 72 | ,geo_region_name 73 | ,geo_city 74 | ,geo_zipcode 75 | ,geo_latitude 76 | ,geo_longitude 77 | ,geo_timezone 78 | ,geo_country_name 79 | ,geo_continent 80 | ,last_geo_country 81 | ,last_geo_region_name 82 | ,last_geo_city 83 | ,last_geo_country_name 84 | ,last_geo_continent 85 | ,user_ipaddress 86 | 
,useragent 87 | ,br_renderengine 88 | ,br_lang 89 | ,br_lang_name 90 | ,last_br_lang 91 | ,last_br_lang_name 92 | ,os_timezone 93 | ,category 94 | ,primary_impact 95 | ,reason 96 | ,spider_or_robot 97 | ,useragent_family 98 | ,useragent_major 99 | ,useragent_minor 100 | ,useragent_patch 101 | ,useragent_version 102 | ,os_family 103 | ,os_major 104 | ,os_minor 105 | ,os_patch 106 | ,os_patch_minor 107 | ,os_version 108 | ,device_family 109 | ,device_class 110 | ,device_category 111 | ,screen_resolution 112 | ,agent_class 113 | ,agent_name 114 | ,agent_name_version 115 | ,agent_name_version_major 116 | ,agent_version 117 | ,agent_version_major 118 | ,device_brand 119 | ,device_name 120 | ,device_version 121 | ,layout_engine_class 122 | ,layout_engine_name 123 | ,layout_engine_name_version 124 | ,layout_engine_name_version_major 125 | ,layout_engine_version 126 | ,layout_engine_version_major 127 | ,operating_system_class 128 | ,operating_system_name 129 | ,operating_system_name_version 130 | ,operating_system_version 131 | {% if var('snowplow__conversion_events', none) %} 132 | , cv_view_page_volume 133 | {% if target.type == 'bigquery' %} 134 | , to_json_string(cv_view_page_events) as cv_view_page_events 135 | , to_json_string(cv_view_page_values) as cv_view_page_values 136 | {% else %} 137 | , cv_view_page_events 138 | , cv_view_page_values 139 | {% endif %} 140 | , cv_view_page_total 141 | , cv_view_page_first_conversion 142 | , cv_view_page_converted 143 | {% if var('snowplow__total_all_conversions') %} 144 | , cv__all_volume 145 | , cv__all_total 146 | {% endif %} 147 | {% endif %} 148 | ,event_id 149 | ,event_id2 150 | 151 | 152 | from {{ ref('snowplow_web_sessions') }} 153 | -------------------------------------------------------------------------------- /integration_tests/models/actual/snowplow_web_users_actual.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. 
All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | --Removing model_tstamp 9 | 10 | select 11 | -- user fields 12 | user_id 13 | ,domain_userid 14 | ,original_domain_userid 15 | ,network_userid 16 | 17 | ,start_tstamp 18 | ,end_tstamp 19 | 20 | -- engagement fields 21 | ,page_views 22 | ,sessions 23 | ,engaged_time_in_s 24 | 25 | -- first page fields 26 | ,first_page_title 27 | ,first_page_url 28 | ,first_page_urlscheme 29 | ,first_page_urlhost 30 | ,first_page_urlpath 31 | ,first_page_urlquery 32 | ,first_page_urlfragment 33 | ,first_geo_country 34 | ,first_geo_country_name 35 | ,first_geo_continent 36 | ,first_geo_city 37 | ,first_geo_region_name 38 | ,first_br_lang 39 | ,first_br_lang_name 40 | 41 | ,last_geo_country 42 | ,last_geo_country_name 43 | ,last_geo_continent 44 | ,last_geo_city 45 | ,last_geo_region_name 46 | ,last_br_lang 47 | ,last_br_lang_name 48 | ,last_page_title 49 | ,last_page_url 50 | ,last_page_urlscheme 51 | ,last_page_urlhost 52 | ,last_page_urlpath 53 | ,last_page_urlquery 54 | ,last_page_urlfragment 55 | 56 | -- referrer fields 57 | ,referrer 58 | 59 | ,refr_urlscheme 60 | ,refr_urlhost 61 | ,refr_urlpath 62 | ,refr_urlquery 63 | ,refr_urlfragment 64 | 65 | ,refr_medium 66 | ,refr_source 67 | ,refr_term 68 | 69 | -- marketing fields 70 | ,mkt_medium 71 | ,mkt_source 72 | ,mkt_term 73 | ,mkt_content 74 | ,mkt_campaign 75 | ,mkt_clickid 76 | ,mkt_network 77 | ,mkt_source_platform 78 | ,default_channel_group 79 | 80 | ,first_event_id 81 | ,last_event_id 82 | ,first_event_id2 83 | ,last_event_id2 84 | 85 | from {{ ref('snowplow_web_users') }} 86 | -------------------------------------------------------------------------------- 
/integration_tests/models/actual/snowplow_web_vital_measurements_actual.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_cwv", false) | as_bool() 11 | ) 12 | }} 13 | 14 | select * 15 | 16 | from {{ ref('snowplow_web_vital_measurements') }} 17 | -------------------------------------------------------------------------------- /integration_tests/models/dummy_custom_module/snowplow_web_pv_channels.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='incremental', 11 | enabled=var('snowplow__enable_custom_example'), 12 | unique_key='page_view_id', 13 | upsert_date_key='start_tstamp', 14 | sort='start_tstamp', 15 | dist='page_view_id', 16 | partition_by = snowplow_utils.get_value_by_target_type(bigquery_val={ 17 | "field": "start_tstamp", 18 | "data_type": "timestamp" 19 | }), 20 | cluster_by=["page_view_id"], 21 | snowplow_optimize=true 22 | ) 23 | }} 24 | 25 | select 26 | pv.page_view_id, 27 | pv.start_tstamp, 28 | -- Arbitrary case statements and inefficient string search functions (for cross db compatibility).
Do not copy. 29 | case 30 | when pv.refr_medium = 'search' 31 | and (lower(pv.mkt_medium) like '%cpc%' or lower(pv.mkt_source) like '%cpc%') then 'paidsearch' 32 | when lower(pv.mkt_medium) like '%paidsearch%' 33 | or lower(pv.mkt_source) like '%paidsearch%' then 'paidsearch' 34 | when lower(pv.mkt_source) like '%adwords%' 35 | or lower(pv.mkt_medium) like '%adwords%' then 'paidsearch' 36 | when lower(pv.mkt_source) like '%google%' 37 | and lower(pv.mkt_medium) like '%ads%' then 'paidsearch' 38 | when pv.refr_urlhost in ('www.googleadservices.com','googleads.g.doubleclick.net') then 'paidsearch' 39 | when lower(pv.mkt_medium) like '%cpv%' then 'advertising' 40 | when lower(pv.mkt_medium) like '%display%' or lower(pv.mkt_medium) like '%cpm%' or lower(pv.mkt_medium) like '%banner%' then 'display' -- LIKE has no regex alternation, so each keyword needs its own pattern 41 | when pv.refr_medium is null and pv.page_url not like '%utm_%' then 'direct' 42 | when (lower(pv.refr_medium) = 'search' and pv.mkt_medium is null) 43 | or (lower(pv.refr_medium) = 'search' and lower(pv.mkt_medium) = 'organic') then 'organicsearch' 44 | when pv.refr_medium = 'social' then 'social' 45 | when pv.refr_medium = 'unknown' or pv.mkt_medium = 'referral' then 'referral' 46 | when pv.refr_medium = 'internal' then 'internal' 47 | else 'others' 48 | end as channel 49 | 50 | from {{ ref('snowplow_web_page_views_this_run' ) }} pv 51 | -------------------------------------------------------------------------------- /integration_tests/models/expected/snowplow_web_base_quarantined_sessions_expected_stg.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | 9 | select * 10 | 11 | from {{ ref('snowplow_web_base_quarantined_sessions_expected') }} 12 | -------------------------------------------------------------------------------- /integration_tests/models/expected/snowplow_web_consent_cmp_stats_expected_stg.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_consent", false) 11 | ) 12 | }} 13 | 14 | select 15 | 16 | event_id, 17 | domain_userid, 18 | original_domain_userid, 19 | cast(page_view_id as {{ dbt.type_string() }}) as page_view_id, 20 | domain_sessionid, 21 | original_domain_sessionid, 22 | cmp_load_time, 23 | cast(cmp_tstamp as {{ dbt.type_timestamp() }}) as cmp_tstamp, 24 | cast(first_consent_event_tstamp as {{ dbt.type_timestamp() }}) as first_consent_event_tstamp, 25 | first_consent_event_type, 26 | cmp_interaction_time 27 | 28 | from {{ ref('snowplow_web_consent_cmp_stats_expected') }} 29 | -------------------------------------------------------------------------------- /integration_tests/models/expected/snowplow_web_consent_log_expected_stg.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 
3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_consent", false) 11 | ) 12 | }} 13 | 14 | select 15 | 16 | event_id, 17 | domain_userid, 18 | user_id, 19 | geo_country, 20 | cast(page_view_id as {{ dbt.type_string() }}) as page_view_id, 21 | domain_sessionid, 22 | cast(derived_tstamp as {{ dbt.type_timestamp() }}) as derived_tstamp, 23 | cast(load_tstamp as {{ dbt.type_timestamp() }}) as load_tstamp, 24 | event_name, 25 | event_type, 26 | basis_for_processing, 27 | consent_url, 28 | replace(cast(consent_version as {{ dbt.type_string() }}), '.0', '') ||'.0' as consent_version, 29 | consent_scopes, 30 | domains_applied, 31 | gdpr_applies, 32 | cmp_load_time 33 | 34 | from {{ ref('snowplow_web_consent_log_expected') }} 35 | -------------------------------------------------------------------------------- /integration_tests/models/expected/snowplow_web_consent_scope_status_expected_stg.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_consent", false) 11 | ) 12 | }} 13 | 14 | select * 15 | 16 | from {{ ref('snowplow_web_consent_scope_status_expected') }} 17 | -------------------------------------------------------------------------------- /integration_tests/models/expected/snowplow_web_consent_totals_expected_stg.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_consent", false) 11 | ) 12 | }} 13 | 14 | select 15 | 16 | replace(cast(consent_version as {{ dbt.type_string() }}), '.0', '') ||'.0' as consent_version, 17 | cast(version_start_tstamp as {{ dbt.type_timestamp() }}) as version_start_tstamp, 18 | consent_scopes, 19 | consent_url, 20 | domains_applied, 21 | cast(is_latest_version as {{ dbt.type_boolean() }})as is_latest_version, 22 | cast(last_allow_all_event as {{ dbt.type_timestamp() }}) as last_allow_all_event, 23 | total_visitors, 24 | allow_all, 25 | allow_selected, 26 | allow, 27 | pending, 28 | denied, 29 | expired, 30 | withdrawn, 31 | implicit_consent, 32 | expires_in_six_months 33 | 34 | from {{ ref('snowplow_web_consent_totals_expected') }} 35 | -------------------------------------------------------------------------------- /integration_tests/models/expected/snowplow_web_consent_users_expected_stg.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 
2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_consent", false) 11 | ) 12 | }} 13 | 14 | select 15 | 16 | domain_userid, 17 | user_id, 18 | geo_country, 19 | cmp_events, 20 | consent_events, 21 | cast(last_cmp_event_tstamp as {{ dbt.type_timestamp() }}) as last_cmp_event_tstamp, 22 | cast(last_consent_event_tstamp as {{ dbt.type_timestamp() }}) as last_consent_event_tstamp, 23 | last_consent_event_type, 24 | last_consent_scopes, 25 | replace(cast(last_consent_version as {{ dbt.type_string() }}), '.0', '') ||'.0' as last_consent_version, 26 | last_consent_url, 27 | last_domains_applied, 28 | cast(last_processed_event as {{ dbt.type_timestamp() }}) as last_processed_event, 29 | cast(is_latest_version as {{ dbt.type_boolean() }}) as is_latest_version 30 | 31 | from {{ ref('snowplow_web_consent_users_expected') }} 32 | -------------------------------------------------------------------------------- /integration_tests/models/expected/snowplow_web_consent_versions_expected_stg.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_consent", false) 11 | ) 12 | }} 13 | 14 | select 15 | 16 | replace(cast(consent_version as {{ dbt.type_string() }}), '.0', '') ||'.0' as consent_version, 17 | cast(version_start_tstamp as {{ dbt.type_timestamp() }}) as version_start_tstamp, 18 | consent_scopes, 19 | consent_url, 20 | domains_applied, 21 | cast(is_latest_version as {{ dbt.type_boolean() }}) as is_latest_version, 22 | cast(last_allow_all_event as {{ dbt.type_timestamp() }}) as last_allow_all_event 23 | 24 | from {{ ref('snowplow_web_consent_versions_expected') }} 25 | -------------------------------------------------------------------------------- /integration_tests/models/expected/snowplow_web_page_views_expected_stg.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | 9 | select * 10 | 11 | from {{ ref('snowplow_web_page_views_expected') }} 12 | -------------------------------------------------------------------------------- /integration_tests/models/expected/snowplow_web_users_expected_stg.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | 9 | select * 10 | 11 | from {{ ref('snowplow_web_users_expected') }} 12 | -------------------------------------------------------------------------------- /integration_tests/models/expected/snowplow_web_vital_measurements_expected_stg.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_cwv", false) | as_bool() 11 | ) 12 | }} 13 | 14 | select 15 | compound_key, 16 | measurement_type, 17 | page_url, 18 | device_class, 19 | geo_country, 20 | country, 21 | time_period, 22 | page_view_count, 23 | lcp_75p, 24 | fid_75p, 25 | cls_75p, 26 | ttfb_75p, 27 | inp_75p, 28 | lcp_result, 29 | fid_result, 30 | cls_result, 31 | ttfb_result, 32 | inp_result, 33 | passed 34 | 35 | from {{ ref('snowplow_web_vital_measurements_expected') }} 36 | -------------------------------------------------------------------------------- /integration_tests/models/source/default/snowplow_web_consent_cmp_visible_stg.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {# Staging model over ref('snowplow_web_consent_cmp_visible'): casts root_tstamp to a timestamp and tags every row with the constant schema_name 'cmp_visible'. #}
9 | select
10 |     root_id,
11 |     root_tstamp::timestamp as root_tstamp, -- explicit alias (as in snowplow_web_consent_preferences_stg) so the column name does not rely on implicit naming of a cast
12 |     elapsed_time,
13 |     'cmp_visible' as schema_name
14 |
15 | from {{ ref('snowplow_web_consent_cmp_visible') }}
16 |
--------------------------------------------------------------------------------
/integration_tests/models/source/default/snowplow_web_consent_preferences_stg.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {# Staging model over ref('snowplow_web_consent_preferences'): casts root_tstamp to timestamp and gdpr_applies to boolean, and tags every row with the constant schema_name 'consent_preferences'. #}
9 | select
10 |     root_id,
11 |     root_tstamp::timestamp as root_tstamp,
12 |     basis_for_processing,
13 |     consent_version,
14 |     consent_scopes,
15 |     domains_applied,
16 |     consent_url,
17 |     event_type,
18 |     gdpr_applies::boolean as gdpr_applies,
19 |     'consent_preferences' as schema_name
20 |
21 | from {{ ref('snowplow_web_consent_preferences') }}
22 |
--------------------------------------------------------------------------------
/integration_tests/models/source/default/snowplow_web_cwv_context_stg.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 | {# Staging model over ref('snowplow_web_cwv_context'): passes the web-vital columns through and casts root_tstamp to a timestamp. #}
8 | select
9 |     cls,
10 |     fcp,
11 |     fid,
12 |     inp,
13 |     lcp,
14 |     navigation_type,
15 |     ttfb,
16 |     root_tstamp::timestamp as root_tstamp, -- explicit alias (as in snowplow_web_consent_preferences_stg) so the column name does not rely on implicit naming of a cast
17 |     root_id,
18 |     schema_name
19 |
20 | from {{ ref('snowplow_web_cwv_context') }}
21 |
--------------------------------------------------------------------------------
/integration_tests/models/source/default/snowplow_web_events_stg.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {# CWV tests run on a different source dataset, this is an easy way to hack them together. #}
9 | {% if not var("snowplow__enable_cwv", false) %}
10 |
11 | select
12 |     *
13 |
14 | from {{ ref('snowplow_web_events') }}
15 |
16 | {% else %}
17 |
18 | select
19 |     *
20 |
21 | from {{ ref('snowplow_web_vital_events') }}
22 |
23 |
24 | {% endif %}
25 |
--------------------------------------------------------------------------------
/integration_tests/models/source/default/snowplow_web_page_view_context_stg.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {#
9 |   Derives a page-view-context table (root_id, root_tstamp, schema_name, id) from the events test dataset.
10 |   CWV tests run on a different source dataset, so the events relation is picked once up front
11 |   and a single query serves both cases, instead of repeating the same SQL in each branch.
12 | #}
13 | {% if not var("snowplow__enable_cwv", false) %}
14 |   {% set events_relation = ref('snowplow_web_events') %}
15 | {% else %}
16 |   {% set events_relation = ref('snowplow_web_vital_events') %}
17 | {% endif %}
18 |
19 | -- test dataset includes page_view_id as part of events table.
20 | -- RS and PG events tables are federated so split out page_view_id into its own table
21 |
22 | with prep as (
23 |     select
24 |         event_id as root_id,
25 |         collector_tstamp as root_tstamp,
26 |         split_part(split_part(contexts_com_snowplowanalytics_snowplow_web_page_1_0_0,'[{"id":"', 2), '"}]', 1) as id -- test dataset uses json format. Extract.
27 |
28 |     from {{ events_relation }}
29 | )
30 |
31 | select
32 |     root_id,
33 |     root_tstamp,
34 |     'page_view_context' as schema_name,
35 |     case when id = 'null' or id = '' then null else id end as id -- the literal string 'null' and the empty string both become a SQL null
36 |
37 | from prep
38 |
--------------------------------------------------------------------------------
/integration_tests/models/source/default/snowplow_web_spider_context_stg.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 | {# Staging model over ref('snowplow_web_spider_context'): renames the camelCase columns to snake_case, casts root_tstamp to timestamp and spiderOrRobot to boolean. #}
8 | select
9 |     root_id,
10 |     root_tstamp::timestamp as root_tstamp, -- explicit alias (as in snowplow_web_consent_preferences_stg) so the column name does not rely on implicit naming of a cast
11 |     category,
12 |     primaryImpact as primary_impact,
13 |     reason,
14 |     spiderOrRobot::boolean as spider_or_robot,
15 |     schema_name
16 |
17 | from {{ ref('snowplow_web_spider_context') }}
18 |
--------------------------------------------------------------------------------
/integration_tests/models/source/default/snowplow_web_ua_context_stg.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 | {# Staging model over ref('snowplow_web_ua_context'): casts every camelCase column to varchar under a snake_case name and tags rows with the constant schema_name 'ua_parser'. #}
8 | select
9 |     root_id,
10 |     root_tstamp::timestamp as root_tstamp, -- explicit alias (as in snowplow_web_consent_preferences_stg) so the column name does not rely on implicit naming of a cast
11 |     'ua_parser' as schema_name,
12 |     deviceFamily::varchar as device_family,
13 |     osFamily::varchar as os_family,
14 |     osMajor::varchar as os_major,
15 |     osMinor::varchar as os_minor,
16 |     osPatch::varchar as os_patch,
17 |     osPatchMinor::varchar as os_patch_minor,
18 |     osVersion::varchar as os_version,
19 |     useragentFamily::varchar as useragent_family,
20 |     useragentMajor::varchar as useragent_major,
21 |     useragentMinor::varchar as useragent_minor,
22 |     useragentPatch::varchar as useragent_patch,
23 |     useragentVersion::varchar as useragent_version
24 |
25 | from {{ ref('snowplow_web_ua_context') }}
26 |
--------------------------------------------------------------------------------
/integration_tests/models/source/default/snowplow_web_yauaa_context_stg.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd.
All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 | {# Staging model over ref('snowplow_web_yauaa_context'): casts every camelCase column to varchar under a snake_case name and tags rows with the constant schema_name 'yauaa'. #}
8 | select
9 |     root_id,
10 |     root_tstamp::timestamp as root_tstamp, -- explicit alias (as in snowplow_web_consent_preferences_stg) so the column name does not rely on implicit naming of a cast
11 |     'yauaa' as schema_name,
12 |     agentClass::varchar as agent_class,
13 |     agentInformationEmail::varchar as agent_information_email,
14 |     agentName::varchar as agent_name,
15 |     agentNameVersion::varchar as agent_name_version,
16 |     agentNameVersionMajor::varchar as agent_name_version_major,
17 |     agentVersion::varchar as agent_version,
18 |     agentVersionMajor::varchar as agent_version_major,
19 |     deviceBrand::varchar as device_brand,
20 |     deviceClass::varchar as device_class,
21 |     deviceCpu::varchar as device_cpu,
22 |     deviceCpuBits::varchar as device_cpu_bits,
23 |     deviceName::varchar as device_name,
24 |     deviceVersion::varchar as device_version,
25 |     layoutEngineClass::varchar as layout_engine_class,
26 |     layoutEngineName::varchar as layout_engine_name,
27 |     layoutEngineNameVersion::varchar as layout_engine_name_version,
28 |     layoutEngineNameVersionMajor::varchar as layout_engine_name_version_major,
29 |     layoutEngineVersion::varchar as layout_engine_version,
30 |     layoutEngineVersionMajor::varchar as layout_engine_version_major,
31 |     networkType::varchar as network_type,
32 |     operatingSystemClass::varchar as operating_system_class,
33 |     operatingSystemName::varchar as operating_system_name,
34 |     operatingSystemNameVersion::varchar as operating_system_name_version,
35 |     operatingSystemNameVersionMajor::varchar as operating_system_name_version_major,
36 |     operatingSystemVersion::varchar as operating_system_version,
37 |     operatingSystemVersionBuild::varchar as operating_system_version_build,
38 |     operatingSystemVersionMajor::varchar as operating_system_version_major,
39 |     webviewAppName::varchar as webview_app_name,
40 |     webviewAppNameVersionMajor::varchar as webview_app_name_version_major,
41 |     webviewAppVersion::varchar as webview_app_version,
42 |     webviewAppVersionMajor::varchar as webview_app_version_major
43 |
44 | from {{ ref('snowplow_web_yauaa_context') }}
45 |
--------------------------------------------------------------------------------
/integration_tests/packages.yml:
--------------------------------------------------------------------------------
1 |
2 | packages:
3 |   - local: ../
4 |
--------------------------------------------------------------------------------
/macros/allow_refresh.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {# Default: Allow refresh in dev, block refresh otherwise.
dev defined by snowplow__dev_target_name #} 9 | 10 | {% macro allow_refresh() %} 11 | {{ return(adapter.dispatch('allow_refresh', 'snowplow_web')()) }} 12 | {% endmacro %} 13 | 14 | {% macro default__allow_refresh() %} 15 | 16 | {% if flags.FULL_REFRESH == True %} 17 | {% set allow_refresh = snowplow_utils.get_value_by_target( 18 | dev_value=none, 19 | default_value=var('snowplow__allow_refresh'), 20 | dev_target_name=var('snowplow__dev_target_name') 21 | ) %} 22 | {% else %} 23 | {% set allow_refresh = none %} 24 | {% endif %} 25 | 26 | {{ return(allow_refresh) }} 27 | 28 | {% endmacro %} 29 | -------------------------------------------------------------------------------- /macros/bigquery/consent_fields.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {% macro consent_fields() %} 9 | 10 | {% set consent_fields = [ 11 | {'field': 'event_type', 'dtype': 'string'}, 12 | {'field': 'basis_for_processing', 'dtype': 'string'}, 13 | {'field': 'consent_url', 'dtype': 'string'}, 14 | {'field': 'consent_version', 'dtype': 'string'}, 15 | {'field': 'consent_scopes', 'dtype': 'string'}, 16 | {'field': 'domains_applied', 'dtype': 'string'}, 17 | {'field': 'gdpr_applies', 'dtype': 'string'} 18 | ] %} 19 | 20 | {{ return(consent_fields) }} 21 | 22 | {% endmacro %} 23 | -------------------------------------------------------------------------------- /macros/bigquery/page_view_contexts.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. 
All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {% macro iab_fields() %} 9 | 10 | {% set iab_fields = [ 11 | {'field':'category', 'dtype':'string'}, 12 | {'field':'primary_impact', 'dtype':'string'}, 13 | {'field':'reason', 'dtype':'string'}, 14 | {'field':'spider_or_robot', 'dtype':'boolean'} 15 | ] %} 16 | 17 | {{ return(iab_fields) }} 18 | 19 | {% endmacro %} 20 | 21 | {% macro ua_fields() %} 22 | 23 | {% set ua_fields = [ 24 | {'field': 'useragent_family', 'dtype': 'string'}, 25 | {'field': 'useragent_major', 'dtype': 'string'}, 26 | {'field': 'useragent_minor', 'dtype': 'string'}, 27 | {'field': 'useragent_patch', 'dtype': 'string'}, 28 | {'field': 'useragent_version', 'dtype': 'string'}, 29 | {'field': 'os_family', 'dtype': 'string'}, 30 | {'field': 'os_major', 'dtype': 'string'}, 31 | {'field': 'os_minor', 'dtype': 'string'}, 32 | {'field': 'os_patch', 'dtype': 'string'}, 33 | {'field': 'os_patch_minor', 'dtype': 'string'}, 34 | {'field': 'os_version', 'dtype': 'string'}, 35 | {'field': 'device_family', 'dtype': 'string'} 36 | ] %} 37 | 38 | {{ return(ua_fields) }} 39 | 40 | {% endmacro %} 41 | 42 | {% macro yauaa_fields() %} 43 | 44 | {% set yauaa_fields = [ 45 | {'field': 'device_class', 'dtype': 'string'}, 46 | {'field': 'agent_class', 'dtype': 'string'}, 47 | {'field': 'agent_name', 'dtype': 'string'}, 48 | {'field': 'agent_name_version', 'dtype': 'string'}, 49 | {'field': 'agent_name_version_major', 'dtype': 'string'}, 50 | {'field': 'agent_version', 'dtype': 'string'}, 51 | {'field': 'agent_version_major', 'dtype': 'string'}, 52 | {'field': 'device_brand', 'dtype': 'string'}, 53 | {'field': 'device_name', 'dtype': 'string'}, 54 | {'field': 'device_version', 
'dtype': 'string'}, 55 | {'field': 'layout_engine_class', 'dtype': 'string'}, 56 | {'field': 'layout_engine_name', 'dtype': 'string'}, 57 | {'field': 'layout_engine_name_version', 'dtype': 'string'}, 58 | {'field': 'layout_engine_name_version_major', 'dtype': 'string'}, 59 | {'field': 'layout_engine_version', 'dtype': 'string'}, 60 | {'field': 'layout_engine_version_major', 'dtype': 'string'}, 61 | {'field': 'operating_system_class', 'dtype': 'string'}, 62 | {'field': 'operating_system_name', 'dtype': 'string'}, 63 | {'field': 'operating_system_name_version', 'dtype': 'string'}, 64 | {'field': 'operating_system_version', 'dtype': 'string'} 65 | ] %} 66 | 67 | {{ return(yauaa_fields) }} 68 | 69 | {% endmacro %} 70 | -------------------------------------------------------------------------------- /macros/cluster_by_fields.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {% macro web_cluster_by_fields_sessions_lifecycle() %} 9 | 10 | {{ return(adapter.dispatch('web_cluster_by_fields_sessions_lifecycle', 'snowplow_web')()) }} 11 | 12 | {% endmacro %} 13 | 14 | {% macro default__web_cluster_by_fields_sessions_lifecycle() %} 15 | 16 | {{ return(snowplow_utils.get_value_by_target_type(bigquery_val=["session_identifier"], snowflake_val=["to_date(start_tstamp)"])) }} 17 | 18 | {% endmacro %} 19 | 20 | 21 | {% macro web_cluster_by_fields_page_views() %} 22 | 23 | {{ return(adapter.dispatch('web_cluster_by_fields_page_views', 'snowplow_web')()) }} 24 | 25 | {% endmacro %} 26 | 27 | {% macro default__web_cluster_by_fields_page_views() %} 28 | 29 | {{ return(snowplow_utils.get_value_by_target_type(bigquery_val=["domain_userid","domain_sessionid"], snowflake_val=["to_date(start_tstamp)"])) }} 30 | 31 | {% endmacro %} 32 | 33 | 34 | {% macro web_cluster_by_fields_sessions() %} 35 | 36 | {{ return(adapter.dispatch('web_cluster_by_fields_sessions', 'snowplow_web')()) }} 37 | 38 | {% endmacro %} 39 | 40 | {% macro default__web_cluster_by_fields_sessions() %} 41 | 42 | {{ return(snowplow_utils.get_value_by_target_type(bigquery_val=["domain_userid"], snowflake_val=["to_date(start_tstamp)"])) }} 43 | 44 | {% endmacro %} 45 | 46 | 47 | {% macro web_cluster_by_fields_users() %} 48 | 49 | {{ return(adapter.dispatch('web_cluster_by_fields_users', 'snowplow_web')()) }} 50 | 51 | {% endmacro %} 52 | 53 | {% macro default__web_cluster_by_fields_users() %} 54 | 55 | {{ return(snowplow_utils.get_value_by_target_type(bigquery_val=["user_id","domain_userid"], snowflake_val=["to_date(start_tstamp)"])) }} 56 | 57 | {% endmacro %} 58 | 59 | {% macro web_cluster_by_fields_consent() %} 60 | 61 | {{ return(adapter.dispatch('web_cluster_by_fields_consent', 'snowplow_web')()) }} 62 | 63 | {% endmacro %} 64 | 65 | {% macro 
default__web_cluster_by_fields_consent() %} 66 | 67 | {{ return(snowplow_utils.get_value_by_target_type(bigquery_val=["event_id","domain_userid"], snowflake_val=["to_date(load_tstamp)"])) }} 68 | 69 | {% endmacro %} 70 | 71 | {% macro web_cluster_by_fields_cwv() %} 72 | 73 | {{ return(adapter.dispatch('web_cluster_by_fields_cwv', 'snowplow_web')()) }} 74 | 75 | {% endmacro %} 76 | 77 | {% macro default__web_cluster_by_fields_cwv() %} 78 | 79 | {{ return(snowplow_utils.get_value_by_target_type(bigquery_val=["page_view_id","domain_userid"], snowflake_val=["to_date(derived_tstamp)"])) }} 80 | 81 | {% endmacro %} 82 | -------------------------------------------------------------------------------- /macros/content_group_query.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {% macro content_group_query() %} 9 | {{ return(adapter.dispatch('content_group_query', 'snowplow_web')()) }} 10 | {% endmacro %} 11 | 12 | 13 | {% macro default__content_group_query() %} 14 | case when ev.page_url like '%/product%' then 'PDP' 15 | when ev.page_url like '%/list%' then 'PLP' 16 | when ev.page_url like '%/checkout%' then 'checkout' 17 | when ev.page_url like '%/home%' then 'homepage' 18 | else 'other' 19 | end 20 | 21 | {% endmacro %} 22 | -------------------------------------------------------------------------------- /macros/core_web_vital_page_groups.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 
3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {% macro core_web_vital_page_groups() %} 9 | {{ return(adapter.dispatch('core_web_vital_page_groups', 'snowplow_web')()) }} 10 | {%- endmacro -%} 11 | 12 | {% macro default__core_web_vital_page_groups() %} 13 | 14 | case when page_url like '%/product%' then 'PDP' 15 | when page_url like '%/list%' then 'PLP' 16 | when page_url like '%/checkout%' then 'checkout' 17 | when page_url like '%/home%' then 'homepage' 18 | else 'other' end 19 | 20 | {% endmacro %} 21 | -------------------------------------------------------------------------------- /macros/core_web_vital_pass_query.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {% macro core_web_vital_pass_query() %}
9 |   {{ return(adapter.dispatch('core_web_vital_pass_query', 'snowplow_web')()) }}
10 | {%- endmacro -%}
11 |
12 | {% macro default__core_web_vital_pass_query() %}
13 |
14 | case when m.lcp_result = 'good' and m.fid_result = 'good' and m.cls_result = 'good' then 1 else 0 end
15 |
16 | {% endmacro %}
17 |
--------------------------------------------------------------------------------
/macros/core_web_vital_results_query.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {% macro core_web_vital_results_query(suffix) %}
9 |   {{ return(adapter.dispatch('core_web_vital_results_query', 'snowplow_web')(suffix)) }}
10 | {%- endmacro -%}
11 |
12 | {#
13 |   Emits five comma-separated case expressions (lcp_result, fid_result, cls_result, ttfb_result, inp_result).
14 |   Each classifies the column named <metric><suffix> as 'good', 'needs improvement' or 'poor',
15 |   or as 'not measurable' when the value is null.
16 |   Boundaries are inclusive (<=): the published Core Web Vitals thresholds rate a value that sits
17 |   exactly on a threshold (e.g. CLS = 0.1, FID = 100) in the better bucket.
18 |   NOTE(review): lcp is compared against 2.5 / 4 while the other timings use millisecond-sized numbers, so lcp is presumably already in seconds - confirm upstream.
19 | #}
20 | {% macro default__core_web_vital_results_query(suffix) %}
21 |   case when lcp{{suffix}} is null then 'not measurable'
22 |        when lcp{{suffix}} <= 2.5 then 'good'
23 |        when lcp{{suffix}} <= 4 then 'needs improvement'
24 |        else 'poor' end as lcp_result,
25 |
26 |   case when fid{{suffix}} is null then 'not measurable'
27 |        when fid{{suffix}} <= 100 then 'good'
28 |        when fid{{suffix}} <= 300 then 'needs improvement'
29 |        else 'poor' end as fid_result,
30 |
31 |   case when cls{{suffix}} is null then 'not measurable'
32 |        when cls{{suffix}} <= 0.1 then 'good'
33 |        when cls{{suffix}} <= 0.25 then 'needs improvement'
34 |        else 'poor' end as cls_result,
35 |
36 |   case when ttfb{{suffix}} is null then 'not measurable'
37 |        when ttfb{{suffix}} <= 800 then 'good'
38 |        when ttfb{{suffix}} <= 1800 then 'needs improvement'
39 |        else 'poor' end as ttfb_result,
40 |
41 |   case when inp{{suffix}} is null then 'not measurable'
42 |        when inp{{suffix}} <= 200 then 'good'
43 |        when inp{{suffix}} <= 500 then 'needs improvement'
44 |        else 'poor' end as inp_result
45 |
46 | {% endmacro %}
47 |
--------------------------------------------------------------------------------
/macros/engaged_session.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {% macro engaged_session() %}
9 |   {{ return(adapter.dispatch('engaged_session', 'snowplow_web')()) }}
10 | {% endmacro %}
11 |
12 | {% macro default__engaged_session() %}
13 |   page_views >= 2 or engaged_time_in_s / {{ var('snowplow__heartbeat', 10) }} >= 2
14 |   {%- if var('snowplow__conversion_events', none) %}
15 |     {%- for conv_def in var('snowplow__conversion_events') %}
16 |       or cv_{{ conv_def['name'] }}_converted
17 |     {%- endfor %}
18 |   {%- endif %}
19 | {% endmacro %}
20 |
--------------------------------------------------------------------------------
/macros/filter_bots.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {#
9 |     Emits an `and ...` predicate that excludes rows whose lower-cased
10 |     `useragent` matches a list of known bot / crawler substrings.
11 |     table_alias: optional alias used to qualify the `useragent` column.
12 |     The warehouse-specific variant is chosen through adapter.dispatch.
13 | #}
14 | {% macro filter_bots(table_alias = none) %}
15 |     {{ return(adapter.dispatch('filter_bots', 'snowplow_web')(table_alias)) }}
16 | {%- endmacro -%}
17 |
18 | {# Default variant: pattern match with `not similar to`. #}
19 | {% macro default__filter_bots(table_alias = none) %}
20 |     {%- set ua_col = ((table_alias ~ '.') if table_alias else '') ~ 'useragent' %}
21 |     and lower({{ ua_col }}) not similar to '%(bot|crawl|slurp|spider|archiv|spinn|sniff|seo|audit|survey|pingdom|worm|capture|(browser|screen)shots|analyz|index|thumb|check|facebook|pingdombot|phantomjs|yandexbot|twitterbot|a_archiver|facebookexternalhit|bingbot|bingpreview|googlebot|baiduspider|360(spider|user-agent)|semalt)%'
22 | {% endmacro %}
23 |
24 | {# BigQuery variant: `regexp_contains` performs a partial match, so no wildcards are needed. #}
25 | {% macro bigquery__filter_bots(table_alias = none) %}
26 |     {%- set ua_col = ((table_alias ~ '.') if table_alias else '') ~ 'useragent' %}
27 |     and not regexp_contains(lower({{ ua_col }}), '(bot|crawl|slurp|spider|archiv|spinn|sniff|seo|audit|survey|pingdom|worm|capture|(browser|screen)shots|analyz|index|thumb|check|facebook|pingdombot|phantomjs|yandexbot|twitterbot|a_archiver|facebookexternalhit|bingbot|bingpreview|googlebot|baiduspider|360(spider|user-agent)|semalt)')
28 | {% endmacro %}
29 |
30 | {# Spark variant: `rlike` with the pattern padded by `.*` on both sides. #}
31 | {% macro spark__filter_bots(table_alias = none) %}
32 |     {%- set ua_col = ((table_alias ~ '.') if table_alias else '') ~ 'useragent' %}
33 |     and not rlike(lower({{ ua_col }}), '.*(bot|crawl|slurp|spider|archiv|spinn|sniff|seo|audit|survey|pingdom|worm|capture|(browser|screen)shots|analyz|index|thumb|check|facebook|pingdombot|phantomjs|yandexbot|twitterbot|a_archiver|facebookexternalhit|bingbot|bingpreview|googlebot|baiduspider|360(spider|user-agent)|semalt).*')
34 | {% endmacro %}
35 |
36 | {# Snowflake variant: same `rlike` form as the Spark variant. #}
37 | {% macro snowflake__filter_bots(table_alias = none) %}
38 |     {%- set ua_col = ((table_alias ~ '.') if table_alias else '') ~ 'useragent' %}
39 |     and not rlike(lower({{ ua_col }}), '.*(bot|crawl|slurp|spider|archiv|spinn|sniff|seo|audit|survey|pingdom|worm|capture|(browser|screen)shots|analyz|index|thumb|check|facebook|pingdombot|phantomjs|yandexbot|twitterbot|a_archiver|facebookexternalhit|bingbot|bingpreview|googlebot|baiduspider|360(spider|user-agent)|semalt).*')
40 | {% endmacro %}
41 |
-------------------------------------------------------------------------------- /macros/macros.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | macros: 4 | - name: filter_bots 5 | description: '{{ doc("macro_filter_bots") }}' 6 | arguments: 7 | - name: table_alias 8 | type: string 9 | description: (Optional) the table alias to identify the useragent column from. Default none 10 | - name: stitch_user_identifiers 11 | description: '{{ doc("macro_stitch_user_identifiers") }}' 12 | arguments: 13 | - name: enabled 14 | type: boolean 15 | description: If the user stitching should be done or not 16 | - name: relation 17 | type: string 18 | description: (Optional) The model to update the `stitched_user_id` column in. Default `this` 19 | - name: user_mapping_relation 20 | type: string 21 | description: (Optional) The model to use the `user_id` column from. Default `snowplow_web_user_mapping` 22 | - name: get_iab_context_fields 23 | description: '{{ doc("macro_get_iab_context_fields") }}' 24 | arguments: 25 | - name: table_prefix 26 | type: string 27 | description: (Optional) Table alias to prefix the column selection with. Default none 28 | - name: get_ua_context_fields 29 | description: '{{ doc("macro_get_ua_context_fields") }}' 30 | arguments: 31 | - name: table_prefix 32 | type: string 33 | description: (Optional) Table alias to prefix the column selection with.
Default none 34 | - name: get_yauaa_context_fields 35 | description: '{{ doc("macro_get_yauaa_context_fields") }}' 36 | arguments: 37 | - name: table_prefix 38 | type: string 39 | description: (Optional) Table alias to prefix the column selection with. Default none 40 | - name: web_cluster_by_fields_sessions_lifecycle 41 | description: '{{ doc("macro_web_cluster_by_X") }}' 42 | - name: web_cluster_by_fields_page_views 43 | description: '{{ doc("macro_web_cluster_by_X") }}' 44 | - name: web_cluster_by_fields_sessions 45 | description: '{{ doc("macro_web_cluster_by_X") }}' 46 | - name: web_cluster_by_fields_users 47 | description: '{{ doc("macro_web_cluster_by_X") }}' 48 | - name: web_cluster_by_fields_consent 49 | description: '{{ doc("macro_web_cluster_by_X") }}' 50 | - name: iab_fields 51 | description: '{{ doc("macro_bq_context_fields") }}' 52 | - name: ua_fields 53 | description: '{{ doc("macro_bq_context_fields") }}' 54 | - name: yauaa_fields 55 | description: '{{ doc("macro_bq_context_fields") }}' 56 | - name: allow_refresh 57 | description: '{{ doc("macro_allow_refresh") }}' 58 | - name: channel_group_query 59 | description: '{{ doc("macro_channel_group_query") }}' 60 | - name: engaged_session 61 | description: '{{ doc("macro_engaged_session") }}' 62 | - name: core_web_vital_page_groups 63 | description: '{{ doc("macro_core_web_vital_page_groups") }}' 64 | - name: core_web_vital_results_query 65 | description: '{{ doc("macro_core_web_vital_results_query") }}' 66 | - name: core_web_vital_pass_query 67 | description: '{{ doc("macro_core_web_vital_pass_query") }}' 68 | -------------------------------------------------------------------------------- /macros/stitch_user_identifiers.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {#
9 |     Emits a statement that copies `user_id` from the user mapping model into
10 |     the `stitched_user_id` column of `relation`, matching on `domain_userid`.
11 |     enabled:               nothing is emitted unless this is truthy.
12 |     relation:              table to update (defaults to `this`).
13 |     user_mapping_relation: name of the mapping model, resolved with ref().
14 |     The warehouse-specific variant is chosen through adapter.dispatch.
15 | #}
16 | {% macro stitch_user_identifiers(enabled, relation=this, user_mapping_relation='snowplow_web_user_mapping') %}
17 |     {{ return(adapter.dispatch('stitch_user_identifiers', 'snowplow_web')(enabled, relation, user_mapping_relation)) }}
18 | {%- endmacro -%}
19 |
20 | {# Default variant: `update ... from` joined on domain_userid. #}
21 | {% macro default__stitch_user_identifiers(enabled, relation=this, user_mapping_relation='snowplow_web_user_mapping') %}
22 |     {% if enabled | as_bool() %}
23 |
24 |     -- Update sessions /page_views table with mapping
25 |     update {{ relation }} as tgt
26 |     set stitched_user_id = mapping.user_id
27 |     from {{ ref(user_mapping_relation) }} as mapping
28 |     where tgt.domain_userid = mapping.domain_userid;
29 |
30 |     {% endif %}
31 | {%- endmacro -%}
32 |
33 | {# Spark variant: the same update expressed as `merge into ... when matched`. #}
34 | {% macro spark__stitch_user_identifiers(enabled, relation=this, user_mapping_relation='snowplow_web_user_mapping') %}
35 |     {% if enabled | as_bool() %}
36 |
37 |     -- Update sessions /page_views table with mapping
38 |     merge into {{ relation }} as tgt
39 |     using {{ ref(user_mapping_relation) }} as mapping
40 |     on tgt.domain_userid = mapping.domain_userid
41 |
42 |     when matched then
43 |         update set tgt.stitched_user_id = mapping.user_id;
44 |
45 |     {% endif %}
46 | {%- endmacro -%}
47 |
-------------------------------------------------------------------------------- /models/base/manifest/base_manifest.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: snowplow_web_base_sessions_lifecycle_manifest 5 | description: '{{ doc("table_base_sessions_lifecycle_manifest") }}' 6 | columns: 7 | - name: session_identifier 8 | description: '{{ doc("col_session_identifier") }}' 9 | tags: 10 | - primary-key 11 | tests: 12 |
- unique 13 | - not_null 14 | - name: user_identifier 15 | description: '{{ doc("col_user_identifier") }}' 16 | - name: start_tstamp 17 | description: The `collector_tstamp` when the session began 18 | tests: 19 | - not_null 20 | - name: end_tstamp 21 | description: The `collector_tstamp` when the session ended 22 | tests: 23 | - not_null 24 | - name: snowplow_web_incremental_manifest 25 | description: '{{ doc("table_base_incremental_manifest") }}' 26 | columns: 27 | - name: model 28 | description: The name of the model. 29 | tags: 30 | - primary-key 31 | tests: 32 | - unique 33 | - not_null 34 | - name: last_success 35 | description: The latest event consumed by the model, based on `collector_tstamp` 36 | - name: snowplow_web_base_quarantined_sessions 37 | description: '{{ doc("table_base_quarantined_sessions") }}' 38 | columns: 39 | - name: session_identifier 40 | description: The `session_identifier` of the quarantined session 41 | tags: 42 | - primary-key 43 | tests: 44 | - unique 45 | - not_null 46 | -------------------------------------------------------------------------------- /models/base/manifest/snowplow_web_base_quarantined_sessions.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {# Delta table properties passed to config() below. #}
9 | {% set delta_tblproperties = {
10 |     'delta.autoOptimize.optimizeWrite' : 'true',
11 |     'delta.autoOptimize.autoCompact' : 'true'
12 | } %}
13 |
14 | {{
15 |     config(
16 |         materialized='incremental',
17 |         full_refresh=snowplow_web.allow_refresh(),
18 |         sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')),
19 |         tblproperties=delta_tblproperties
20 |     )
21 | }}
22 |
23 | {# The model body is generated entirely by the snowplow_utils macro. #}
24 | {{ snowplow_utils.base_create_snowplow_quarantined_sessions() }}
25 |
-------------------------------------------------------------------------------- /models/base/manifest/snowplow_web_base_sessions_lifecycle_manifest.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {#
9 |     Database holding the events table:
10 |       - targets other than databricks/spark: `snowplow__database` (default target.database)
11 |       - databricks:                          `snowplow__databricks_catalog` (default 'hive_metastore')
12 |       - spark:                               `snowplow__atomic_schema` (default 'atomic')
13 | #}
14 | {% if target.type not in ['databricks', 'spark'] %}
15 |     {% set events_database = var('snowplow__database', target.database) %}
16 | {% elif target.type in ['databricks'] %}
17 |     {% set events_database = var('snowplow__databricks_catalog', 'hive_metastore') %}
18 | {% else %}
19 |     {% set events_database = var('snowplow__atomic_schema', 'atomic') %}
20 | {% endif %}
21 |
22 | {% set bigquery_partition = {
23 |     "field": "start_tstamp",
24 |     "data_type": "timestamp"
25 | } %}
26 |
27 | {% set delta_tblproperties = {
28 |     'delta.autoOptimize.optimizeWrite' : 'true',
29 |     'delta.autoOptimize.autoCompact' : 'true'
30 | } %}
31 |
32 | {{
33 |     config(
34 |         materialized='incremental',
35 |         unique_key='session_identifier',
36 |         upsert_date_key='start_tstamp',
37 |         sort='start_tstamp',
38 |         dist='session_identifier',
39 |         partition_by=snowplow_utils.get_value_by_target_type(bigquery_val=bigquery_partition, databricks_val='start_tstamp_date'),
40 |         cluster_by=snowplow_web.web_cluster_by_fields_sessions_lifecycle(),
41 |         full_refresh=snowplow_web.allow_refresh(),
42 |         tags=["manifest"],
43 |         sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')),
44 |         tblproperties=delta_tblproperties,
45 |         snowplow_optimize=true
46 |     )
47 | }}
48 |
49 | {# The model body is generated by the snowplow_utils macro from the package vars. #}
50 | {{ snowplow_utils.base_create_snowplow_sessions_lifecycle_manifest(
51 |     session_identifiers=var('snowplow__session_identifiers', [{"schema" : "atomic", "field" : "domain_sessionid"}]),
52 |     session_sql=var('snowplow__session_sql', none),
53 |     session_timestamp=var('snowplow__session_timestamp', 'collector_tstamp'),
54 |     user_identifiers=var('snowplow__user_identifiers', [{"schema": "atomic", "field" : "domain_userid"}]),
55 |     user_sql=var('snowplow__user_sql', none),
56 |     quarantined_sessions='snowplow_web_base_quarantined_sessions',
57 |     derived_tstamp_partitioned=var('snowplow__derived_tstamp_partitioned', true),
58 |     days_late_allowed=var('snowplow__days_late_allowed', 3),
59 |     max_session_days=var('snowplow__max_session_days', 3),
60 |     app_ids=var('snowplow__app_id', []),
61 |     snowplow_events_database=events_database,
62 |     snowplow_events_schema=var('snowplow__atomic_schema', 'atomic'),
63 |     snowplow_events_table=var('snowplow__events_table', 'events'),
64 |     event_limits_table='snowplow_web_base_new_event_limits',
65 |     incremental_manifest_table='snowplow_web_incremental_manifest'
66 | ) }}
67 |
--------------------------------------------------------------------------------
/models/base/manifest/snowplow_web_incremental_manifest.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {# Delta table properties passed to config() below. #}
9 | {% set delta_tblproperties = {
10 |     'delta.autoOptimize.optimizeWrite' : 'true',
11 |     'delta.autoOptimize.autoCompact' : 'true'
12 | } %}
13 |
14 | {{
15 |     config(
16 |         materialized='incremental',
17 |         full_refresh=snowplow_web.allow_refresh(),
18 |         sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')),
19 |         tblproperties=delta_tblproperties
20 |     )
21 | }}
22 |
23 | {# The model body is generated entirely by the snowplow_utils macro. #}
24 | {{ snowplow_utils.base_create_snowplow_incremental_manifest() }}
25 |
-------------------------------------------------------------------------------- /models/base/scratch/bigquery/snowplow_web_base_events_this_run.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {{ config(tags=["this_run"]) }}
9 |
10 | {#
11 |     Database holding the events table:
12 |       - targets other than databricks/spark: `snowplow__database` (default target.database)
13 |       - databricks:                          `snowplow__databricks_catalog` (default 'hive_metastore')
14 |       - spark:                               `snowplow__atomic_schema` (default 'atomic')
15 | #}
16 | {% if target.type not in ['databricks', 'spark'] %}
17 |     {% set events_database = var('snowplow__database', target.database) %}
18 | {% elif target.type in ['databricks'] %}
19 |     {% set events_database = var('snowplow__databricks_catalog', 'hive_metastore') %}
20 | {% else %}
21 |     {% set events_database = var('snowplow__atomic_schema', 'atomic') %}
22 | {% endif %}
23 |
24 | {% set base_events_query = snowplow_utils.base_create_snowplow_events_this_run(
25 |     sessions_this_run_table='snowplow_web_base_sessions_this_run',
26 |     session_identifiers=var('snowplow__session_identifiers', [{"schema" : "atomic", "field" : "domain_sessionid"}]),
27 |     session_sql=var('snowplow__session_sql', none),
28 |     session_timestamp=var('snowplow__session_timestamp', 'collector_tstamp'),
29 |     derived_tstamp_partitioned=var('snowplow__derived_tstamp_partitioned', true),
30 |     days_late_allowed=var('snowplow__days_late_allowed', 3),
31 |     max_session_days=var('snowplow__max_session_days', 3),
32 |     app_ids=var('snowplow__app_id', []),
33 |     snowplow_events_database=events_database,
34 |     snowplow_events_schema=var('snowplow__atomic_schema', 'atomic'),
35 |     snowplow_events_table=var('snowplow__events_table', 'events')
36 | ) %}
37 |
38 | with base_query as (
39 |     {{ base_events_query }}
40 | )
41 |
42 | {# Expose the package-level identifiers under the classic column names and keep the raw values as original_*. #}
43 | select
44 |     ev.contexts_com_snowplowanalytics_snowplow_web_page_1_0_0[safe_offset(0)].id as page_view_id,
45 |     ev.session_identifier as domain_sessionid,
46 |     ev.domain_sessionid as original_domain_sessionid,
47 |     ev.user_identifier as domain_userid,
48 |     ev.domain_userid as original_domain_userid,
49 |     ev.* except(contexts_com_snowplowanalytics_snowplow_web_page_1_0_0, domain_sessionid, domain_userid)
50 |
51 | from base_query ev
52 |
-------------------------------------------------------------------------------- /models/base/scratch/databricks/snowplow_web_base_events_this_run.sql: -------------------------------------------------------------------------------- 1 | {# 2 |
Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {{ config(tags=["this_run"]) }}
9 |
10 | {#
11 |     Database holding the events table:
12 |       - targets other than databricks/spark: `snowplow__database` (default target.database)
13 |       - databricks:                          `snowplow__databricks_catalog` (default 'hive_metastore')
14 |       - spark:                               `snowplow__atomic_schema` (default 'atomic')
15 | #}
16 | {% if target.type not in ['databricks', 'spark'] %}
17 |     {% set events_database = var('snowplow__database', target.database) %}
18 | {% elif target.type in ['databricks'] %}
19 |     {% set events_database = var('snowplow__databricks_catalog', 'hive_metastore') %}
20 | {% else %}
21 |     {% set events_database = var('snowplow__atomic_schema', 'atomic') %}
22 | {% endif %}
23 |
24 | {% set base_events_query = snowplow_utils.base_create_snowplow_events_this_run(
25 |     sessions_this_run_table='snowplow_web_base_sessions_this_run',
26 |     session_identifiers=var('snowplow__session_identifiers', [{"schema" : "atomic", "field" : "domain_sessionid"}]),
27 |     session_sql=var('snowplow__session_sql', none),
28 |     session_timestamp=var('snowplow__session_timestamp', 'collector_tstamp'),
29 |     derived_tstamp_partitioned=var('snowplow__derived_tstamp_partitioned', true),
30 |     days_late_allowed=var('snowplow__days_late_allowed', 3),
31 |     max_session_days=var('snowplow__max_session_days', 3),
32 |     app_ids=var('snowplow__app_id', []),
33 |     snowplow_events_database=events_database,
34 |     snowplow_events_schema=var('snowplow__atomic_schema', 'atomic'),
35 |     snowplow_events_table=var('snowplow__events_table', 'events')
36 | ) %}
37 |
38 | with base_query as (
39 |     {{ base_events_query }}
40 | )
41 |
42 | {# Expose the package-level identifiers under the classic column names and keep the raw values as original_*. #}
43 | select
44 |     ev.contexts_com_snowplowanalytics_snowplow_web_page_1[0].id as page_view_id,
45 |     ev.session_identifier as domain_sessionid,
46 |     ev.domain_sessionid as original_domain_sessionid,
47 |     ev.user_identifier as domain_userid,
48 |     ev.domain_userid as original_domain_userid,
49 |     ev.* except(contexts_com_snowplowanalytics_snowplow_web_page_1, domain_sessionid, domain_userid)
50 |
51 | from base_query ev
52 |
--------------------------------------------------------------------------------
/models/base/scratch/default/snowplow_web_base_events_this_run.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {{
9 |     config(
10 |         sort='collector_tstamp',
11 |         dist='event_id',
12 |         tags=["this_run"]
13 |     )
14 | }}
15 |
16 | {# dbt passes variables by reference, so work on a copy to avoid appending to the shared list more than once. #}
17 | {% set contexts = var('snowplow__entities_or_sdes', []).copy() %}
18 |
19 | {# The page view context is always joined. #}
20 | {% do contexts.append({'schema': var('snowplow__page_view_context'), 'prefix': 'page_view', 'single_entity': True}) %}
21 |
22 | {# (enable flag var, schema var, column prefix) for each optional context, in join order. #}
23 | {% set optional_contexts = [
24 |     ('snowplow__enable_iab', 'snowplow__iab_context', 'iab'),
25 |     ('snowplow__enable_ua', 'snowplow__ua_parser_context', 'ua'),
26 |     ('snowplow__enable_yauaa', 'snowplow__yauaa_context', 'yauaa'),
27 |     ('snowplow__enable_consent', 'snowplow__consent_cmp_visible', 'cmp_visible'),
28 |     ('snowplow__enable_consent', 'snowplow__consent_preferences', 'consent_pref'),
29 |     ('snowplow__enable_cwv', 'snowplow__cwv_context', 'cwv')
30 | ] %}
31 |
32 | {% for enable_var, schema_var, prefix in optional_contexts -%}
33 |     {% if var(enable_var, false) -%}
34 |         {% do contexts.append({'schema': var(schema_var), 'prefix': prefix, 'single_entity': True}) %}
35 |     {% endif -%}
36 | {% endfor -%}
37 |
38 | {#
39 |     Database holding the events table:
40 |       - targets other than databricks/spark: `snowplow__database` (default target.database)
41 |       - databricks:                          `snowplow__databricks_catalog` (default 'hive_metastore')
42 |       - spark:                               `snowplow__atomic_schema` (default 'atomic')
43 | #}
44 | {% if target.type not in ['databricks', 'spark'] %}
45 |     {% set events_database = var('snowplow__database', target.database) %}
46 | {% elif target.type in ['databricks'] %}
47 |     {% set events_database = var('snowplow__databricks_catalog', 'hive_metastore') %}
48 | {% else %}
49 |     {% set events_database = var('snowplow__atomic_schema', 'atomic') %}
50 | {% endif %}
51 |
52 | {% set base_events_query = snowplow_utils.base_create_snowplow_events_this_run(
53 |     sessions_this_run_table='snowplow_web_base_sessions_this_run',
54 |     session_identifiers=var('snowplow__session_identifiers', [{"schema" : "atomic", "field" : "domain_sessionid"}]),
55 |     session_sql=var('snowplow__session_sql', none),
56 |     session_timestamp=var('snowplow__session_timestamp', 'collector_tstamp'),
57 |     derived_tstamp_partitioned=var('snowplow__derived_tstamp_partitioned', true),
58 |     days_late_allowed=var('snowplow__days_late_allowed', 3),
59 |     max_session_days=var('snowplow__max_session_days', 3),
60 |     app_ids=var('snowplow__app_id', []),
61 |     snowplow_events_database=events_database,
62 |     snowplow_events_schema=var('snowplow__atomic_schema', 'atomic'),
63 |     snowplow_events_table=var('snowplow__events_table', 'events'),
64 |     entities_or_sdes=contexts
65 | ) %}
66 |
67 |
68 | with base_query as (
69 |     {{ base_events_query }}
70 | )
71 |
72 | {% set base_query_cols = get_column_schema_from_query( 'select * from (' + base_events_query +') a') %}
73 |
74 | {# Columns that are re-exposed under a different name; every other column passes through unchanged. #}
75 | {% set renamed_cols = {
76 |     'session_identifier': 'domain_sessionid',
77 |     'domain_sessionid': 'original_domain_sessionid',
78 |     'user_identifier': 'domain_userid',
79 |     'domain_userid': 'original_domain_userid'
80 | } %}
81 |
82 | select
83 |     {% for col in base_query_cols | map(attribute='name') | list -%}
84 |         a.{{ col }}{% if col in renamed_cols %} as {{ renamed_cols[col] }}{% endif %}
85 |         {%- if not loop.last -%},{%- endif %}
86 |     {% endfor %}
87 |
88 | from base_query a
89 |
--------------------------------------------------------------------------------
/models/base/scratch/snowflake/snowplow_web_base_events_this_run.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {{
9 |     config(
10 |         tags=["this_run"],
11 |         sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
12 |     )
13 | }}
14 |
15 | {#
16 |     Database holding the events table:
17 |       - targets other than databricks/spark: `snowplow__database` (default target.database)
18 |       - databricks:                          `snowplow__databricks_catalog` (default 'hive_metastore')
19 |       - spark:                               `snowplow__atomic_schema` (default 'atomic')
20 | #}
21 | {% if target.type not in ['databricks', 'spark'] %}
22 |     {% set events_database = var('snowplow__database', target.database) %}
23 | {% elif target.type in ['databricks'] %}
24 |     {% set events_database = var('snowplow__databricks_catalog', 'hive_metastore') %}
25 | {% else %}
26 |     {% set events_database = var('snowplow__atomic_schema', 'atomic') %}
27 | {% endif %}
28 |
29 | {% set base_events_query = snowplow_utils.base_create_snowplow_events_this_run(
30 |     sessions_this_run_table='snowplow_web_base_sessions_this_run',
31 |     session_identifiers=var('snowplow__session_identifiers', [{"schema" : "atomic", "field" : "domain_sessionid"}]),
32 |     session_sql=var('snowplow__session_sql', none),
33 |     session_timestamp=var('snowplow__session_timestamp', 'collector_tstamp'),
34 |     derived_tstamp_partitioned=var('snowplow__derived_tstamp_partitioned', true),
35 |     days_late_allowed=var('snowplow__days_late_allowed', 3),
36 |     max_session_days=var('snowplow__max_session_days', 3),
37 |     app_ids=var('snowplow__app_id', []),
38 |     snowplow_events_database=events_database,
39 |     snowplow_events_schema=var('snowplow__atomic_schema', 'atomic'),
40 |     snowplow_events_table=var('snowplow__events_table', 'events')
41 | ) %}
42 |
43 | with base_query as (
44 |     {{ base_events_query }}
45 | )
46 |
47 | {# Expose the package-level identifiers under the classic column names and keep the raw values as original_*. #}
48 | select
49 |     ev.contexts_com_snowplowanalytics_snowplow_web_page_1[0]:id::varchar as page_view_id,
50 |     ev.session_identifier as domain_sessionid,
51 |     ev.domain_sessionid as original_domain_sessionid,
52 |     ev.user_identifier as domain_userid,
53 |     ev.domain_userid as original_domain_userid,
54 |     ev.* exclude(contexts_com_snowplowanalytics_snowplow_web_page_1, domain_sessionid, domain_userid)
55 |
56 | from base_query ev
57 |
--------------------------------------------------------------------------------
/models/base/scratch/snowplow_web_base_new_event_limits.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {{
9 |     config(
10 |         post_hook=["{{snowplow_utils.print_run_limits(this, 'snowplow_web')}}"],
11 |         sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
12 |     )
13 | }}
14 |
15 | {# Enabled snowplow_web models, as reported by snowplow_utils. #}
16 | {%- set models_in_run = snowplow_utils.get_enabled_snowplow_models('snowplow_web') -%}
17 |
18 | {# Look those models up in the incremental manifest. #}
19 | {% set min_last_success, max_last_success, models_matched_from_manifest, has_matched_all_models =
20 |     snowplow_utils.get_incremental_manifest_status(ref('snowplow_web_incremental_manifest'), models_in_run) -%}
21 |
22 | {# Build the run-limits query from the manifest status and the configured start date. #}
23 | {% set run_limits_query = snowplow_utils.get_run_limits(
24 |     min_last_success,
25 |     max_last_success,
26 |     models_matched_from_manifest,
27 |     has_matched_all_models,
28 |     var("snowplow__start_date","2020-01-01")
29 | ) -%}
30 |
31 | {{ run_limits_query }}
32 |
-------------------------------------------------------------------------------- /models/base/scratch/snowplow_web_base_sessions_this_run.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {# The post-hook calls base_quarantine_sessions with the max session length and the quarantine table name. #}
9 | {{
10 |     config(
11 |         tags=["this_run"],
12 |         post_hook=["{{ snowplow_utils.base_quarantine_sessions(var('snowplow__max_session_days', 3), var('snowplow__quarantined_sessions', 'snowplow_web_base_quarantined_sessions')) }}"],
13 |         sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
14 |     )
15 | }}
16 |
17 | {# The model body is generated by the snowplow_utils macro from the manifest and run-limit tables. #}
18 | {{ snowplow_utils.base_create_snowplow_sessions_this_run(
19 |     lifecycle_manifest_table='snowplow_web_base_sessions_lifecycle_manifest',
20 |     new_event_limits_table='snowplow_web_base_new_event_limits'
21 | ) }}
22 |
-------------------------------------------------------------------------------- /models/optional_modules/consent/scratch/bigquery/snowplow_web_consent_events_this_run.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {#
9 |     Consent events (cmp_visible / consent_preferences) of the current run, BigQuery flavour.
10 |     Enabled only when `snowplow__enable_consent` is set and the target is BigQuery.
11 |     The condition is parenthesised because a Jinja filter binds tighter than
12 |     `==` / `and`: without the parentheses `as_bool()` is applied to the string
13 |     literal 'bigquery' alone rather than to the whole condition.
14 | #}
15 | {{
16 |     config(
17 |         tags=["this_run"],
18 |         enabled=(var("snowplow__enable_consent", false) and target.type == 'bigquery') | as_bool()
19 |     )
20 | }}
21 |
22 | with prep as (
23 |
24 |     select
25 |         e.event_id,
26 |         e.domain_userid,
27 |         e.original_domain_userid,
28 |         e.user_id,
29 |         e.geo_country,
30 |         e.page_view_id,
31 |         e.domain_sessionid,
32 |         e.original_domain_sessionid,
33 |         e.derived_tstamp,
34 |         e.load_tstamp,
35 |         e.event_name,
36 |         -- fields of the consent_preferences self-describing event
37 |         {{ snowplow_utils.get_optional_fields(
38 |             enabled= true,
39 |             fields=consent_fields(),
40 |             col_prefix='unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1',
41 |             relation=ref('snowplow_web_base_events_this_run'),
42 |             relation_alias='e') }},
43 |         -- elapsed_time of the cmp_visible self-describing event (read as string, cast below)
44 |         {{ snowplow_utils.get_optional_fields(
45 |             enabled= true,
46 |             fields=[{'field': 'elapsed_time', 'dtype': 'string'}],
47 |             col_prefix='unstruct_event_com_snowplowanalytics_snowplow_cmp_visible_1',
48 |             relation=ref('snowplow_web_base_events_this_run'),
49 |             relation_alias='e') }}
50 |
51 |     from {{ ref("snowplow_web_base_events_this_run") }} as e
52 |
53 |     where e.event_name in ('cmp_visible', 'consent_preferences')
54 |
55 |     and {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events.
56 |
57 |     {% if var("snowplow__ua_bot_filter", false) %}
58 |         {{ filter_bots() }}
59 |     {% endif %}
60 | )
61 |
62 | select
63 |     p.event_id,
64 |     p.domain_userid,
65 |     p.original_domain_userid,
66 |     p.user_id,
67 |     p.geo_country,
68 |     p.page_view_id,
69 |     p.domain_sessionid,
70 |     p.original_domain_sessionid,
71 |     p.derived_tstamp,
72 |     p.load_tstamp,
73 |     p.event_name,
74 |     p.event_type,
75 |     p.basis_for_processing,
76 |     p.consent_url,
77 |     p.consent_version,
78 |     {{ snowplow_utils.get_array_to_string('consent_scopes', 'p', ', ') }} as consent_scopes,
79 |     {{ snowplow_utils.get_array_to_string('domains_applied', 'p', ', ') }} as domains_applied,
80 |     -- missing / uncastable values are treated as "GDPR does not apply"
81 |     coalesce(safe_cast(p.gdpr_applies as boolean), false) as gdpr_applies,
82 |     cast(p.elapsed_time as {{ dbt.type_float() }}) as cmp_load_time
83 |
84 | from prep p
85 |
-------------------------------------------------------------------------------- /models/optional_modules/consent/scratch/databricks/snowplow_web_consent_events_this_run.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {#
9 |     Consent events (cmp_visible / consent_preferences) of the current run, Databricks / Spark flavour.
10 |     Enabled only when `snowplow__enable_consent` is set and the target is databricks or spark.
11 |     The condition is parenthesised because a Jinja filter binds tighter than
12 |     `in` / `and`: without the parentheses `as_bool()` is applied to the list
13 |     literal alone rather than to the whole condition.
14 | #}
15 | {{
16 |     config(
17 |         tags=["this_run"],
18 |         enabled=(var("snowplow__enable_consent", false) and target.type in ['databricks', 'spark']) | as_bool()
19 |     )
20 | }}
21 |
22 | with prep as (
23 |
24 |     select
25 |         e.event_id,
26 |         e.domain_userid,
27 |         e.original_domain_userid,
28 |         e.user_id,
29 |         e.geo_country,
30 |         e.page_view_id,
31 |         e.domain_sessionid,
32 |         e.original_domain_sessionid,
33 |         e.derived_tstamp,
34 |         e.load_tstamp,
35 |         e.event_name,
36 |         e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1.event_type::STRING as event_type,
37 |         e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1.basis_for_processing::STRING as basis_for_processing,
38 |         e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1.consent_url::STRING as consent_url,
39 |         e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1.consent_version::STRING as consent_version,
40 |         -- ARRAY needs an explicit element type to be a valid cast target
41 |         e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1.consent_scopes::ARRAY<STRING> as consent_scopes,
42 |         e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1.domains_applied::ARRAY<STRING> as domains_applied,
43 |         e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1.gdpr_applies::boolean as gdpr_applies,
44 |         e.unstruct_event_com_snowplowanalytics_snowplow_cmp_visible_1.elapsed_time::float as cmp_load_time
45 |
46 |     from {{ ref("snowplow_web_base_events_this_run") }} as e
47 |
48 |     where e.event_name in ('cmp_visible', 'consent_preferences')
49 |
50 |     and {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events.
51 |
52 |     {% if var("snowplow__ua_bot_filter", false) %}
53 |         {{ filter_bots() }}
54 |     {% endif %}
55 | )
56 |
57 | select
58 |     p.event_id,
59 |     p.domain_userid,
60 |     p.original_domain_userid,
61 |     p.user_id,
62 |     p.geo_country,
63 |     p.page_view_id,
64 |     p.domain_sessionid,
65 |     p.original_domain_sessionid,
66 |     p.derived_tstamp,
67 |     p.load_tstamp,
68 |     p.event_name,
69 |     p.event_type,
70 |     p.basis_for_processing,
71 |     p.consent_url,
72 |     p.consent_version,
73 |     {{ snowplow_utils.get_array_to_string('consent_scopes', 'p', ', ') }} as consent_scopes,
74 |     {{ snowplow_utils.get_array_to_string('domains_applied', 'p', ', ') }} as domains_applied,
75 |     -- a missing value is treated as "GDPR does not apply"
76 |     coalesce(p.gdpr_applies, false) as gdpr_applies,
77 |     p.cmp_load_time
78 |
79 | from prep p
80 |
-------------------------------------------------------------------------------- /models/optional_modules/consent/scratch/default/snowplow_web_consent_events_this_run.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {#
9 |     Consent events (cmp_visible / consent_preferences) of the current run, Redshift / Postgres flavour.
10 |     Enabled only when `snowplow__enable_consent` is set and the target is redshift or postgres.
11 |     The condition is parenthesised because a Jinja filter binds tighter than
12 |     `in` / `and`: without the parentheses `as_bool()` is applied to the list
13 |     literal alone rather than to the whole condition.
14 |     The consent columns are read from the flattened `consent_pref_*` / `cmp_visible_*`
15 |     columns of snowplow_web_base_events_this_run.
16 |     The former `lower_limit` / `upper_limit` lookup was dropped: neither value was
17 |     referenced anywhere in this model.
18 | #}
19 | {{
20 |     config(
21 |         tags=["this_run"],
22 |         enabled=(var("snowplow__enable_consent", false) and target.type in ['redshift', 'postgres']) | as_bool()
23 |     )
24 | }}
25 |
26 | select
27 |     e.event_id,
28 |     e.domain_userid,
29 |     e.original_domain_userid,
30 |     e.user_id,
31 |     e.geo_country,
32 |     e.page_view_id,
33 |     e.domain_sessionid,
34 |     e.original_domain_sessionid,
35 |     e.derived_tstamp,
36 |     e.load_tstamp,
37 |     e.event_name,
38 |     e.consent_pref_event_type as event_type,
39 |     e.consent_pref_basis_for_processing as basis_for_processing,
40 |     e.consent_pref_consent_url as consent_url,
41 |     e.consent_pref_consent_version as consent_version,
42 |     -- strip the quote and bracket characters, then put a space after each comma
43 |     replace(translate(e.consent_pref_consent_scopes, '"[]', ''), ',', ', ') as consent_scopes,
44 |     replace(translate(e.consent_pref_domains_applied, '"[]', ''), ',', ', ') as domains_applied,
45 |     -- a missing value is treated as "GDPR does not apply"
46 |     coalesce(e.consent_pref_gdpr_applies, false) as gdpr_applies,
47 |     e.cmp_visible_elapsed_time as cmp_load_time
48 |
49 | from {{ ref("snowplow_web_base_events_this_run") }} as e
50 |
51 | where e.event_name in ('cmp_visible', 'consent_preferences')
52 |
53 | and {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events.
54 |
55 | {% if var("snowplow__ua_bot_filter", false) %}
56 |     {{ filter_bots() }}
57 | {% endif %}
58 |
-------------------------------------------------------------------------------- /models/optional_modules/consent/scratch/snowflake/snowplow_web_consent_events_this_run.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | tags=["this_run"], 11 | enabled=var("snowplow__enable_consent", false) and target.type == 'snowflake' | as_bool(), 12 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 13 | ) 14 | }} 15 | 16 | with prep as ( 17 | 18 | select 19 | e.event_id, 20 | e.domain_userid, 21 | e.original_domain_userid, 22 | e.user_id, 23 | e.geo_country, 24 | e.page_view_id, 25 | e.domain_sessionid, 26 | e.original_domain_sessionid, 27 | e.derived_tstamp, 28 | e.load_tstamp, 29 | e.event_name, 30 | e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1:eventType::varchar as event_type, 31 | e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1:basisForProcessing::varchar as basis_for_processing, 32 | e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1:consentUrl::varchar as consent_url, 33 | e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1:consentVersion::varchar as consent_version, 34 | e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1:consentScopes::array as consent_scopes, 35 | e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1:domainsApplied::array as domains_applied, 36 | e.unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1:gdprApplies::boolean as gdpr_applies, 37 | e.unstruct_event_com_snowplowanalytics_snowplow_cmp_visible_1:elapsedTime::float as cmp_load_time 38 | 39 | from {{ ref("snowplow_web_base_events_this_run") }} as e 40 | 41 | where event_name in ('cmp_visible', 'consent_preferences') 42 | 43 | and {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if 
run doesn't contain new events. 44 | 45 | {% if var("snowplow__ua_bot_filter", false) %} 46 | {{ filter_bots() }} 47 | {% endif %} 48 | 49 | ) 50 | 51 | select 52 | p.event_id, 53 | p.domain_userid, 54 | p.original_domain_userid, 55 | p.user_id, 56 | p.geo_country, 57 | p.page_view_id, 58 | p.domain_sessionid, 59 | p.original_domain_sessionid, 60 | p.derived_tstamp, 61 | p.load_tstamp, 62 | p.event_name, 63 | p.event_type, 64 | p.basis_for_processing, 65 | p.consent_url, 66 | p.consent_version, 67 | {{ snowplow_utils.get_array_to_string('consent_scopes', 'p', ', ') }} as consent_scopes, 68 | {{ snowplow_utils.get_array_to_string('domains_applied', 'p', ', ') }} as domains_applied, 69 | coalesce(p.gdpr_applies, false) as gdpr_applies, 70 | p.cmp_load_time 71 | 72 | from prep p 73 | -------------------------------------------------------------------------------- /models/optional_modules/consent/snowplow_web_consent_cmp_stats.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='table', 11 | enabled=var("snowplow__enable_consent", false), 12 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 13 | ) 14 | }} 15 | {# Exact match on the adapter name: a parenthesised single string is not a tuple in Jinja, so an "in" test against it would be a substring match. #} 16 | {%- if target.type == 'postgres' -%} 17 | 18 | with events as ( 19 | 20 | select 21 | event_id, 22 | domain_userid, 23 | original_domain_userid, 24 | page_view_id, 25 | domain_sessionid, 26 | original_domain_sessionid, 27 | derived_tstamp, 28 | event_name, 29 | event_type, 30 | cmp_load_time, 31 | -- postgres does not allow the IGNORE NULL clause within last_value(), below workaround should do the same: removing NULLS using array_remove then using the COUNT window function (which counts the number of non-null items and count is bounded up to the current row) to access the array using that as its index position 32 | (array_remove(array_agg(case when event_name = 'cmp_visible' then event_id else null end) over (partition by domain_userid order by derived_tstamp), null))[count(case when event_name = 'cmp_visible' then event_id else null end) over (partition by domain_userid order by derived_tstamp rows between unbounded preceding and current row)] as cmp_id 33 | 34 | from {{ ref('snowplow_web_consent_log') }} 35 | 36 | where event_type <> 'pending' or event_type is null 37 | 38 | ) 39 | 40 | {%- elif target.type in ('databricks', 'spark') -%} 41 | 42 | with events as ( 43 | 44 | select 45 | event_id, 46 | domain_userid, 47 | original_domain_userid, 48 | page_view_id, 49 | domain_sessionid, 50 | original_domain_sessionid, 51 | derived_tstamp, 52 | event_name, 53 | event_type, 54 | cmp_load_time, 55 | last_value(case when event_name = 'cmp_visible' then event_id else null end, TRUE) 56 | over (partition by domain_userid order by derived_tstamp 57 | rows between unbounded preceding and current row) as cmp_id 58 | 59 | from {{
ref('snowplow_web_consent_log') }} 60 | 61 | where event_type <> 'pending' or event_type is null 62 | 63 | ) 64 | 65 | {%- else -%} 66 | 67 | with events as ( 68 | 69 | select 70 | event_id, 71 | domain_userid, 72 | original_domain_userid, 73 | page_view_id, 74 | domain_sessionid, 75 | original_domain_sessionid, 76 | derived_tstamp, 77 | event_name, 78 | event_type, 79 | cmp_load_time, 80 | last_value(case when event_name = 'cmp_visible' then event_id else null end ignore nulls) 81 | over (partition by domain_userid order by derived_tstamp 82 | rows between unbounded preceding and current row) as cmp_id 83 | 84 | from {{ ref('snowplow_web_consent_log') }} 85 | 86 | where event_type <> 'pending' or event_type is null 87 | 88 | ) 89 | 90 | {%- endif -%} 91 | 92 | , event_orders as ( 93 | 94 | select 95 | event_id, 96 | event_type, 97 | cmp_id, 98 | derived_tstamp, 99 | row_number() over(partition by cmp_id order by derived_tstamp) as row_num 100 | 101 | from events 102 | 103 | ) 104 | 105 | , first_consent_events as ( 106 | 107 | select 108 | event_id, 109 | cmp_id, 110 | event_type, 111 | derived_tstamp as first_consent_event_tstamp 112 | 113 | from event_orders 114 | 115 | where row_num = 2 116 | 117 | ) 118 | 119 | , cmp_events as ( 120 | 121 | select distinct 122 | event_id, 123 | domain_userid, 124 | original_domain_userid, 125 | page_view_id, 126 | domain_sessionid, 127 | original_domain_sessionid, 128 | cmp_load_time, 129 | derived_tstamp as cmp_tstamp 130 | 131 | from events 132 | 133 | where event_name = 'cmp_visible' 134 | 135 | ) 136 | 137 | select 138 | e.event_id, 139 | e.domain_userid, 140 | e.original_domain_userid, 141 | e.page_view_id, 142 | e.domain_sessionid, 143 | e.original_domain_sessionid, 144 | e.cmp_load_time, 145 | e.cmp_tstamp, 146 | f.first_consent_event_tstamp, 147 | f.event_type as first_consent_event_type, 148 | {{ datediff('e.cmp_tstamp', 'f.first_consent_event_tstamp', 'second') }} as cmp_interaction_time 149 | 150 | from cmp_events e 
151 | 152 | left join first_consent_events f 153 | on e.event_id = f.cmp_id 154 | -------------------------------------------------------------------------------- /models/optional_modules/consent/snowplow_web_consent_log.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized= 'incremental', 11 | enabled=var("snowplow__enable_consent", false), 12 | unique_key='event_id', 13 | upsert_date_key='derived_tstamp', 14 | sort='derived_tstamp', 15 | dist='event_id', 16 | tags=["derived"], 17 | partition_by = snowplow_utils.get_value_by_target_type(bigquery_val = { 18 | "field": "derived_tstamp", 19 | "data_type": "timestamp" 20 | }, databricks_val = 'derived_tstamp_date'), 21 | cluster_by=snowplow_web.web_cluster_by_fields_consent(), 22 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), 23 | tblproperties={ 24 | 'delta.autoOptimize.optimizeWrite' : 'true', 25 | 'delta.autoOptimize.autoCompact' : 'true' 26 | }, 27 | snowplow_optimize= true 28 | ) 29 | }} 30 | 31 | select 32 | * 33 | {% if target.type in ['databricks', 'spark'] -%} 34 | , DATE(derived_tstamp) as derived_tstamp_date 35 | {%- endif %} 36 | 37 | from {{ ref('snowplow_web_consent_events_this_run') }} 38 | 39 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 
40 | -------------------------------------------------------------------------------- /models/optional_modules/consent/snowplow_web_consent_scope_status.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='table', 11 | enabled=var("snowplow__enable_consent", false) 12 | ) 13 | }} 14 | 15 | with arrays as ( 16 | 17 | select 18 | u.domain_userid, 19 | {{ snowplow_utils.get_split_to_array('last_consent_scopes', 'u', ', ') }} as scope_array 20 | 21 | from {{ ref('snowplow_web_consent_users') }} u 22 | 23 | where is_latest_version 24 | 25 | ) 26 | 27 | , unnesting as ( 28 | 29 | {{ snowplow_utils.unnest('domain_userid', 'scope_array', 'consent_scope', 'arrays') }} 30 | 31 | ) 32 | 33 | select 34 | replace(replace(replace(cast(consent_scope as {{ snowplow_utils.type_max_string() }}), '"', ''), '[', ''), ']', '') as scope, 35 | count(*) as total_consent 36 | 37 | from unnesting 38 | 39 | group by 1 40 | -------------------------------------------------------------------------------- /models/optional_modules/consent/snowplow_web_consent_totals.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='table', 11 | enabled=var("snowplow__enable_consent", false), 12 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 13 | ) 14 | }} 15 | 16 | with totals as ( 17 | 18 | select 19 | last_consent_version, 20 | count(distinct domain_userid) as total_visitors, 21 | count(case when last_consent_event_type ='allow_all' then 1 end) as allow_all, 22 | count(case when last_consent_event_type ='allow_selected' then 1 end) as allow_selected, 23 | count(case when last_consent_event_type IN ('allow_all', 'allow_selected') then 1 end) as allow, 24 | count(case when last_consent_event_type = 'pending' then 1 end) as pending, 25 | count(case when last_consent_event_type = 'deny_all' then 1 end) as denied, 26 | count(case when last_consent_event_type = 'expired' then 1 end) as expired, 27 | count(case when last_consent_event_type = 'withdrawn' then 1 end) as withdrawn, 28 | count(case when last_consent_event_type = 'implicit_consent' then 1 end) as implicit_consent, 29 | count(case when {{ dateadd('year', '1', 'last_consent_event_tstamp') }} <= {{ dateadd('month', '6', 'current_date') }} 30 | and last_consent_event_type <> 'expired' 31 | and {{ dateadd('year', '1', 'last_consent_event_tstamp') }} > current_date then 1 end) as expires_in_six_months 32 | 33 | from {{ ref('snowplow_web_consent_users') }} 34 | 35 | where last_consent_event_type is not null 36 | 37 | group by 1 38 | 39 | ) 40 | 41 | select 42 | v.*, 43 | t.total_visitors, 44 | t.allow_all, 45 | t.allow_selected, 46 | t.allow, 47 | t.pending, 48 | t.denied, 49 | t.expired, 50 | t.withdrawn, 51 | t.implicit_consent, 52 | t.expires_in_six_months 53 | 54 | from {{ ref('snowplow_web_consent_versions') }} v 55 | 56 | left join totals t 57 | on t.last_consent_version = v.consent_version 58 | 59 | order by 
v.version_start_tstamp desc 60 | -------------------------------------------------------------------------------- /models/optional_modules/consent/snowplow_web_consent_users.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='incremental', 11 | enabled=var("snowplow__enable_consent", false), 12 | unique_key='domain_userid', 13 | sort = 'last_consent_event_tstamp', 14 | dist = 'domain_userid', 15 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 16 | ) 17 | }} 18 | 19 | 20 | {% if is_incremental() %} 21 | {%- set lower_limit, upper_limit = snowplow_utils.return_limits_from_model(this, 22 | 'last_processed_event', 23 | 'last_processed_event') %} 24 | {% endif %} 25 | 26 | with base as ( 27 | 28 | select 29 | domain_userid, 30 | user_id, 31 | geo_country, 32 | max(load_tstamp) as last_processed_event, 33 | count(case when event_name = 'cmp_visible' then 1 end) as cmp_events, 34 | count(case when event_name = 'consent_preferences' then 1 end) as consent_events, 35 | max(case when event_name = 'cmp_visible' then derived_tstamp end) as last_cmp_event_tstamp, 36 | row_number() over(partition by domain_userid order by max(load_tstamp) desc) as latest_event_by_user_rank 37 | 38 | from {{ ref('snowplow_web_consent_log') }} 39 | 40 | {% if is_incremental() %} -- and it has not been processed yet 41 | where load_tstamp > {{ upper_limit }} 42 | {% endif %} 43 | 44 | group by 1,2,3 45 | 46 | ) 47 | 48 | , latest_consents as ( 49 | 50 | select 51 | domain_userid, 52 | 
derived_tstamp as last_consent_event_tstamp, 53 | event_type as last_consent_event_type, 54 | consent_scopes as last_consent_scopes, 55 | consent_version as last_consent_version, 56 | consent_url as last_consent_url, 57 | domains_applied as last_domains_applied, 58 | row_number() over(partition by domain_userid order by load_tstamp desc) as latest_consent_event_by_user_rank 59 | 60 | from {{ ref('snowplow_web_consent_log') }} 61 | 62 | where event_name = 'consent_preferences' 63 | 64 | {% if is_incremental() %} -- and it has not been processed yet 65 | and load_tstamp > {{ upper_limit }} 66 | {% endif %} 67 | 68 | ) 69 | 70 | {% if is_incremental() %} 71 | {# Incremental runs read only unprocessed events and the merge on domain_userid replaces the whole row. When the new batch holds no consent_preferences event for a user (l is unmatched) the consent details are carried over from the existing row (t) instead of being overwritten with NULL; the same applies to last_cmp_event_tstamp when the batch holds no cmp_visible event. The existing table is joined before the versions lookup so that is_latest_version is resolved from the carried-over version too. #} 72 | select 73 | b.domain_userid, 74 | b.user_id, 75 | b.geo_country, 76 | coalesce(b.cmp_events, 0) + coalesce(t.cmp_events, 0) as cmp_events, 77 | coalesce(b.consent_events, 0) + coalesce(t.consent_events, 0) as consent_events, 78 | coalesce(b.last_cmp_event_tstamp, t.last_cmp_event_tstamp) as last_cmp_event_tstamp, 79 | case when l.domain_userid is not null then l.last_consent_event_tstamp else t.last_consent_event_tstamp end as last_consent_event_tstamp, 80 | case when l.domain_userid is not null then l.last_consent_event_type else t.last_consent_event_type end as last_consent_event_type, 81 | case when l.domain_userid is not null then l.last_consent_scopes else t.last_consent_scopes end as last_consent_scopes, 82 | case when l.domain_userid is not null then l.last_consent_version else t.last_consent_version end as last_consent_version, 83 | case when l.domain_userid is not null then l.last_consent_url else t.last_consent_url end as last_consent_url, 84 | case when l.domain_userid is not null then l.last_domains_applied else t.last_domains_applied end as last_domains_applied, 85 | b.last_processed_event, 86 | case when v.is_latest_version then True else False end as is_latest_version 87 | 88 | from base b 89 | 90 | left join latest_consents l 91 | on b.domain_userid = l.domain_userid 92 | 93 | left join {{ this }} t 94 | on t.domain_userid = b.domain_userid 95 | 96 | left join {{ ref('snowplow_web_consent_versions')}} v 97 | on v.consent_version = case when l.domain_userid is not null then l.last_consent_version else t.last_consent_version end 98 | 99 | where (l.latest_consent_event_by_user_rank = 1 or l.domain_userid is null) 100 | and b.latest_event_by_user_rank = 1 101 | 102 | {% else %} 103 | 104 | select 105 | b.domain_userid, 106 | b.user_id, 107 | b.geo_country, 108 | b.cmp_events, 109 | b.consent_events, 110 | b.last_cmp_event_tstamp, 111 | l.last_consent_event_tstamp, 112 | l.last_consent_event_type, 113 | l.last_consent_scopes, 114 | l.last_consent_version, 115 | l.last_consent_url, 116 | l.last_domains_applied, 117 |
b.last_processed_event, 118 | case when v.is_latest_version then True else False end as is_latest_version 119 | 120 | from base b 121 | 122 | left join latest_consents l 123 | on b.domain_userid = l.domain_userid 124 | 125 | left join {{ ref('snowplow_web_consent_versions') }} v 126 | on v.consent_version = l.last_consent_version 127 | 128 | where (l.latest_consent_event_by_user_rank = 1 or l.domain_userid is null) 129 | and b.latest_event_by_user_rank = 1 130 | 131 | {% endif %} 132 | -------------------------------------------------------------------------------- /models/optional_modules/consent/snowplow_web_consent_versions.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='incremental', 11 | enabled=var("snowplow__enable_consent", false), 12 | unique_key='consent_version', 13 | sort = 'version_start_tstamp', 14 | dist = 'consent_version', 15 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 16 | ) 17 | }} 18 | 19 | 20 | {% if is_incremental() %} 21 | {%- set lower_limit, upper_limit = snowplow_utils.return_limits_from_model(this, 22 | 'last_allow_all_event', 23 | 'last_allow_all_event') %} 24 | {% endif %} 25 | 26 | with consent_versions as ( 27 | 28 | select 29 | consent_version, 30 | consent_scopes, 31 | consent_url, 32 | domains_applied, 33 | min(derived_tstamp) as version_start_tstamp, 34 | max(load_tstamp) as last_allow_all_event 35 | 36 | from {{ ref('snowplow_web_consent_log') }} 37 | 38 | where event_name <> 'cmp_visible' and event_type = 
'allow_all' 39 | 40 | {% if is_incremental() %} -- and it has not been processed yet 41 | and load_tstamp > {{ upper_limit }} 42 | {% endif %} 43 | 44 | group by 1,2,3,4 45 | ) 46 | 47 | , latest_version as ( 48 | 49 | select 50 | consent_version, 51 | version_start_tstamp 52 | 53 | from consent_versions 54 | 55 | order by 2 desc limit 1 56 | ) 57 | 58 | {% if is_incremental() %} 59 | {# Keep the earliest start seen for a version across runs. t is unmatched for a version not yet in the existing table, and least() yields NULL on some warehouses (e.g. Snowflake, BigQuery) when any argument is NULL, so the stored value falls back to the batch value before the comparison. #} 60 | select 61 | v.consent_version, 62 | least(v.version_start_tstamp, coalesce(t.version_start_tstamp, v.version_start_tstamp)) as version_start_tstamp, 63 | v.consent_scopes, 64 | v.consent_url, 65 | v.domains_applied, 66 | case when l.consent_version is not null then True else False end is_latest_version, 67 | v.last_allow_all_event 68 | 69 | from consent_versions v 70 | 71 | left join latest_version l 72 | 73 | on v.consent_version = l.consent_version 74 | 75 | left join {{ this }} t 76 | on t.consent_version = v.consent_version 77 | 78 | {% else %} 79 | 80 | select 81 | v.consent_version, 82 | v.version_start_tstamp, 83 | v.consent_scopes, 84 | v.consent_url, 85 | v.domains_applied, 86 | case when l.consent_version is not null then True else False end is_latest_version, 87 | v.last_allow_all_event 88 | 89 | from consent_versions v 90 | 91 | left join latest_version l 92 | 93 | on v.consent_version = l.consent_version 94 | 95 | {% endif %} 96 | -------------------------------------------------------------------------------- /models/optional_modules/core_web_vitals/databricks/snowplow_web_vital_measurements.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='table', 11 | enabled=var("snowplow__enable_cwv", false) and target.type in ('databricks', 'spark') | as_bool() 12 | ) 13 | }} 14 | 15 | with measurements as ( 16 | 17 | select 18 | page_url, 19 | device_class, 20 | geo_country, 21 | cast( {{ dbt.date_trunc('day', 'derived_tstamp') }} as {{ dbt.type_string() }}) as time_period, 22 | count(*) as page_view_count, 23 | grouping_id() as grouping_ids, 24 | percentile_cont(0.{{ var('snowplow__cwv_percentile') }}) within group (order by lcp) as lcp_{{ var('snowplow__cwv_percentile') }}p, 25 | percentile_cont(0.{{ var('snowplow__cwv_percentile') }}) within group (order by fid) as fid_{{ var('snowplow__cwv_percentile') }}p, 26 | percentile_cont(0.{{ var('snowplow__cwv_percentile') }}) within group (order by cls) as cls_{{ var('snowplow__cwv_percentile') }}p, 27 | percentile_cont(0.{{ var('snowplow__cwv_percentile') }}) within group (order by ttfb) as ttfb_{{ var('snowplow__cwv_percentile') }}p, 28 | percentile_cont(0.{{ var('snowplow__cwv_percentile') }}) within group (order by inp) as inp_{{ var('snowplow__cwv_percentile') }}p 29 | from {{ ref('snowplow_web_vitals') }} 30 | 31 | where cast(derived_tstamp as date) >= {{ dateadd('day', '-'+var('snowplow__cwv_days_to_measure')|string, date_trunc('day', snowplow_utils.current_timestamp_in_utc())) }} 32 | 33 | group by cube(page_url, device_class,cast( {{ dbt.date_trunc('day', 'derived_tstamp') }} as {{ dbt.type_string() }}), geo_country) 34 | 35 | ) 36 | 37 | , measurement_type as ( 38 | 39 | select 40 | *, 41 | case when grouping_ids = 15 then 'overall' 42 | when grouping_ids = 3 then 'by_url_and_device' 43 | when grouping_ids = 9 then 'by_day_and_device' 44 | when grouping_ids = 10 then 'by_country_and_device' 45 | when grouping_ids = 14 then 'by_country' 46 | when grouping_ids = 11 then 'by_device' 47 | 
when grouping_ids = 13 then 'by_day' 48 | end as measurement_type, 49 | {{ snowplow_web.core_web_vital_results_query('_' + var('snowplow__cwv_percentile') | string + 'p') }} 50 | 51 | from measurements 52 | ) 53 | 54 | , coalesce as ( 55 | 56 | select 57 | m.measurement_type, 58 | coalesce(m.page_url, 'all') as page_url, 59 | coalesce(m.device_class, 'all') as device_class, 60 | coalesce(m.geo_country, 'all') as geo_country, 61 | coalesce(g.name, 'all') as country, 62 | coalesce(m.time_period, 'last {{var("snowplow__cwv_days_to_measure")|string }} days') as time_period, 63 | m.page_view_count, 64 | ceil(m.lcp_{{ var('snowplow__cwv_percentile') }}p, 3) as lcp_{{ var('snowplow__cwv_percentile') }}p, 65 | ceil(m.fid_{{ var('snowplow__cwv_percentile') }}p, 3) as fid_{{ var('snowplow__cwv_percentile') }}p, 66 | ceil(m.cls_{{ var('snowplow__cwv_percentile') }}p, 3) as cls_{{ var('snowplow__cwv_percentile') }}p, 67 | ceil(m.ttfb_{{ var('snowplow__cwv_percentile') }}p, 3) as ttfb_{{ var('snowplow__cwv_percentile') }}p, 68 | ceil(m.inp_{{ var('snowplow__cwv_percentile') }}p, 3) as inp_{{ var('snowplow__cwv_percentile') }}p, 69 | m.lcp_result, 70 | m.fid_result, 71 | m.cls_result, 72 | m.ttfb_result, 73 | m.inp_result, 74 | {{ snowplow_web.core_web_vital_pass_query() }} as passed 75 | 76 | from measurement_type m 77 | 78 | left join {{ ref(var('snowplow__geo_mapping_seed')) }} g on lower(m.geo_country) = lower(g.alpha_2) 79 | 80 | where measurement_type is not null 81 | 82 | order by 1 83 | 84 | ) 85 | 86 | select 87 | {{ dbt.concat(['page_url', "'-'" , 'device_class', "'-'" , 'geo_country', "'-'" , 'time_period' ]) }} compound_key, 88 | * 89 | from coalesce 90 | -------------------------------------------------------------------------------- /models/optional_modules/core_web_vitals/scratch/bigquery/snowplow_web_vital_events_this_run.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics 
Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | tags=["this_run"], 11 | enabled=var("snowplow__enable_cwv", false) and target.type == 'bigquery' | as_bool(), 12 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 13 | ) 14 | }} 15 | 16 | with prep as ( 17 | 18 | select 19 | e.event_id, 20 | e.event_name, 21 | e.app_id, 22 | e.platform, 23 | e.domain_userid, 24 | e.original_domain_userid, 25 | e.user_id, 26 | e.page_view_id, 27 | e.domain_sessionid, 28 | e.original_domain_sessionid, 29 | e.collector_tstamp, 30 | e.derived_tstamp, 31 | e.dvce_created_tstamp, 32 | e.load_tstamp, 33 | e.geo_country, 34 | e.page_url, 35 | e.page_title, 36 | e.useragent, 37 | 38 | {{ snowplow_utils.get_optional_fields( 39 | enabled=true, 40 | fields=yauaa_fields(), 41 | col_prefix='contexts_nl_basjes_yauaa_context_1', 42 | relation=ref('snowplow_web_base_events_this_run'), 43 | relation_alias='e') }}, 44 | 45 | {{ snowplow_utils.get_optional_fields( 46 | enabled= true, 47 | fields=[{'field': 'lcp', 'dtype': 'string'}, {'field': 'fcp', 'dtype': 'string'}, {'field': 'fid', 'dtype': 'string'}, {'field': 'cls', 'dtype': 'string'}, {'field': 'inp', 'dtype': 'string'}, {'field': 'ttfb', 'dtype': 'string'}, {'field': 'navigation_type', 'dtype': 'string'}], 48 | col_prefix='unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1', 49 | relation=ref('snowplow_web_base_events_this_run'), 50 | relation_alias='e') }} 51 | 52 | from {{ ref("snowplow_web_base_events_this_run") }} as e 53 | 54 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 
55 | 56 | and event_name = 'web_vitals' 57 | 58 | and page_view_id is not null 59 | 60 | -- exclude bot traffic 61 | 62 | {% if var('snowplow__enable_iab', false) %} 63 | and not {{ snowplow_utils.get_field(column_name = 'contexts_com_iab_snowplow_spiders_and_robots_1_0_0', 64 | field_name = 'spider_or_robot', 65 | table_alias = 'e', 66 | type = 'boolean', 67 | array_index = 0)}} = True 68 | {% endif %} 69 | 70 | {{ filter_bots() }} 71 | 72 | ) 73 | 74 | select 75 | event_id, 76 | event_name, 77 | app_id, 78 | platform, 79 | domain_userid, 80 | original_domain_userid, 81 | user_id, 82 | page_view_id, 83 | domain_sessionid, 84 | original_domain_sessionid, 85 | collector_tstamp, 86 | derived_tstamp, 87 | dvce_created_tstamp, 88 | load_tstamp, 89 | geo_country, 90 | page_url, 91 | page_title, 92 | useragent, 93 | lower(device_class) as device_class, 94 | agent_class, 95 | agent_name, 96 | agent_name_version, 97 | agent_name_version_major, 98 | agent_version, 99 | agent_version_major, 100 | device_brand, 101 | device_name, 102 | device_version, 103 | layout_engine_class, 104 | layout_engine_name, 105 | layout_engine_name_version, 106 | layout_engine_name_version_major, 107 | layout_engine_version, 108 | layout_engine_version_major, 109 | operating_system_class, 110 | operating_system_name, 111 | operating_system_name_version, 112 | operating_system_version, 113 | ceil(cast(lcp as decimal)) /1000 as lcp, 114 | ceil(cast(fcp as decimal)) /1000 as fcp, 115 | ceil(safe_cast(fid as decimal) * 1000) /1000 as fid, 116 | ceil(cast(cls as decimal) * 1000) /1000 as cls, 117 | ceil(cast(inp as decimal) * 1000) /1000 as inp, 118 | ceil(cast(ttfb as decimal) * 1000) /1000 as ttfb, 119 | navigation_type 120 | 121 | from prep p 122 | -------------------------------------------------------------------------------- /models/optional_modules/core_web_vitals/scratch/databricks/snowplow_web_vital_events_this_run.sql: 
-------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | tags=["this_run"], 11 | enabled=var("snowplow__enable_cwv", false) and target.type in ('databricks', 'spark') | as_bool(), 12 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 13 | ) 14 | }} 15 | 16 | with prep as ( 17 | 18 | select 19 | e.event_id, 20 | e.event_name, 21 | e.app_id, 22 | e.platform, 23 | e.domain_userid, 24 | e.original_domain_userid, 25 | e.user_id, 26 | e.page_view_id, 27 | e.domain_sessionid, 28 | e.original_domain_sessionid, 29 | e.collector_tstamp, 30 | e.derived_tstamp, 31 | e.dvce_created_tstamp, 32 | e.load_tstamp, 33 | e.geo_country, 34 | e.page_url, 35 | e.page_title, 36 | e.useragent, 37 | 38 | {{snowplow_web.get_yauaa_context_fields()}}, 39 | 40 | ceil(e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1.lcp::decimal(14,4)) /1000 as lcp, 41 | ceil(e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1.fcp::decimal(14,4), 3) as fcp, 42 | ceil(e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1.fid::decimal(14,4), 3) as fid, 43 | ceil(e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1.cls::decimal(14,4), 3) as cls, 44 | ceil(e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1.inp::decimal(14,4), 3) as inp, 45 | ceil(e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1.ttfb::decimal(14,4), 3) as ttfb, 46 | e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1.navigation_type::varchar(128) as navigation_type 47 | 48 | from {{ 
ref("snowplow_web_base_events_this_run") }} as e 49 | 50 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 51 | 52 | and event_name = 'web_vitals' 53 | 54 | and page_view_id is not null 55 | 56 | -- exclude bot traffic 57 | 58 | {% if var('snowplow__enable_iab', false) %} 59 | and not {{ snowplow_utils.get_field(column_name = 'contexts_com_iab_snowplow_spiders_and_robots_1', 60 | field_name = 'spider_or_robot', 61 | table_alias = 'e', 62 | type = 'boolean', 63 | array_index = 0)}} = True 64 | {% endif %} 65 | 66 | {{ filter_bots() }} 67 | 68 | ) 69 | 70 | select 71 | event_id, 72 | event_name, 73 | app_id, 74 | platform, 75 | domain_userid, 76 | original_domain_userid, 77 | user_id, 78 | page_view_id, 79 | domain_sessionid, 80 | original_domain_sessionid, 81 | collector_tstamp, 82 | derived_tstamp, 83 | dvce_created_tstamp, 84 | load_tstamp, 85 | geo_country, 86 | page_url, 87 | page_title, 88 | useragent, 89 | lower(device_class) as device_class, 90 | agent_class, 91 | agent_name, 92 | agent_name_version, 93 | agent_name_version_major, 94 | agent_version, 95 | agent_version_major, 96 | device_brand, 97 | device_name, 98 | device_version, 99 | layout_engine_class, 100 | layout_engine_name, 101 | layout_engine_name_version, 102 | layout_engine_name_version_major, 103 | layout_engine_version, 104 | layout_engine_version_major, 105 | operating_system_class, 106 | operating_system_name, 107 | operating_system_name_version, 108 | operating_system_version, 109 | lcp, 110 | fcp, 111 | fid, 112 | cls, 113 | inp, 114 | ttfb, 115 | navigation_type 116 | 117 | from prep p 118 | -------------------------------------------------------------------------------- /models/optional_modules/core_web_vitals/scratch/default/snowplow_web_vital_events_this_run.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. 
All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | tags=["this_run"], 11 | enabled=var("snowplow__enable_cwv", false) and target.type in ('redshift', 'postgres') | as_bool() 12 | ) 13 | }} 14 | /* Redshift/Postgres variant: web vitals metrics are read from the cwv_ prefixed columns of the base events of this run. Each metric is rounded up to 3 decimal places with ceil(x * 1000) / 1000; LCP is divided by 1000 before rounding. */ 15 | with prep as ( 16 | 17 | select 18 | e.event_id, 19 | e.event_name, 20 | e.app_id, 21 | e.platform, 22 | e.domain_userid, 23 | e.original_domain_userid, 24 | e.user_id, 25 | e.page_view_id, 26 | e.domain_sessionid, 27 | e.original_domain_sessionid, 28 | e.collector_tstamp, 29 | e.derived_tstamp, 30 | e.dvce_created_tstamp, 31 | e.load_tstamp, 32 | e.geo_country, 33 | e.page_url, 34 | e.page_title, 35 | e.useragent, 36 | 37 | {{snowplow_web.get_yauaa_context_fields()}}, 38 | 39 | ceil(cast(cwv_lcp/1000 as decimal(14,4))*1000) /1000 as lcp, 40 | ceil(cast(cwv_fcp as decimal(14,4))*1000) /1000 as fcp, 41 | ceil(cast(cwv_fid as decimal(14,4))*1000) /1000 as fid, 42 | ceil(cast(cwv_cls as decimal(14,4))*1000) /1000 as cls, 43 | ceil(cast(cwv_inp as decimal(14,4))*1000) /1000 as inp, 44 | ceil(cast(cwv_ttfb as decimal(14,4))*1000) /1000 as ttfb, 45 | cast(cwv_navigation_type as {{ dbt.type_string() }}) as navigation_type 46 | 47 | from {{ ref("snowplow_web_base_events_this_run") }} as e 48 | 49 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 
50 | 51 | and event_name = 'web_vitals' 52 | 53 | and page_view_id is not null 54 | 55 | -- exclude bot traffic 56 | 57 | {% if var('snowplow__enable_iab', false) %} 58 | and not e.iab_spider_or_robot = True 59 | {% endif %} 60 | 61 | {{ filter_bots() }} 62 | 63 | ) 64 | /* Final projection: renames the yauaa_ prefixed columns to their unprefixed names and lower-cases device_class. */ 65 | select 66 | event_id, 67 | event_name, 68 | app_id, 69 | platform, 70 | domain_userid, 71 | original_domain_userid, 72 | user_id, 73 | page_view_id, 74 | domain_sessionid, 75 | original_domain_sessionid, 76 | collector_tstamp, 77 | derived_tstamp, 78 | dvce_created_tstamp, 79 | load_tstamp, 80 | geo_country, 81 | page_url, 82 | page_title, 83 | useragent, 84 | lower(yauaa_device_class) as device_class, 85 | yauaa_agent_class as agent_class, 86 | yauaa_agent_name as agent_name, 87 | yauaa_agent_name_version as agent_name_version, 88 | yauaa_agent_name_version_major as agent_name_version_major, 89 | yauaa_agent_version as agent_version, 90 | yauaa_agent_version_major as agent_version_major, 91 | yauaa_device_brand as device_brand, 92 | yauaa_device_name as device_name, 93 | yauaa_device_version as device_version, 94 | yauaa_layout_engine_class as layout_engine_class, 95 | yauaa_layout_engine_name as layout_engine_name, 96 | yauaa_layout_engine_name_version as layout_engine_name_version, 97 | yauaa_layout_engine_name_version_major as layout_engine_name_version_major, 98 | yauaa_layout_engine_version as layout_engine_version, 99 | yauaa_layout_engine_version_major as layout_engine_version_major, 100 | yauaa_operating_system_class as operating_system_class, 101 | yauaa_operating_system_name as operating_system_name, 102 | yauaa_operating_system_name_version as operating_system_name_version, 103 | yauaa_operating_system_version as operating_system_version, 104 | lcp, 105 | fcp, 106 | fid, 107 | cls, 108 | inp, 109 | ttfb, 110 | navigation_type 111 | 112 | from prep p 113 | -------------------------------------------------------------------------------- 
/models/optional_modules/core_web_vitals/scratch/snowflake/snowplow_web_vital_events_this_run.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | tags=["this_run"], 11 | enabled=var("snowplow__enable_cwv", false) and target.type == 'snowflake' | as_bool(), 12 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 13 | ) 14 | }} 15 | /* Snowflake variant: selects web_vitals events that have a page_view_id from the base events of this run and reads each metric from the web vitals variant column. Every metric is rounded up to 3 decimal places; LCP is divided by 1000 before rounding so that it carries the same 3 decimal places as the other metrics and as the other warehouse variants of this model. */ 16 | with prep as ( 17 | 18 | select 19 | e.event_id, 20 | e.event_name, 21 | e.app_id, 22 | e.platform, 23 | e.domain_userid, 24 | e.original_domain_userid, 25 | e.user_id, 26 | e.page_view_id, 27 | e.domain_sessionid, 28 | e.original_domain_sessionid, 29 | e.collector_tstamp, 30 | e.derived_tstamp, 31 | e.dvce_created_tstamp, 32 | e.load_tstamp, 33 | e.geo_country, 34 | e.page_url, 35 | e.page_title, 36 | e.useragent, 37 | 38 | {{snowplow_web.get_yauaa_context_fields()}}, 39 | 40 | ceil(e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1:lcp::decimal(14,4) /1000, 3) as lcp, 41 | ceil(e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1:fcp::decimal(14,4), 3) as fcp, 42 | ceil(e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1:fid::decimal(14,4), 3) as fid, 43 | ceil(e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1:cls::decimal(14,4), 3) as cls, 44 | ceil(e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1:inp::decimal(14,4), 3) as inp, 45 | ceil(e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1:ttfb::decimal(14,4), 3) as ttfb, 46 | 
e.unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1:navigationType::varchar as navigation_type 47 | 48 | from {{ ref("snowplow_web_base_events_this_run") }} as e 49 | 50 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 51 | 52 | and event_name = 'web_vitals' 53 | 54 | and page_view_id is not null 55 | 56 | -- exclude bot traffic 57 | 58 | {% if var('snowplow__enable_iab', false) %} 59 | and not {{ snowplow_utils.get_field(column_name = 'contexts_com_iab_snowplow_spiders_and_robots_1', 60 | field_name = 'spiderOrRobot', 61 | table_alias = 'e', 62 | type = 'boolean', 63 | array_index = 0)}} = True 64 | {% endif %} 65 | 66 | {{ filter_bots() }} 67 | 68 | ) 69 | /* Final projection: device_class is lower-cased, every other prep column is passed through unchanged. */ 70 | select 71 | event_id, 72 | event_name, 73 | app_id, 74 | platform, 75 | domain_userid, 76 | original_domain_userid, 77 | user_id, 78 | page_view_id, 79 | domain_sessionid, 80 | original_domain_sessionid, 81 | collector_tstamp, 82 | derived_tstamp, 83 | dvce_created_tstamp, 84 | load_tstamp, 85 | geo_country, 86 | page_url, 87 | page_title, 88 | useragent, 89 | lower(device_class) as device_class, 90 | agent_class, 91 | agent_name, 92 | agent_name_version, 93 | agent_name_version_major, 94 | agent_version, 95 | agent_version_major, 96 | device_brand, 97 | device_name, 98 | device_version, 99 | layout_engine_class, 100 | layout_engine_name, 101 | layout_engine_name_version, 102 | layout_engine_name_version_major, 103 | layout_engine_version, 104 | layout_engine_version_major, 105 | operating_system_class, 106 | operating_system_name, 107 | operating_system_name_version, 108 | operating_system_version, 109 | lcp, 110 | fcp, 111 | fid, 112 | cls, 113 | inp, 114 | ttfb, 115 | navigation_type 116 | 117 | from prep p 118 | -------------------------------------------------------------------------------- /models/optional_modules/core_web_vitals/scratch/snowplow_web_vitals_this_run.sql: 
-------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | enabled=var("snowplow__enable_cwv", false) | as_bool(), 11 | tags=["this_run"], 12 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 13 | ) 14 | }} 15 | /* Deduplicates web vitals events to one row per page_view_id. Rows are numbered within each page_view_id by derived_tstamp, dvce_created_tstamp and event_id, and the outer query keeps only dedupe_index = 1. Null geo_country, page_url and device_class are replaced by unknown_ placeholders. */ 16 | with prep as ( 17 | 18 | select 19 | e.event_id, 20 | e.event_name, 21 | e.app_id, 22 | e.platform, 23 | e.domain_userid, 24 | e.original_domain_userid, 25 | e.user_id, 26 | e.page_view_id, 27 | e.domain_sessionid, 28 | e.original_domain_sessionid, 29 | e.collector_tstamp, 30 | e.derived_tstamp, 31 | e.load_tstamp, 32 | coalesce(e.geo_country, 'unknown_geo_country') as geo_country, 33 | coalesce(e.page_url, 'unknown_page_url') as page_url, 34 | {{ core_web_vital_page_groups() }} as url_group, 35 | e.page_title, 36 | e.useragent, 37 | coalesce(e.device_class, 'unknown_device_class') as device_class, 38 | e.device_name, 39 | e.agent_name, 40 | e.agent_version, 41 | e.operating_system_name, 42 | e.lcp, 43 | e.fcp, 44 | e.fid, 45 | e.cls, 46 | e.inp, 47 | e.ttfb, 48 | e.navigation_type, 49 | row_number() over (partition by e.page_view_id order by e.derived_tstamp, e.dvce_created_tstamp, e.event_id) dedupe_index 50 | 51 | from {{ ref("snowplow_web_vital_events_this_run") }} as e 52 | 53 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 
54 | 55 | 56 | ) 57 | 58 | select 59 | *, 60 | {{ snowplow_web.core_web_vital_results_query() }} 61 | 62 | from prep p 63 | 64 | where dedupe_index = 1 65 | -------------------------------------------------------------------------------- /models/optional_modules/core_web_vitals/snowflake/snowplow_web_vital_measurements.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='table', 11 | enabled=var("snowplow__enable_cwv", false) and target.type == 'snowflake' | as_bool(), 12 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 13 | ) 14 | }} 15 | /* Snowflake variant. Aggregates snowplow_web_vitals rows from the last snowplow__cwv_days_to_measure days over seven grouping sets (overall, url + device, device, day, day + device, country, country + device) and computes the snowplow__cwv_percentile percentile of lcp, fid, cls, ttfb and inp for each group. The id_ columns record, per grouping set, whether a row was grouped on those columns. */ 16 | with measurements as ( 17 | 18 | select 19 | page_url, 20 | device_class, 21 | geo_country, 22 | cast( {{ dbt.date_trunc('day', 'derived_tstamp') }} as {{ dbt.type_string() }}) as time_period, 23 | count(*) as page_view_count, 24 | grouping_id(page_url, device_class) as id_url_and_device, 25 | grouping_id(device_class) as id_device, 26 | grouping_id(cast( {{ dbt.date_trunc('day', 'derived_tstamp') }} as {{ dbt.type_string() }})) as id_period, 27 | grouping_id(cast( {{ dbt.date_trunc('day', 'derived_tstamp') }} as {{ dbt.type_string() }}), device_class) as id_period_and_device, 28 | grouping_id(geo_country) as id_country, 29 | grouping_id(geo_country, device_class) as id_country_and_device, 30 | percentile_cont(0.{{ var('snowplow__cwv_percentile') }}) within group (order by lcp) as lcp_{{ var('snowplow__cwv_percentile') }}p, 31 | percentile_cont(0.{{ var('snowplow__cwv_percentile') }}) within group (order by fid) as 
fid_{{ var('snowplow__cwv_percentile') }}p, 32 | percentile_cont(0.{{ var('snowplow__cwv_percentile') }}) within group (order by cls) as cls_{{ var('snowplow__cwv_percentile') }}p, 33 | percentile_cont(0.{{ var('snowplow__cwv_percentile') }}) within group (order by ttfb) as ttfb_{{ var('snowplow__cwv_percentile') }}p, 34 | percentile_cont(0.{{ var('snowplow__cwv_percentile') }}) within group (order by inp) as inp_{{ var('snowplow__cwv_percentile') }}p 35 | 36 | from {{ ref('snowplow_web_vitals') }} 37 | 38 | where cast(derived_tstamp as date) >= {{ dateadd('day', '-'+var('snowplow__cwv_days_to_measure')|string, date_trunc('day', snowplow_utils.current_timestamp_in_utc())) }} 39 | 40 | group by grouping sets ((), (page_url, device_class), (device_class), (cast( {{ dbt.date_trunc('day', 'derived_tstamp') }} as {{ dbt.type_string() }})), (cast( {{ dbt.date_trunc('day', 'derived_tstamp') }} as {{ dbt.type_string() }}), device_class), (geo_country), (geo_country, device_class)) 41 | 42 | ) 43 | /* Labels each row with the grouping set that produced it. Branch order matters: a row grouped on two columns also has a zero single-column id (for example id_device is 0 for url + device rows), so the two-column sets are tested before by_country, by_device and by_day. */ 44 | , measurement_type as ( 45 | 46 | select 47 | *, 48 | case when id_url_and_device <> 0 and id_device <> 0 and id_period <> 0 and id_period_and_device <> 0 and id_country <> 0 and id_country_and_device <> 0 then 'overall' 49 | when id_url_and_device = 0 then 'by_url_and_device' 50 | when id_period_and_device = 0 then 'by_day_and_device' 51 | when id_country_and_device = 0 then 'by_country_and_device' 52 | when id_country = 0 then 'by_country' 53 | when id_device = 0 then 'by_device' 54 | when id_period = 0 then 'by_day' 55 | end as measurement_type, 56 | {{ snowplow_web.core_web_vital_results_query('_' + var('snowplow__cwv_percentile') | string + 'p') }} 57 | 58 | from measurements 59 | ) 60 | 61 | , coalesce as ( 62 | 63 | select 64 | m.measurement_type, 65 | coalesce(m.page_url, 'all') as page_url, 66 | coalesce(m.device_class, 'all') as device_class, 67 | coalesce(m.geo_country, 'all') as geo_country, 68 | coalesce(g.name, 'all') as country, 69 | coalesce(time_period, 
'last {{var("snowplow__cwv_days_to_measure")|string }} days') as time_period, 70 | page_view_count, 71 | ceil(lcp_{{ var('snowplow__cwv_percentile') }}p, 3) as lcp_{{ var('snowplow__cwv_percentile') }}p, 72 | ceil(fid_{{ var('snowplow__cwv_percentile') }}p, 3) as fid_{{ var('snowplow__cwv_percentile') }}p, 73 | ceil(cls_{{ var('snowplow__cwv_percentile') }}p, 3) as cls_{{ var('snowplow__cwv_percentile') }}p, 74 | ceil(ttfb_{{ var('snowplow__cwv_percentile') }}p, 3) as ttfb_{{ var('snowplow__cwv_percentile') }}p, 75 | ceil(inp_{{ var('snowplow__cwv_percentile') }}p, 3) as inp_{{ var('snowplow__cwv_percentile') }}p, 76 | m.lcp_result, 77 | m.fid_result, 78 | m.cls_result, 79 | m.ttfb_result, 80 | m.inp_result, 81 | {{ snowplow_web.core_web_vital_pass_query() }} as passed 82 | 83 | from measurement_type m 84 | 85 | left join {{ ref(var('snowplow__geo_mapping_seed')) }} g on lower(m.geo_country) = lower(g.alpha_2) 86 | 87 | order by 1 88 | 89 | ) 90 | /* compound_key concatenates page_url, device_class, geo_country and time_period, separated by hyphens, after nulls from the grouping sets have been replaced by the word all or by the last N days label. */ 91 | select 92 | {{ dbt.concat(['page_url', "'-'" , 'device_class', "'-'" , 'geo_country', "'-'" , 'time_period' ]) }} compound_key, 93 | * 94 | from coalesce 95 | -------------------------------------------------------------------------------- /models/optional_modules/core_web_vitals/snowplow_web_vitals.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized= 'incremental', 11 | enabled=var("snowplow__enable_cwv", false) | as_bool(), 12 | unique_key='page_view_id', 13 | upsert_date_key='derived_tstamp', 14 | sort='derived_tstamp', 15 | dist='page_view_id', 16 | tags=["derived"], 17 | partition_by = snowplow_utils.get_value_by_target_type(bigquery_val = { 18 | "field": "derived_tstamp", 19 | "data_type": "timestamp" 20 | }, databricks_val = 'derived_tstamp_date'), 21 | cluster_by=snowplow_web.web_cluster_by_fields_cwv(), 22 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), 23 | tblproperties={ 24 | 'delta.autoOptimize.optimizeWrite' : 'true', 25 | 'delta.autoOptimize.autoCompact' : 'true' 26 | }, 27 | snowplow_optimize= true 28 | ) 29 | }} 30 | /* Incremental table keyed on page_view_id, loaded from snowplow_web_vitals_this_run. On Databricks/Spark an extra derived_tstamp_date column is added; it is the column named as the Databricks partition_by value in the config above. */ 31 | select 32 | * 33 | {% if target.type in ['databricks', 'spark'] -%} 34 | , DATE(derived_tstamp) as derived_tstamp_date 35 | {%- endif %} 36 | 37 | from {{ ref('snowplow_web_vitals_this_run') }} 38 | 39 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 40 | -------------------------------------------------------------------------------- /models/page_views/scratch/snowplow_web_pv_engaged_time.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 11 | ) 12 | }} 13 | /* Engaged time per page view, computed from page_ping events only. Output has one row per page_view_id, or per page_view_id and domain_sessionid when snowplow__limit_page_views_to_session is true (the default). end_tstamp is the latest derived_tstamp among the pings. */ 14 | select 15 | ev.page_view_id, 16 | {% if var('snowplow__limit_page_views_to_session', true) %} 17 | ev.domain_sessionid, 18 | {% endif %} 19 | max(ev.derived_tstamp) as end_tstamp, 20 | 21 | -- aggregate pings: 22 | -- divides epoch tstamps by snowplow__heartbeat to get distinct intervals 23 | -- floor rounds to nearest integer - duplicates all evaluate to the same number 24 | -- count(distinct) counts duplicates only once 25 | -- adding snowplow__min_visit_length accounts for the page view event itself. 26 | 27 | {{ var("snowplow__heartbeat", 10) }} * (count(distinct(floor({{ snowplow_utils.to_unixtstamp('ev.dvce_created_tstamp') }}/{{ var("snowplow__heartbeat", 10) }}))) - 1) + {{ var("snowplow__min_visit_length", 5) }} as engaged_time_in_s 28 | 29 | from {{ ref('snowplow_web_base_events_this_run') }} as ev 30 | 31 | where ev.event_name = 'page_ping' 32 | and ev.page_view_id is not null 33 | 34 | group by 1 {% if var('snowplow__limit_page_views_to_session', true) %}, 2 {% endif %} 35 | -------------------------------------------------------------------------------- /models/page_views/scratch/snowplow_web_pv_scroll_depth.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 11 | ) 12 | }} 13 | /* Scroll depth per page view, from page_view and page_ping events with a positive document width and height. prep clamps the min/max x and y offsets to the range 0 .. document size; the outer query expresses them as rounded percentages of the document width or height, adding the viewport size to the max offsets. */ 14 | with prep as ( 15 | select 16 | ev.page_view_id, 17 | {% if var('snowplow__limit_page_views_to_session', true) %} 18 | ev.domain_sessionid, 19 | {% endif %} 20 | 21 | max(ev.doc_width) as doc_width, 22 | max(ev.doc_height) as doc_height, 23 | 24 | max(ev.br_viewwidth) as br_viewwidth, 25 | max(ev.br_viewheight) as br_viewheight, 26 | 27 | -- coalesce replaces null with 0 (because the page view event does send an offset) 28 | -- greatest prevents outliers (negative offsets) 29 | -- least also prevents outliers (offsets greater than the docwidth or docheight) 30 | 31 | least(greatest(min(coalesce(ev.pp_xoffset_min, 0)), 0), max(ev.doc_width)) as hmin, -- should be zero 32 | least(greatest(max(coalesce(ev.pp_xoffset_max, 0)), 0), max(ev.doc_width)) as hmax, 33 | 34 | least(greatest(min(coalesce(ev.pp_yoffset_min, 0)), 0), max(ev.doc_height)) as vmin, -- should be zero (edge case: not zero because the pv event is missing) 35 | least(greatest(max(coalesce(ev.pp_yoffset_max, 0)), 0), max(ev.doc_height)) as vmax 36 | 37 | from {{ ref('snowplow_web_base_events_this_run') }} as ev 38 | 39 | where ev.event_name in ('page_view', 'page_ping') 40 | and ev.page_view_id is not null 41 | and ev.doc_height > 0 -- exclude problematic (but rare) edge case 42 | and ev.doc_width > 0 -- exclude problematic (but rare) edge case 43 | 44 | group by 1 {% if var('snowplow__limit_page_views_to_session', true) %}, 2 {% endif %} 45 | ) 46 | 47 | select 48 | page_view_id, 49 | {% if var('snowplow__limit_page_views_to_session', true) %} 50 | domain_sessionid, 51 | {% endif %} 52 | 53 | doc_width, 54 | doc_height, 55 | 56 | br_viewwidth, 57 | br_viewheight, 58 | 59 | hmin, 60 | hmax, 61 | vmin, 62 | vmax, 63 | 64 | 
cast(round(100*(greatest(hmin, 0)/cast(doc_width as {{ type_float() }}))) as {{ type_float() }}) as relative_hmin, -- brackets matter: because hmin is of type int, we need to divide before we multiply by 100 or we risk an overflow 65 | cast(round(100*(least(hmax + br_viewwidth, doc_width)/cast(doc_width as {{ type_float() }}))) as {{ type_float() }}) as relative_hmax, 66 | cast(round(100*(greatest(vmin, 0)/cast(doc_height as {{ type_float() }}))) as {{ type_float() }}) as relative_vmin, 67 | cast(round(100*(least(vmax + br_viewheight, doc_height)/cast(doc_height as {{ type_float() }}))) as {{ type_float() }}) as relative_vmax -- not zero when a user hasn't scrolled because it includes the non-zero viewheight 68 | 69 | from prep 70 | -------------------------------------------------------------------------------- /models/page_views/snowplow_web_page_views.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='incremental', 11 | on_schema_change='append_new_columns', 12 | unique_key='page_view_id', 13 | upsert_date_key='start_tstamp', 14 | sort='start_tstamp', 15 | dist='page_view_id', 16 | partition_by = snowplow_utils.get_value_by_target_type(bigquery_val = { 17 | "field": "start_tstamp", 18 | "data_type": "timestamp" 19 | }, databricks_val='start_tstamp_date'), 20 | cluster_by=snowplow_web.web_cluster_by_fields_page_views(), 21 | tags=["derived"], 22 | post_hook="{{ snowplow_web.stitch_user_identifiers( 23 | enabled=var('snowplow__page_view_stitching') 24 | ) }}", 25 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), 26 | tblproperties={ 27 | 'delta.autoOptimize.optimizeWrite' : 'true', 28 | 'delta.autoOptimize.autoCompact' : 'true' 29 | }, 30 | snowplow_optimize = true 31 | ) 32 | }} 33 | /* Incremental page views table keyed on page_view_id, loaded from snowplow_web_page_views_this_run. On Databricks/Spark an extra start_tstamp_date column is added; it is the column named as the Databricks partition_by value in the config above. The post hook calls stitch_user_identifiers, switched by snowplow__page_view_stitching. */ 34 | 35 | select * 36 | {% if target.type in ['databricks', 'spark'] -%} 37 | , DATE(start_tstamp) as start_tstamp_date 38 | {%- endif %} 39 | from {{ ref('snowplow_web_page_views_this_run') }} 40 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 41 | -------------------------------------------------------------------------------- /models/sessions/snowplow_web_sessions.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='incremental', 11 | on_schema_change='append_new_columns', 12 | unique_key='domain_sessionid', 13 | upsert_date_key='start_tstamp', 14 | sort='start_tstamp', 15 | dist='domain_sessionid', 16 | partition_by = snowplow_utils.get_value_by_target_type(bigquery_val={ 17 | "field": "start_tstamp", 18 | "data_type": "timestamp" 19 | }, databricks_val='start_tstamp_date'), 20 | cluster_by=snowplow_web.web_cluster_by_fields_sessions(), 21 | tags=["derived"], 22 | post_hook="{{ snowplow_web.stitch_user_identifiers( 23 | enabled=var('snowplow__session_stitching') 24 | ) }}", 25 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), 26 | tblproperties={ 27 | 'delta.autoOptimize.optimizeWrite' : 'true', 28 | 'delta.autoOptimize.autoCompact' : 'true' 29 | }, 30 | snowplow_optimize = true 31 | ) 32 | }} 33 | /* Incremental sessions table keyed on domain_sessionid, loaded from snowplow_web_sessions_this_run. On Databricks/Spark an extra start_tstamp_date column is added; it is the column named as the Databricks partition_by value in the config above. The post hook calls stitch_user_identifiers, switched by snowplow__session_stitching. */ 34 | 35 | select * 36 | {% if target.type in ['databricks', 'spark'] -%} 37 | , DATE(start_tstamp) as start_tstamp_date 38 | {%- endif %} 39 | from {{ ref('snowplow_web_sessions_this_run') }} 40 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 41 | -------------------------------------------------------------------------------- /models/user_mapping/snowplow_web_user_mapping.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | materialized='incremental', 11 | unique_key='domain_userid', 12 | sort='end_tstamp', 13 | dist='domain_userid', 14 | partition_by = snowplow_utils.get_value_by_target_type(bigquery_val={ 15 | "field": "end_tstamp", 16 | "data_type": "timestamp" 17 | }), 18 | tags=["derived"], 19 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 20 | ) 21 | }} 22 | /* One row per domain_userid. user_id is the value of the stitching column (snowplow__user_stitching_id, default user_id) on the event with the latest collector_tstamp, and end_tstamp is that latest collector_tstamp. Only events where both the stitching column and domain_userid are non-null are considered; select distinct collapses the per-event window results to a single row. */ 23 | 24 | select distinct 25 | domain_userid, 26 | last_value({{ var('snowplow__user_stitching_id', 'user_id') }}) over( 27 | partition by domain_userid 28 | order by collector_tstamp 29 | rows between unbounded preceding and unbounded following 30 | ) as user_id, 31 | max(collector_tstamp) over (partition by domain_userid) as end_tstamp 32 | 33 | from {{ ref('snowplow_web_base_events_this_run') }} 34 | 35 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }} --returns false if run doesn't contain new events. 36 | and {{ var('snowplow__user_stitching_id', 'user_id') }} is not null 37 | and domain_userid is not null 38 | -------------------------------------------------------------------------------- /models/user_mapping/user_mapping.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: snowplow_web_user_mapping 5 | description: A mapping table between `domain_userid` and `user_id`. 
6 | columns: 7 | - name: domain_userid 8 | description: '{{ doc("col_domain_userid") }}' 9 | tags: 10 | - primary-key 11 | tests: 12 | - unique 13 | - not_null 14 | - name: user_id 15 | description: '{{ doc("col_user_id") }}' 16 | tests: 17 | - not_null 18 | - name: end_tstamp 19 | description: The `collector_tstamp` when the user was last active 20 | tests: 21 | - not_null 22 | -------------------------------------------------------------------------------- /models/users/scratch/snowplow_web_users_aggs.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | partition_by = snowplow_utils.get_value_by_target_type(bigquery_val={ 11 | "field": "start_tstamp", 12 | "data_type": "timestamp" 13 | }), 14 | cluster_by=snowplow_utils.get_value_by_target_type(bigquery_val=["domain_userid"]), 15 | sort='domain_userid', 16 | dist='domain_userid', 17 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 18 | ) 19 | }} 20 | /* Collapses the rows of snowplow_web_users_sessions_this_run into one row per domain_userid, user_start_tstamp and user_end_tstamp: the ids of the first and last session plus summed page views, distinct session count and summed engaged time. */ 21 | select 22 | domain_userid, 23 | -- time 24 | user_start_tstamp as start_tstamp, 25 | user_end_tstamp as end_tstamp, 26 | -- first/last session. 
Max to resolve edge case with multiple sessions with the same start/end tstamp 27 | max(case when start_tstamp = user_start_tstamp then domain_sessionid end) as first_domain_sessionid, 28 | max(case when end_tstamp = user_end_tstamp then domain_sessionid end) as last_domain_sessionid, 29 | -- engagement 30 | sum(page_views) as page_views, 31 | count(distinct domain_sessionid) as sessions, 32 | sum(engaged_time_in_s) as engaged_time_in_s 33 | 34 | from {{ ref('snowplow_web_users_sessions_this_run') }} 35 | 36 | group by 1,2,3 37 | -------------------------------------------------------------------------------- /models/users/scratch/snowplow_web_users_lasts.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 11 | ) 12 | }} 13 | /* Selects the last_ prefixed page, geo and language columns from the session row whose domain_sessionid equals last_domain_sessionid in snowplow_web_users_aggs. Entries of snowplow__user_last_passthroughs are appended: a mapping contributes its sql under its alias, a plain column name is exposed as last_ followed by that name. */ 14 | 15 | select 16 | a.domain_userid, 17 | a.last_page_title, 18 | 19 | a.last_page_url, 20 | 21 | a.last_page_urlscheme, 22 | a.last_page_urlhost, 23 | a.last_page_urlpath, 24 | a.last_page_urlquery, 25 | a.last_page_urlfragment, 26 | 27 | a.last_geo_country, 28 | a.last_geo_country_name, 29 | a.last_geo_continent, 30 | a.last_geo_city, 31 | a.last_geo_region_name, 32 | a.last_br_lang, 33 | a.last_br_lang_name 34 | 35 | {%- if var('snowplow__user_last_passthroughs', []) -%} 36 | {%- for identifier in var('snowplow__user_last_passthroughs', []) %} 37 | {# Check if it's a simple column or a sql+alias #} 38 | {%- if identifier is mapping -%} 39 | ,{{identifier['sql']}} as {{identifier['alias']}} 40 | {%- else -%} 41 | ,a.{{identifier}} as last_{{identifier}} 42 | {%- endif -%} 43 | {% endfor -%} 44 | {%- endif %} 45 | 46 | from {{ ref('snowplow_web_users_sessions_this_run') }} a 47 | 48 | inner join {{ ref('snowplow_web_users_aggs') }} b 49 | on a.domain_sessionid = b.last_domain_sessionid 50 | -------------------------------------------------------------------------------- /models/users/scratch/snowplow_web_users_sessions_this_run.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 6 | #} 7 | 8 | {{ 9 | config( 10 | tags=["this_run"], 11 | sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) 12 | ) 13 | }} 14 | /* Every row of the table named by snowplow__sessions_table whose domain_userid appears as user_identifier in snowplow_web_base_sessions_this_run, annotated with the earliest start_tstamp and latest end_tstamp across all selected sessions of that domain_userid. */ 15 | select 16 | a.*, 17 | min(a.start_tstamp) over(partition by a.domain_userid) as user_start_tstamp, 18 | max(a.end_tstamp) over(partition by a.domain_userid) as user_end_tstamp 19 | 20 | from {{ var('snowplow__sessions_table') }} a 21 | where exists (select 1 from {{ ref('snowplow_web_base_sessions_this_run') }} b where a.domain_userid = b.user_identifier) 22 | -------------------------------------------------------------------------------- /models/users/scratch/snowplow_web_users_this_run.sql: -------------------------------------------------------------------------------- 1 | {# 2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved. 3 | This program is licensed to you under the Snowplow Community License Version 1.0, 4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {{
9 |   config(
10 |     tags=["this_run"],
11 |     sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt'))
12 |   )
13 | }}
14 | {# Joins snowplow_web_users_aggs (b) to the session row matched by first_domain_sessionid (a) and to snowplow_web_users_lasts (c, matched on domain_userid), and selects user, engagement, first/last page, referrer and marketing columns. #}
15 | select
16 |   -- user fields
17 |   a.user_id,
18 |   a.domain_userid,
19 |   a.original_domain_userid,
20 |   a.network_userid,
21 |   -- start/end timestamps are read from snowplow_web_users_aggs (b), not from the matched session row
22 |   b.start_tstamp,
23 |   b.end_tstamp,
24 |   {{ snowplow_utils.current_timestamp_in_utc() }} as model_tstamp,
25 |
26 |   -- engagement fields
27 |   b.page_views,
28 |   b.sessions,
29 |
30 |   b.engaged_time_in_s,
31 |
32 |   -- first page fields
33 |   a.first_page_title,
34 |   a.first_page_url,
35 |   a.first_page_urlscheme,
36 |   a.first_page_urlhost,
37 |   a.first_page_urlpath,
38 |   a.first_page_urlquery,
39 |   a.first_page_urlfragment,
40 |   -- geo and browser-language columns of the matched session row, renamed with a first_ prefix
41 |   a.geo_country as first_geo_country,
42 |   a.geo_country_name as first_geo_country_name,
43 |   a.geo_continent as first_geo_continent,
44 |   a.geo_city as first_geo_city,
45 |   a.geo_region_name as first_geo_region_name,
46 |   a.br_lang as first_br_lang,
47 |   a.br_lang_name as first_br_lang_name,
48 |   -- last page, geo and browser-language fields (read from snowplow_web_users_lasts)
49 |   c.last_page_title,
50 |   c.last_page_url,
51 |   c.last_page_urlscheme,
52 |   c.last_page_urlhost,
53 |   c.last_page_urlpath,
54 |   c.last_page_urlquery,
55 |   c.last_page_urlfragment,
56 |
57 |   c.last_geo_country,
58 |   c.last_geo_country_name,
59 |   c.last_geo_continent,
60 |   c.last_geo_city,
61 |   c.last_geo_region_name,
62 |   c.last_br_lang,
63 |   c.last_br_lang_name,
64 |
65 |
66 |   -- referrer fields
67 |   a.referrer,
68 |
69 |   a.refr_urlscheme,
70 |   a.refr_urlhost,
71 |   a.refr_urlpath,
72 |   a.refr_urlquery,
73 |   a.refr_urlfragment,
74 |
75 |   a.refr_medium,
76 |   a.refr_source,
77 |   a.refr_term,
78 |
79 |   -- marketing fields
80 |   a.mkt_medium,
81 |   a.mkt_source,
82 |   a.mkt_term,
83 |   a.mkt_content,
84 |   a.mkt_campaign,
85 |   a.mkt_clickid,
86 |   a.mkt_network,
87 |   a.mkt_source_platform,
88 |   a.default_channel_group
89 |   {#- Optional passthrough columns. snowplow__user_first_passthroughs: a mapping entry renders "<sql> as <alias>", any other entry renders "a.<entry> as first_<entry>". snowplow__user_last_passthroughs: a mapping entry renders "c.<alias>", any other entry renders "c.last_<entry>", i.e. the column names produced by snowplow_web_users_lasts. NOTE(review): mapping sql is emitted verbatim, so it may rely on the table aliases a/b/c; confirm before renaming them. -#}
90 |   {%- if var('snowplow__user_first_passthroughs', []) -%}
91 |     {%- for identifier in var('snowplow__user_first_passthroughs', []) %}
92 |     {# Check if it's a simple column or a sql+alias #}
93 |     {%- if identifier is mapping -%}
94 |       ,{{identifier['sql']}} as {{identifier['alias']}}
95 |     {%- else -%}
96 |       ,a.{{identifier}} as first_{{identifier}}
97 |     {%- endif -%}
98 |     {% endfor -%}
99 |   {%- endif %}
100 |   {%- if var('snowplow__user_last_passthroughs', []) -%}
101 |     {%- for identifier in var('snowplow__user_last_passthroughs', []) %}
102 |     {# Check if it's a simple column or a sql+alias #}
103 |     {%- if identifier is mapping -%}
104 |       ,c.{{identifier['alias']}}
105 |     {%- else -%}
106 |       ,c.last_{{identifier}}
107 |     {%- endif -%}
108 |     {% endfor -%}
109 |   {%- endif %}
110 |
111 | from {{ ref('snowplow_web_users_aggs') }} as b
112 | -- a: the session row whose id equals b.first_domain_sessionid
113 | inner join {{ ref('snowplow_web_users_sessions_this_run') }} as a
114 | on a.domain_sessionid = b.first_domain_sessionid
115 | -- c: the last_* columns, matched on domain_userid
116 | inner join {{ ref('snowplow_web_users_lasts') }} c
117 | on b.domain_userid = c.domain_userid
--------------------------------------------------------------------------------
/models/users/snowplow_web_users.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {# Nested dict arguments are named first so the config() call below stays flat. #}
9 | {%- set bigquery_partition = {"field": "start_tstamp", "data_type": "timestamp"} -%}
10 | {%- set delta_table_properties = {
11 |     'delta.autoOptimize.optimizeWrite': 'true',
12 |     'delta.autoOptimize.autoCompact': 'true'
13 | } -%}
14 |
15 | {{
16 |   config(
17 |     tags=["derived"],
18 |     sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')),
19 |     materialized='incremental',
20 |     on_schema_change='append_new_columns',
21 |     snowplow_optimize=true,
22 |     unique_key='domain_userid',
23 |     upsert_date_key='start_tstamp',
24 |     disable_upsert_lookback=true,
25 |     sort='start_tstamp',
26 |     dist='domain_userid',
27 |     partition_by=snowplow_utils.get_value_by_target_type(
28 |       bigquery_val=bigquery_partition,
29 |       databricks_val='start_tstamp_date'
30 |     ),
31 |     cluster_by=snowplow_web.web_cluster_by_fields_users(),
32 |     tblproperties=delta_table_properties
33 |   )
34 | }}
35 |
36 | {#
37 |   Selects every column of snowplow_web_users_this_run; on databricks/spark targets a
38 |   start_tstamp_date column is appended (the databricks partition_by value above).
39 |   The where clause returns false if the run doesn't contain new events.
40 | #}
41 | select
42 |   *
43 |   {%- if target.type in ['databricks', 'spark'] %}
44 |   , date(start_tstamp) as start_tstamp_date
45 |   {%- endif %}
46 |
47 | from {{ ref('snowplow_web_users_this_run') }}
48 |
49 | where {{ snowplow_utils.is_run_with_new_events('snowplow_web') }}
--------------------------------------------------------------------------------
/packages.yml:
--------------------------------------------------------------------------------
1 | packages:
2 |   - package: snowplow/snowplow_utils
3 |     version:
4 |       - ">=0.15.0"
5 |       - "<0.16.0"
--------------------------------------------------------------------------------
/seeds/seeds.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | seeds:
4 |   - name: snowplow_web_dim_ga4_source_categories
5 |     description: A list of all source (websites) and their corresponding GA4 category.
6 |     columns:
7 |       - name: source
8 |       - name: source_category
9 |   # Country seed: one column per property listed below.
10 |   - name: snowplow_web_dim_geo_country_mapping
11 |     description: A mapping from geo_country codes (ISO 3166-1 alpha-2) to the other properties of the country.
12 |     columns:
13 |       - name: name
14 |       - name: alpha_2
15 |       - name: alpha_3
16 |       - name: country_code
17 |       - name: iso_3166_2
18 |       - name: region
19 |       - name: sub_region
20 |       - name: intermediate_region
21 |       - name: region_code
22 |       - name: sub_region_code
23 |       - name: intermediate_region_code
24 |   # Language seed: lang_tag is the lookup key, name the full language name.
25 |   - name: snowplow_web_dim_rfc_5646_language_mapping
26 |     description: A mapping from the RFC 5646 language code to the full name of the language.
27 |     columns:
28 |       - name: lang_tag
29 |       - name: name
--------------------------------------------------------------------------------
/selectors.yml:
--------------------------------------------------------------------------------
1 | selectors:
2 |   - name: snowplow_web
3 |     # The description field was added in dbt v0.19. Commented out for compatibility.
4 |     # description: >
5 |     #   Suggested node selection when running the Snowplow Web package.
6 |     #   Runs:
7 |     #     - All Snowplow Web models.
8 |     #     - All custom models in your dbt project, tagged with `snowplow_web_incremental`.
9 |     definition:
10 |       union:
11 |         - method: package
12 |           value: snowplow_web
13 |         - method: tag
14 |           value: snowplow_web_incremental
15 |   - name: snowplow_web_lean_tests
16 |     # The description field was added in dbt v0.19. Commented out for compatibility.
17 |     # description: >
18 |     #   Suggested testing implementation for the Snowplow Web package. Lean approach, essential tests rather than full suite to save cost.
19 |     #   Tests:
20 |     #     - All tests on Snowplow Web this_run and manifest tables.
21 |     #     - Primary key and data tests on the Snowplow Web derived tables (page_views, sessions and users)
22 |     #     - All tests on any custom models in your dbt project, tagged with `snowplow_web_incremental`.
23 |     definition:
24 |       union:
25 |         # package snowplow_web AND tag this_run
26 |         - intersection:
27 |             - {method: package, value: snowplow_web}
28 |             - {method: tag, value: this_run}
29 |         # package snowplow_web AND tag manifest
30 |         - intersection:
31 |             - {method: package, value: snowplow_web}
32 |             - {method: tag, value: manifest}
33 |         # package snowplow_web AND tag derived AND tag primary-key
34 |         - intersection:
35 |             - {method: package, value: snowplow_web}
36 |             - {method: tag, value: derived}
37 |             - {method: tag, value: primary-key}
38 |         # package snowplow_web AND tag derived AND test_type data
39 |         - intersection:
40 |             - {method: package, value: snowplow_web}
41 |             - {method: tag, value: derived}
42 |             - {method: test_type, value: data}
43 |         # path models AND tag snowplow_web_incremental
44 |         - intersection:
45 |             - {method: path, value: models}
46 |             - {method: tag, value: snowplow_web_incremental}
--------------------------------------------------------------------------------
/tests/page_views/snowplow_tests_page_view_in_session_values.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {#
9 |   Returns every domain_sessionid in snowplow_web_page_views for which any of these holds:
10 |     - page_views_in_session does not have exactly one distinct value,
11 |     - the row count differs from the distinct count of page_view_in_session_index,
12 |     - the row count differs from the distinct count of page_view_id.
13 | #}
14 | with session_checks as (
15 |
16 |     select
17 |         domain_sessionid,
18 |         count(distinct page_views_in_session) as distinct_session_totals,
19 |         count(*) - count(distinct page_view_in_session_index) as rows_minus_distinct_indexes,
20 |         count(*) - count(distinct page_view_id) as rows_minus_distinct_ids
21 |
22 |     from {{ ref('snowplow_web_page_views') }}
23 |
24 |     group by domain_sessionid
25 |
26 | )
27 |
28 | select domain_sessionid
29 | from session_checks
30 | where distinct_session_totals <> 1
31 |    or rows_minus_distinct_indexes <> 0
32 |    or rows_minus_distinct_ids <> 0
--------------------------------------------------------------------------------
/tests/snowplow_tests_consent_versions.sql:
--------------------------------------------------------------------------------
1 | {#
2 | Copyright (c) 2020-present Snowplow Analytics Ltd. All rights reserved.
3 | This program is licensed to you under the Snowplow Community License Version 1.0,
4 | and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
5 | You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
6 | #}
7 |
8 | {# Returns each consent_version that occurs on more than one row of snowplow_web_consent_versions, with its row count. #}
9 | with repeated_versions as (
10 |
11 |     select
12 |         consent_version,
13 |         count(*) as n_consents
14 |     from {{ ref('snowplow_web_consent_versions') }}
15 |     group by consent_version
16 |     having count(*) > 1
17 |
18 | )
19 |
20 | select
21 |     consent_version,
22 |     n_consents
23 | from repeated_versions
--------------------------------------------------------------------------------