├── .gitignore ├── LICENSE ├── dataform.json ├── definitions ├── reporting │ ├── posts_combined.sqlx │ └── user_stats.sqlx ├── sources │ ├── badges.sqlx │ ├── posts_answers.sqlx │ ├── posts_questions.sqlx │ └── users.sqlx └── staging │ ├── stg_badges.sqlx │ ├── stg_posts_answers.sqlx │ ├── stg_posts_questions.sqlx │ └── stg_users.sqlx ├── environments.json ├── package-lock.json ├── package.json └── schedules.json /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .df-credentials.json 3 | node_modules/ 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Dataform 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dataform.json: -------------------------------------------------------------------------------- 1 | { 2 | "defaultSchema": "dataform", 3 | "assertionSchema": "dataform_assertions", 4 | "warehouse": "bigquery", 5 | "defaultDatabase": "dataform-demos", 6 | "defaultLocation": "us" 7 | } 8 | -------------------------------------------------------------------------------- /definitions/reporting/posts_combined.sqlx: -------------------------------------------------------------------------------- 1 | config { 2 | type: "table", 3 | schema: "reporting", 4 | tags: ["daily"], 5 | bigquery: { 6 | partitionBy: "date(created_at)" 7 | }, 8 | description: "Combine both questions and answers into a single posts_all table", 9 | assertions: { 10 | uniqueKey: ["post_id"] 11 | } 12 | } 13 | 14 | select 15 | post_id, 16 | created_at, 17 | type, 18 | title, 19 | body, 20 | owner_user_id, 21 | parent_id 22 | from 23 | ${ref("stg_posts_answers")} 24 | 25 | union all 26 | 27 | select 28 | post_id, 29 | created_at, 30 | type, 31 | title, 32 | body, 33 | owner_user_id, 34 | parent_id 35 | from 36 | ${ref("stg_posts_questions")} -------------------------------------------------------------------------------- /definitions/reporting/user_stats.sqlx: -------------------------------------------------------------------------------- 1 | config { 2 | type: "table", 3 | schema: "reporting", 4 | tags: ["daily"], 5 | description: "Create a summary table for all users including statistics on questions, answeers and badges received.", 6 | assertions: { 7 | uniqueKey: ["user_id"], 8 | rowConditions: ["badge_count >= 0"] 9 | } 10 | } 11 | 12 | select 13 | stg_users.user_id, 14 | stg_users.age, 15 | stg_users.creation_date, 16 | stg_users.user_tenure, 17 | count(distinct stg_badges.badge_id) as badge_count, 18 | count(distinct posts_all.post_id) as questions_and_answer_count, 19 | count(distinct if(type="question", posts_all.post_id, null)) as question_count, 20 | count(distinct if(type="answer", posts_all.post_id, null)) as answer_count, 21 | max(stg_badges.award_timestamp) as last_badge_received_at, 22 | max(posts_all.created_at) as last_posted_at, 23 | max(if(type="question", posts_all.created_at, null)) as last_question_posted_at, 24 | max(if(type="answer", posts_all.created_at, null)) as last_answer_posted_at 25 | from 26 | ${ref("stg_users")} as stg_users 27 | left join ${ref("stg_badges")} as stg_badges 28 | on stg_users.user_id = stg_badges.user_id 29 | left join ${ref("posts_combined")} as posts_all 30 | on stg_users.user_id = posts_all.owner_user_id 31 | group by 32 | 1,2,3,4 -------------------------------------------------------------------------------- /definitions/sources/badges.sqlx: -------------------------------------------------------------------------------- 1 | config { 2 | type: "declaration", 3 | database: "bigquery-public-data", 4 | schema: "stackoverflow", 5 | name: "badges", 6 | description: "raw badges table" 7 | } -------------------------------------------------------------------------------- /definitions/sources/posts_answers.sqlx: -------------------------------------------------------------------------------- 1 | config { 2 | type: "declaration", 3 | database: "bigquery-public-data", 4 | schema: "stackoverflow", 5 | name: "posts_answers", 6 | description: "raw posts_answers table" 7 | } -------------------------------------------------------------------------------- /definitions/sources/posts_questions.sqlx: -------------------------------------------------------------------------------- 1 | config { 2 | type: "declaration", 3 | database: "bigquery-public-data", 4 | schema: "stackoverflow", 5 | name: "posts_questions", 6 | description: "raw posts_questions table" 7 | } -------------------------------------------------------------------------------- /definitions/sources/users.sqlx: -------------------------------------------------------------------------------- 1 | config { 2 | type: "declaration", 3 | database: "bigquery-public-data", 4 | schema: "stackoverflow", 5 | name: "users", 6 | description: "raw users table" 7 | } -------------------------------------------------------------------------------- /definitions/staging/stg_badges.sqlx: -------------------------------------------------------------------------------- 1 | config { 2 | type: "view", 3 | schema: "staging", 4 | description: "Cleaned version of stackoverflow.badges", 5 | tags: ["staging"] 6 | } 7 | 8 | select 9 | id as badge_id, 10 | name as badge_name, 11 | date as award_timestamp, 12 | user_id 13 | from 14 | ${ref("badges")} 15 | -------------------------------------------------------------------------------- /definitions/staging/stg_posts_answers.sqlx: -------------------------------------------------------------------------------- 1 | config { 2 | type: "view", 3 | schema: "staging", 4 | description: "Cleaned version of stackoverflow.posts_answers", 5 | tags: ["staging"] 6 | } 7 | 8 | select 9 | id as post_id, 10 | creation_date as created_at, 11 | 'answer' as type, 12 | title, 13 | body, 14 | owner_user_id, 15 | cast(parent_id as string) as parent_id 16 | from 17 | ${ref("posts_answers")} 18 | where 19 | -- limit to recent data for the purposes of this demo project 20 | creation_date >= timestamp("2019-01-01") 21 | -------------------------------------------------------------------------------- /definitions/staging/stg_posts_questions.sqlx: -------------------------------------------------------------------------------- 1 | config { 2 | type: "view", 3 | schema: "staging", 4 | description: "Cleaned version of stackoverflow.posts_questions", 5 | tags: ["staging"] 6 | } 7 | 8 | select 9 | id as post_id, 10 | creation_date as created_at, 11 | 'question' as type, 12 | title, 13 | body, 14 | owner_user_id, 15 | parent_id 16 | from 17 | ${ref("posts_questions")} 18 | where 19 | -- limit to recent data for the purposes of this demo project 20 | creation_date >= timestamp("2019-01-01") 21 | -------------------------------------------------------------------------------- /definitions/staging/stg_users.sqlx: -------------------------------------------------------------------------------- 1 | config { 2 | type: "view", 3 | schema: "staging", 4 | description: "Cleaned version of stackoverflow.users table", 5 | tags: ["staging"] 6 | } 7 | 8 | select 9 | id as user_id, 10 | age, 11 | creation_date, 12 | round(timestamp_diff(current_timestamp(), creation_date, day)/365) as user_tenure 13 | from 14 | ${ref("users")} 15 | -------------------------------------------------------------------------------- /environments.json: -------------------------------------------------------------------------------- 1 | { 2 | "environments": [ 3 | { 4 | "name": "production", 5 | "configOverride": {}, 6 | "gitRef": "master" 7 | } 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "no-name-set", 3 | "lockfileVersion": 3, 4 | "requires": true, 5 | "packages": { 6 | "": { 7 | "dependencies": { 8 | "@dataform/core": "3.0.8" 9 | } 10 | }, 11 | "node_modules/@dataform/core": { 12 | "version": "3.0.8", 13 | "resolved": "https://registry.npmjs.org/@dataform/core/-/core-3.0.8.tgz", 14 | "integrity": "sha512-tHA82saUMA9CNQCVBUZf8OTVSdnbLiMs/4eLpSVR37Zmxa66fVV//0Oiujd82JWxI2oAkQQQRWF82DPjCYHW2Q==" 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "@dataform/core": "3.0.8" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /schedules.json: -------------------------------------------------------------------------------- 1 | { 2 | "schedules": [{ 3 | "name": "daily", 4 | "options": { 5 | "includeDependencies": false, 6 | "fullRefresh": false, 7 | "tags": ["daily"] 8 | }, 9 | "cron": "00 09 * * *", 10 | "notification": { 11 | "onSuccess": false, 12 | "onFailure": false 13 | }, 14 | "notifications": [{ 15 | "events": ["failure"], 16 | "channels": ["email dan"] 17 | }] 18 | }], 19 | "notificationChannels": [{ 20 | "name": "email jo", 21 | "email": { 22 | "to": ["jo_bloggs@acme.co"] 23 | } 24 | }] 25 | } 26 | --------------------------------------------------------------------------------