├── .github
│   ├── CODEOWNERS
│   ├── release-drafter.yml
│   ├── dependabot.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── pull_request_template.md
│   └── workflows
│       ├── release.yml
│       └── ci.yml
├── integration_tests
│   ├── packages.yml
│   ├── requirements.txt
│   ├── docs
│   │   └── integration_tests_diagram.png
│   ├── for_CI
│   │   ├── change_of_database.sh
│   │   ├── change_dbt_project_airflow_source.sh
│   │   ├── change_dbt_project_adf_source.sh
│   │   ├── change_dbt_project_from_databricks_to_airflow.sh
│   │   └── change_dbt_project_databricks_source.sh
│   ├── macros
│   │   ├── create_schema.sql
│   │   ├── drop_schema.sql
│   │   ├── seed__task_fail.sql
│   │   ├── adf_activity_runs.sql
│   │   ├── seed__dag.sql
│   │   ├── seed__dag_run.sql
│   │   ├── seed__task_instance.sql
│   │   ├── adf_pipeline_runs.sql
│   │   ├── adf_triggers.sql
│   │   └── jobs.sql
│   ├── profiles.yml
│   ├── dbt_project.yml
│   ├── README.md
│   └── seeds
│       ├── airflow
│       │   ├── task_instance.csv
│       │   ├── task_fail.csv
│       │   ├── dag.csv
│       │   └── dag_run.csv
│       └── adf
│           ├── adf_activity_runs.csv
│           └── adf_pipelines.csv
├── .gitignore
├── packages.yml
├── models
│   ├── staging
│   │   ├── databricks_workflow_sources
│   │   │   ├── stg_task_instance_databricks_workflow.sql
│   │   │   ├── stg_task_fail_databricks_workflow.sql
│   │   │   ├── stg_dag_run_databricks_workflow.sql
│   │   │   ├── stg_dag_databricks_workflow.sql
│   │   │   └── source.yml
│   │   ├── dbt_utils_day.sql
│   │   ├── airflow_sources
│   │   │   ├── stg_dag_run_airflow.sql
│   │   │   ├── stg_task_fail_airflow.sql
│   │   │   ├── stg_dag_airflow.sql
│   │   │   ├── stg_task_instance_airflow.sql
│   │   │   └── source.yml
│   │   └── adf_sources
│   │       ├── stg_dag_run_adf.sql
│   │       ├── stg_task_fail_adf.sql
│   │       ├── stg_task_instance_adf.sql
│   │       ├── stg_dag_adf.sql
│   │       └── source.yml
│   ├── marts
│   │   ├── dim_dag_monitoring_dag.sql
│   │   ├── bridge_dag_monitoring.yml
│   │   ├── dim_dag_monitoring_task.yml
│   │   ├── dim_dag_monitoring_dag.yml
│   │   ├── fact_dag_monitoring_dag_run.yml
│   │   ├── fact_dag_monitoring_task_fail.yml
│   │   ├── bridge_dag_monitoring.sql
│   │   ├── fact_dag_monitoring_task_instance.yml
│   │   ├── fact_dag_monitoring_dag_run.sql
│   │   ├── fact_dag_monitoring_task_fail.sql
│   │   ├── dim_dag_monitoring_task.sql
│   │   └── fact_dag_monitoring_task_instance.sql
│   ├── docs
│   │   └── universal.md
│   └── calendar
│       └── dim_dag_monitoring_dates.sql
├── package-lock.yml
├── requirements.txt
├── macros
│   ├── cast_as_date.sql
│   ├── day_of_year.sql
│   ├── cast_as_timestamp.sql
│   ├── adf_pipelines_name.sql
│   ├── flatten_data.sql
│   ├── date_diff.sql
│   ├── replace_dot_for_colon_notation.sql
│   ├── date_add.sql
│   ├── date_format.sql
│   ├── cast_as_string.sql
│   ├── day_of_week.sql
│   └── model_task_instance_databricks_workflow.sql
├── example.env
├── setup.sh
├── catalog-dag-monitoring.yaml
├── profiles.yml
├── dbt_project.yml
├── README.md
└── LICENSE
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @techindicium/central-de-dados
--------------------------------------------------------------------------------
/integration_tests/packages.yml:
--------------------------------------------------------------------------------
1 | packages:
2 | - local: ../
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | venv
2 | dbt_packages
3 | target
4 | *logs
5 | .env
6 | env
7 | .user.yml
--------------------------------------------------------------------------------
/packages.yml:
--------------------------------------------------------------------------------
1 | packages:
2 | - package: dbt-labs/dbt_utils
3 | version: 1.1.1
--------------------------------------------------------------------------------
/integration_tests/requirements.txt:
--------------------------------------------------------------------------------
1 | dbt-snowflake==1.8.3
2 | dbt-databricks==v1.8.5
3 | databricks-sdk==0.17.0
4 | dbt-core==1.8.5
--------------------------------------------------------------------------------
/models/staging/databricks_workflow_sources/stg_task_instance_databricks_workflow.sql:
--------------------------------------------------------------------------------
1 | {{ model_task_instance_databricks_workflow() }}
--------------------------------------------------------------------------------
/package-lock.yml:
--------------------------------------------------------------------------------
1 | packages:
2 | - package: dbt-labs/dbt_utils
3 | version: 1.1.1
4 | sha1_hash: b0e601a7edf623823e7381fcbae7d8a2d0999fe4
5 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | dbt-core==1.8.5
2 | dbt-databricks==v1.8.5
3 | databricks-sdk==0.17.0
4 | dbt-snowflake==1.8.3
5 | google-cloud==0.34.0
6 | dbt-bigquery==1.8.2
--------------------------------------------------------------------------------
/integration_tests/docs/integration_tests_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techindicium/dbt-dag-monitoring/HEAD/integration_tests/docs/integration_tests_diagram.png
--------------------------------------------------------------------------------
/.github/release-drafter.yml:
--------------------------------------------------------------------------------
1 | template: |
2 | ## What's Changed
3 | $CHANGES
4 |
5 | **Full Changelog**: https://github.com/$OWNER/$REPOSITORY/compare/$PREVIOUS_TAG...v$RESOLVED_VERSION
6 |
--------------------------------------------------------------------------------
/integration_tests/for_CI/change_of_database.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source=$1  # usage: change_of_database.sh <source> <current_database> <new_database>
4 |
5 | sed -i "s/dag_monitoring_${source}_database: $2/dag_monitoring_${source}_database: $3/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
6 |
--------------------------------------------------------------------------------
/integration_tests/macros/create_schema.sql:
--------------------------------------------------------------------------------
1 | {% macro create_schema(schema_name) %}
2 | {% set sql %}
3 | CREATE SCHEMA IF NOT EXISTS {{ schema_name }}
4 | {% endset %}
5 | {{ run_query(sql) }}
6 | {% endmacro %}
--------------------------------------------------------------------------------
/macros/cast_as_date.sql:
--------------------------------------------------------------------------------
1 | {% macro cast_as_date(column) -%}
2 | {{ return(adapter.dispatch('cast_as_date')(column)) }}
3 | {%- endmacro %}
4 |
5 |
6 | {% macro default__cast_as_date(column) -%}
7 | cast({{ column }} as date)
8 | {%- endmacro %}
9 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "pip"
4 | directory: "/"
5 | schedule:
6 | interval: "daily"
7 | rebase-strategy: "disabled"
8 | ignore:
9 | - dependency-name: "*"
10 | update-types:
11 | - version-update:semver-patch
12 |
--------------------------------------------------------------------------------
/example.env:
--------------------------------------------------------------------------------
1 | # This is an example of how to fill in each variable.
2 |
3 |
4 | # Default configuration; you don't have to change it unless you have a specific need.
5 | export DBT_DEFAULT_TARGET="dev";
6 | export DEV_CATALOG_NAME="";
7 | export DEV_SCHEMA_NAME="";
8 | export DEV_HOST="";
9 | export DEV_HTTP_PATH="";
10 | export DEV_TOKEN="";
--------------------------------------------------------------------------------
/macros/day_of_year.sql:
--------------------------------------------------------------------------------
1 | {% macro day_of_year(column) -%}
2 | {{ return(adapter.dispatch('day_of_year')(column)) }}
3 | {%- endmacro %}
4 |
5 |
6 | {% macro default__day_of_year(column) -%}
7 | extract(dayofyear from {{ column }})
8 | {%- endmacro %}
9 |
10 |
11 | {% macro databricks__day_of_year(column) -%}
12 | extract(doy from {{ column }})
13 | {%- endmacro %}
--------------------------------------------------------------------------------
/integration_tests/macros/drop_schema.sql:
--------------------------------------------------------------------------------
1 | {% macro drop_schema(schema_name) %}
2 |
3 | {% set drop_schema_query %}
4 | DROP SCHEMA IF EXISTS {{ schema_name }} CASCADE;
5 | {% endset %}
6 |
7 | {% if execute %}
8 | {{ run_query(drop_schema_query) }}
9 | {% endif %}
10 |
11 | {% do log("Dropped schema " ~ schema_name, info = true) %}
12 |
13 | {% endmacro %}
--------------------------------------------------------------------------------
/macros/cast_as_timestamp.sql:
--------------------------------------------------------------------------------
1 | {% macro cast_as_timestamp(column, n=1000) -%}
2 | {{ return(adapter.dispatch('cast_as_timestamp')(column, n=n)) }}
3 | {%- endmacro %}
4 |
5 |
6 | {% macro default__cast_as_timestamp(column, n=1000) -%}
7 | to_timestamp({{ column }} / {{ n }} )
8 | {%- endmacro %}
9 |
10 | {% macro bigquery__cast_as_timestamp(column, n=1000) -%}
11 | TIMESTAMP_SECONDS(cast({{ column }} / {{ n }} as int))
12 | {%- endmacro %}
--------------------------------------------------------------------------------
/macros/adf_pipelines_name.sql:
--------------------------------------------------------------------------------
1 | {% macro adf_pipelines_name(column) -%}
2 | {{ return(adapter.dispatch('adf_pipelines_name')(column)) }}
3 | {%- endmacro %}
4 |
5 |
6 | {% macro default__adf_pipelines_name(column) -%}
7 | {{ column }}
8 | {%- endmacro %}
9 |
10 | {% macro bigquery__adf_pipelines_name(column) -%}
11 | {% if column == 'pipelines.pipelineReference.referenceName' -%}
12 | {{ 'pipelineReference.referenceName' }}
13 | {% endif %}
14 | {%- endmacro %}
--------------------------------------------------------------------------------
/macros/flatten_data.sql:
--------------------------------------------------------------------------------
1 | {% macro flatten_data(column) -%}
2 | {{ return(adapter.dispatch('flatten_data')(column)) }}
3 | {%- endmacro %}
4 |
5 |
6 | {% macro databricks__flatten_data(column) -%}
7 | lateral view explode ({{ column }})
8 | {%- endmacro %}
9 |
10 | {% macro snowflake__flatten_data(column) -%}
11 | , lateral flatten(input => {{ column }})
12 | {%- endmacro %}
13 |
14 | {% macro bigquery__flatten_data(column) -%}
15 | , unnest({{ column }})
16 | {%- endmacro %}
17 |
--------------------------------------------------------------------------------
/macros/date_diff.sql:
--------------------------------------------------------------------------------
1 | {% macro date_diff(datepart, start_date, end_date) -%}
2 | {{ return(adapter.dispatch('date_diff')(datepart, start_date, end_date)) }}
3 | {%- endmacro %}
4 |
5 | {% macro default__date_diff(datepart, start_date, end_date) -%}
6 | datediff({{ datepart }}, {{ start_date }}, {{ end_date }})
7 | {%- endmacro %}
8 |
9 | {% macro bigquery__date_diff(datepart, start_date, end_date) -%}
10 | date_diff({{ end_date }}, {{ start_date }}, {{ datepart }})
11 | {%- endmacro %}
--------------------------------------------------------------------------------
/macros/replace_dot_for_colon_notation.sql:
--------------------------------------------------------------------------------
1 | {% macro replace_dot_for_colon(struct_column, column_item) -%}
2 | {{ return(adapter.dispatch('replace_dot_for_colon')(struct_column, column_item)) }}
3 | {%- endmacro %}
4 |
5 | {% macro default__replace_dot_for_colon(struct_column, column_item) -%}
6 | {{ struct_column }}.{{ column_item }}
7 | {%- endmacro %}
8 |
9 | {% macro snowflake__replace_dot_for_colon(struct_column, column_item) -%}
10 | {{ struct_column }}:{{ column_item }}
11 | {%- endmacro %}
--------------------------------------------------------------------------------
/macros/date_add.sql:
--------------------------------------------------------------------------------
1 | {% macro date_add(datepart, interval, column, default='INTERVAL') -%}
2 | {{ return(adapter.dispatch('date_add')(datepart, interval, column)) }}
3 | {%- endmacro %}
4 |
5 |
6 | {% macro default__date_add(datepart, interval, column, default='INTERVAL') -%}
7 | dateadd({{ datepart }}, {{ interval }}, {{ column }} )
8 | {%- endmacro %}
9 |
10 | {% macro bigquery__date_add(datepart, interval, column, default='INTERVAL') -%}
11 | date_add({{ column }}, {{ default }} {{ interval }} {{ datepart }} )
12 | {%- endmacro %}
--------------------------------------------------------------------------------
/models/staging/dbt_utils_day.sql:
--------------------------------------------------------------------------------
1 | {% set my_query %}
2 | select cast({{current_timestamp()}} as date)
3 | {% endset %}
4 |
5 | {% if execute %}
6 | {% set today = run_query(my_query).columns[0].values()[0] %}
7 | {% set tomorrow = dateadd('day', 1, "'" ~ today ~ "'") %}
8 | {% set start_date = var('dbt_dag_monitoring')['dag_monitoring_start_date'] %}
9 | {% else %}
10 | {% set tomorrow = ' ' %}
11 | {% set start_date = ' ' %}
12 | {% endif %}
13 |
14 | {{ dbt_utils.date_spine(
15 | datepart="day",
16 | start_date=start_date,
17 | end_date=tomorrow
18 | )
19 | }}
--------------------------------------------------------------------------------
/macros/date_format.sql:
--------------------------------------------------------------------------------
1 | {% macro month_day(column, format='') -%}
2 | {{ return(adapter.dispatch('month_day')(column)) }}
3 | {%- endmacro %}
4 |
5 |
6 | {% macro databricks__month_day(column, format='dd-MM') -%}
7 | date_format({{ column }}, '{{ format }}')
8 | {%- endmacro %}
9 |
10 | {% macro snowflake__month_day(column, format='dd-MM') -%}
11 | to_char(cast({{ column }} as date), '{{ format }}')
12 | {%- endmacro %}
13 |
14 |
15 | {% macro bigquery__month_day(column, format='%d-%m') -%}
16 | cast(parse_date('{{ format }}', cast({{ column }} as string)) as string)
17 | {%- endmacro %}
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # If something goes wrong, stop the script
4 | set -e
5 |
6 | # exports all variables in .env file. Any variable defined in the script will be available in the environment.
7 | set -a
8 |
9 | # Install virtualenv if not installed
10 | pip install virtualenv
11 |
12 | # Create a new virtual environment
13 | virtualenv env
14 |
15 | # Make the activation script executable
16 | chmod +x env/bin/activate
17 |
18 | # Activate the virtual environment
19 | source env/bin/activate # On Windows, use `env\Scripts\activate`
20 |
21 | # Load the environment variables
22 | source .env
23 |
24 | # Install requirements
25 | pip install -r ./requirements.txt
26 |
27 | dbt deps
--------------------------------------------------------------------------------
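
A minimal usage sketch for the setup script above, assuming you copy example.env to a local .env and fill in the Databricks credentials first (the exact workflow is not documented in the repo, so treat this as illustrative):

```bash
# Illustrative only: prepare the environment and verify the connection.
cp example.env .env            # fill in DEV_CATALOG_NAME, DEV_SCHEMA_NAME, DEV_HOST, DEV_HTTP_PATH, DEV_TOKEN
source setup.sh                # sourcing keeps the virtualenv and exported variables in this shell
dbt debug --profiles-dir .     # optional sanity check against the target defined in profiles.yml
```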
/models/staging/airflow_sources/stg_dag_run_airflow.sql:
--------------------------------------------------------------------------------
1 | with
2 | renamed as (
3 | select distinct
4 | {{ cast_as_string('id') }} as dag_run_id
5 | , {{ cast_as_string('dag_id') }} as dag_id
6 | , {{ cast_as_date('start_date') }} as run_date
7 | , state as dag_state
8 | , external_trigger
9 | , start_date as execution_start_date
10 | , end_date as execution_end_date
11 | , {{ date_diff('second', 'start_date', 'end_date') }} as duration
12 | , run_type
13 | , {{ cast_as_string('run_id') }} as run_id
14 | from {{ source('raw_airflow_monitoring', 'dag_run') }}
15 | )
16 | select *
17 | from renamed
18 |
--------------------------------------------------------------------------------
/models/staging/databricks_workflow_sources/stg_task_fail_databricks_workflow.sql:
--------------------------------------------------------------------------------
1 | with
2 | renamed as (
3 | select
4 | {{ cast_as_string('task_id') }} as task_fail_id
5 | , {{ cast_as_string('task_id') }} as task_id
6 | , {{ cast_as_string('dag_id') }} as dag_id
7 | , run_id
8 | , execution_date
9 | , execution_start_date
10 | , execution_end_date
11 | , duration
12 | , 'not_implemented_for_databricks_workflow' as map_index
13 | from {{ ref('stg_task_instance_databricks_workflow') }}
14 | where state_task_instance in ('MAXIMUM_CONCURRENT_RUNS_REACHED', 'CANCELED', 'FAILED', 'UPSTREAM_FAILED')
15 | )
16 | select *
17 | from renamed
18 |
--------------------------------------------------------------------------------
/models/staging/adf_sources/stg_dag_run_adf.sql:
--------------------------------------------------------------------------------
1 | with
2 | renamed as (
3 | select distinct
4 | {{ cast_as_string('id') }} as dag_run_id
5 | , {{ cast_as_string('pipelineName') }} as dag_id
6 | , {{ cast_as_date('runStart') }} as run_date
7 | , status as dag_state
8 | , {{ cast_as_string('invokedBy') }} as external_trigger
9 | , runStart as execution_start_date
10 | , runEnd as execution_end_date
11 | , durationInMs / 1000 as duration
12 | , "not_implemented_by_adf" as run_type
13 | , {{ cast_as_string('runId') }} as run_id
14 | from {{ source('raw_adf_monitoring', 'adf_pipeline_runs') }}
15 | )
16 | select *
17 | from renamed
18 |
--------------------------------------------------------------------------------
/models/staging/airflow_sources/stg_task_fail_airflow.sql:
--------------------------------------------------------------------------------
1 | with
2 | renamed as (
3 | select distinct
4 | {{ cast_as_string('id') }} as task_fail_id
5 | , {{ cast_as_string('task_id') }} as task_id
6 | , {{ cast_as_string('dag_id') }} as dag_id
7 | , {{ cast_as_string('run_id') }} as run_id
8 | , {{ cast_as_date('start_date') }} as execution_date
9 | , start_date as execution_start_date
10 | , end_date as execution_end_date
11 | , duration
12 | , case
13 | when map_index = -1 then 'no mapping'
14 | end as map_index
15 | from {{ source('raw_airflow_monitoring', 'task_fail') }}
16 | )
17 | select *
18 | from renamed
19 |
--------------------------------------------------------------------------------
/models/marts/dim_dag_monitoring_dag.sql:
--------------------------------------------------------------------------------
1 | with
2 | stg_dag as (
3 | {% for src in var('enabled_sources') -%}
4 | select
5 | dag_id
6 | , dag_name
7 | , dag_description
8 | , dag_frequency
9 | , timetable_description
10 | , is_paused
11 | , is_active
12 | , fileloc
13 | , owners
14 | , '{{ src }}' as source_system
15 | from
16 | {{ ref('stg_dag_' + src) }}
17 | {% if not loop.last -%} union {% endif -%}
18 | {% endfor -%}
19 | )
20 | , stg_dag_with_sk as (
21 | select
22 | {{ dbt_utils.generate_surrogate_key(['dag_id']) }} as dag_sk
23 | , *
24 | from stg_dag
25 | )
26 | select *
27 | from stg_dag_with_sk
--------------------------------------------------------------------------------
/models/staging/adf_sources/stg_task_fail_adf.sql:
--------------------------------------------------------------------------------
1 | with
2 | renamed as (
3 | select distinct
4 | {{ cast_as_string('activityRunId') }} as task_fail_id
5 | , {{ cast_as_string('activityRunId') }} as task_id
6 | , {{ cast_as_string('pipelineName') }} as dag_id
7 | , pipelineRunId as run_id
8 | , {{ cast_as_date('activityRunStart') }} as execution_date
9 | , activityRunStart as execution_start_date
10 | , activityRunEnd as execution_end_date
11 | , durationInMs / 1000 as duration
12 | , "not_implemented_for_adf" as map_index
13 | from {{ source('raw_adf_monitoring', 'adf_activity_runs') }}
14 | where status in ('TimedOut', 'Cancelled', 'Failed')
15 | )
16 | select *
17 | from renamed
18 |
19 |
--------------------------------------------------------------------------------
/models/staging/databricks_workflow_sources/stg_dag_run_databricks_workflow.sql:
--------------------------------------------------------------------------------
1 | with
2 | renamed as (
3 | select
4 | {{ cast_as_string('run_id') }} as dag_run_id
5 | , {{ cast_as_string('job_id') }} as dag_id
6 | , {{cast_as_timestamp('start_time')}} as run_date
7 | , {{replace_dot_for_colon('state','result_state')}} as dag_state
8 | , "trigger" as external_trigger
9 | , {{cast_as_timestamp('start_time')}} as execution_start_date
10 | , {{cast_as_timestamp('end_time')}} as execution_end_date
11 | , execution_duration / 1000 as duration
12 | , run_type
13 | , {{ cast_as_string('run_id') }} as run_id
14 | from {{ source('raw_databricks_workflow_monitoring', 'job_runs') }}
15 | )
16 | select *
17 | from renamed
18 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Operating System (please complete the following information):**
27 | - OS:
28 | - Version:
29 |
30 | **Do you use WSL? Which version? (please complete the following information):**
31 |
32 | **Additional context**
33 | Add any other context about the problem here.
--------------------------------------------------------------------------------
/macros/cast_as_string.sql:
--------------------------------------------------------------------------------
1 | {% macro cast_as_string(column) -%}
2 | {{ return(adapter.dispatch('cast_as_string')(column)) }}
3 | {%- endmacro %}
4 |
5 |
6 | {% macro databricks__cast_as_string(column) -%}
7 | cast({{ column }} as string)
8 | {%- endmacro %}
9 |
10 | {% macro bigquery__cast_as_string(column) -%}
11 | {% if column == 'invokedBy' or column == 'properties.typeProperties.recurrence.schedule' -%}
12 | {{ column }}
13 | {% else -%}
14 | cast({{ column }} as string)
15 | {% endif -%}
16 | {%- endmacro %}
17 |
18 | {% macro snowflake__cast_as_string(column) -%}
19 | {% if column == 'null' -%}
20 | {{ column }}
21 | {% else -%}
22 | cast({{ column }} as string)
23 | {% endif -%}
24 | {%- endmacro %}
25 |
26 |
27 | {% macro redshift__cast_as_string(column) -%}
28 | cast({{ column }} as varchar)
29 | {%- endmacro %}
30 |
--------------------------------------------------------------------------------
/integration_tests/for_CI/change_dbt_project_airflow_source.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Disable adf sources in dbt_project.yml
4 | sed -i '/raw_adf_monitoring:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
5 |
6 | # Enable airflow sources in dbt_project.yml
7 | sed -i 's/\(raw_airflow_monitoring:\s*\n\s*+enabled:\s*\)false/\1true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
8 |
9 | # Disable adf models in dbt_project.yml
10 | sed -i '/adf_sources:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
11 |
12 | # Enable airflow models in dbt_project.yml
13 | sed -i '/airflow_sources:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
14 |
15 | # Update the enabled_sources in dbt_project.yml
16 | sed -i "s/enabled_sources: \[.*\]/enabled_sources: \['airflow'\]/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
17 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Propose a feature request, new capability or improvement.
4 | title: ''
5 | labels: enhancement
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
22 | ## Constraints and Assumptions
23 | Call out any constraint and/or assumption relevant for the development and use of this feature.
24 |
25 | ## Tests
26 | Describe here any new test requirement for this feature.
27 |
28 | ## References
29 |
--------------------------------------------------------------------------------
/integration_tests/for_CI/change_dbt_project_adf_source.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Enable adf sources in dbt_project.yml
4 | sed -i '/raw_adf_monitoring:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
5 |
6 | # Disable databricks sources in dbt_project.yml
7 | sed -i 's/\(raw_databricks_workflow_monitoring:\s*\n\s*+enabled:\s*\)true/\1false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
8 |
9 | # Enable adf models in dbt_project.yml
10 | sed -i '/adf_sources:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
11 |
12 | # Disable databricks_workflow models in dbt_project.yml
13 | sed -i '/databricks_workflow_sources:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
14 |
15 | # Update the enabled_sources in dbt_project.yml
16 | sed -i "s/enabled_sources: \[.*\]/enabled_sources: \['adf'\]/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
17 |
--------------------------------------------------------------------------------
/models/marts/bridge_dag_monitoring.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | models:
4 | - name: bridge_dag_monitoring
5 | description: "Bridge table used to make relationships between marts."
6 | columns:
7 | - name: 'generated_date'
8 | description: "Date of execution of the DAG."
9 |
10 | - name: dag_fk
11 | description: "Foreign key for each dag."
12 |
13 | - name: task_fk
14 | description: "Foreign key for each task."
15 |
16 | - name: dag_run_fk
17 | description: "Foreign key for fact_dag_monitoring_dag_run. Composed of: dag_run_id, execution_start_date and execution_end_date"
18 |
19 | - name: task_fail_fk
20 | description: "Foreign key for fact_dag_monitoring_task_fail. Composed of: task_fail_id, execution_end_date and execution_start_date"
21 |
22 | - name: task_instance_fk
23 | description: "Foreign key for fact_dag_monitoring_task_instance. Composed of: task_instance_id, execution_end_date, and execution_start_date"
--------------------------------------------------------------------------------
/integration_tests/for_CI/change_dbt_project_from_databricks_to_airflow.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Disable databricks sources in dbt_project.yml
4 | sed -i 's/\(raw_databricks_workflow_monitoring:\s*\n\s*+enabled:\s*\)true/\1false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
5 |
6 | # Enable airflow sources in dbt_project.yml
7 | sed -i '/raw_airflow_monitoring:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
8 |
9 | # Enable airflow models in dbt_project.yml
10 | sed -i '/airflow_sources:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
11 |
12 | # Disable databricks_workflow models in dbt_project.yml
13 | sed -i '/databricks_workflow_sources:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
14 |
15 | # Update the enabled_sources in dbt_project.yml
16 | sed -i "s/enabled_sources: \[.*\]/enabled_sources: \['airflow'\]/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
17 |
--------------------------------------------------------------------------------
/integration_tests/for_CI/change_dbt_project_databricks_source.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Enable databricks sources in dbt_project.yml
4 | sed -i '/raw_databricks_workflow_monitoring:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
5 |
6 | # Disable airflow sources in dbt_project.yml
7 | sed -i 's/\(raw_airflow_monitoring:\s*\n\s*+enabled:\s*\)true/\1false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
8 |
9 | # Disable airflow models in dbt_project.yml
10 | sed -i '/airflow_sources:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
11 |
12 | # Enable databricks_workflow models in dbt_project.yml
13 | sed -i '/databricks_workflow_sources:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
14 |
15 | # Update the enabled_sources in dbt_project.yml
16 | sed -i "s/enabled_sources: \[.*\]/enabled_sources: \['databricks_workflow'\]/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml"
--------------------------------------------------------------------------------
/catalog-dag-monitoring.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: backstage.io/v1alpha1
2 | kind: Component
3 | metadata:
4 | name: dbt_dag_monitoring
5 | description: |
6 | DAG Monitoring is a product designed to monitor orchestration metadata, composed of a tap, a dbt package and a Power BI dashboard.
7 | tags:
8 | - product
9 | - airflow
10 | - monitoring
11 | - powerbi
12 | links:
13 | - title: Wiki
14 | url: https://wiki.indicium.tech/en/central_dados/squad_produtos_horizontais/squad_produtos_horizontais/dag-monitoring
15 | - title: Repository
16 | url: https://github.com/techindicium/dbt-dag-monitoring
17 | annotations:
18 | indicium.tech/product-url: https://app.powerbi.com/groups/1c5de32c-67f7-493c-ad6d-1d1c574b98bb/reports/132e0228-08ba-4f24-b6c4-a4974414e4b8/ReportSection?experience=power-bi
19 | spec:
20 | title: DAG Monitoring
21 | team: Produtos Horizontais
22 | class: Técnico
23 | vertical: Monitoring
24 | businessUnit: TI
25 | interface: Dashboard
26 | language: Portuguese
27 | owner: Indicium
28 | type: product
29 | lifecycle: experimental
30 | system: public-websites
--------------------------------------------------------------------------------
/models/staging/airflow_sources/stg_dag_airflow.sql:
--------------------------------------------------------------------------------
1 | with
2 | renamed as (
3 | select distinct
4 | {{ cast_as_string('dag_id') }} as dag_id
5 | , {{ cast_as_string('dag_id') }} as dag_name
6 | , description as dag_description
7 | , case
8 | when timetable_description like '% hour, between %' then 'hourly'
9 | when timetable_description like 'Between %' then 'hourly'
10 | when timetable_description like '% on day % month' then 'monthly'
11 | when timetable_description like '% in %' then 'monthly'
12 | when timetable_description like '%:% on %' then 'weekly'
13 | when timetable_description like '%:%' then 'daily'
14 | else timetable_description
15 | end as dag_frequency
16 | , timetable_description
17 | , is_paused
18 | , is_active
19 | , fileloc
20 | , owners
21 | from {{ source('raw_airflow_monitoring', 'dag') }}
22 | )
23 | select *
24 | from renamed
25 |
--------------------------------------------------------------------------------
/models/marts/dim_dag_monitoring_task.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | models:
4 | - name: dim_dag_monitoring_task
5 | description: "dimensions table for Airflow tasks"
6 |
7 | columns:
8 | - name: task_sk
9 | description: "Surrogate key. Composed of: task_id and dag_id"
10 | tests:
11 | - unique
12 | - not_null
13 |
14 | - name: task_id
15 | description: "task id."
16 | tests:
17 | - not_null
18 |
19 | - name: dag_id
20 | description: "DAG id."
21 | tests:
22 | - not_null
23 |
24 | - name: map_index
25 | description: "Index for mapping."
26 |
27 | - name: hostname
28 | description: "Task hostname."
29 |
30 | - name: operator
31 | description: " Task operator model."
32 |
33 | - name: task_pool
34 | description: " Airflow's pool in which the task should be executed."
35 |
36 | - name: source_system
37 | description: " System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow."
38 |
--------------------------------------------------------------------------------
/integration_tests/macros/seed__task_fail.sql:
--------------------------------------------------------------------------------
1 | {% macro seed__task_fail() %}
2 | {% set create_table %}
3 | CREATE OR REPLACE TABLE `{{ target.database }}`.{{ target.schema }}.task_fail (
4 | id INT64,
5 | task_id STRING,
6 | dag_id STRING,
7 | start_date TIMESTAMP,
8 | end_date TIMESTAMP,
9 | duration INT64,
10 | map_index INT64,
11 | run_id STRING
12 | );
13 | {% endset %}
14 |
15 | {% set insert_table %}
16 |
17 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.task_fail (
18 | id, task_id, dag_id, start_date, end_date, duration, map_index, run_id
19 | ) VALUES
20 | (1, 'dbt_freshness', 'dbt_bitrix', TIMESTAMP('2023-12-27T02:30:16.714366Z'), TIMESTAMP('2023-12-21T09:01:57.631415Z'), 1, -1, 'scheduled__2022-12-11T06:00:00+00:00'),
21 | (2, 'dbt_source_test', 'dbt_bitrix', TIMESTAMP('2023-12-21T09:01:57.073097Z'), TIMESTAMP('2023-12-23T08:30:25.791135Z'), 1, -1, 'scheduled__2022-12-11T06:00:00+00:00');
22 |
23 | {% endset %}
24 |
25 | {% do run_query(create_table) %}
26 | {% do log("finished creating table task_fail", info=true) %}
27 |
28 | {% do run_query(insert_table) %}
29 | {% do log("finished insert table task_fail", info=true) %}
30 |
31 | {% endmacro %}
--------------------------------------------------------------------------------
/models/staging/databricks_workflow_sources/stg_dag_databricks_workflow.sql:
--------------------------------------------------------------------------------
1 | with
2 | renamed as (
3 | select
4 | {{ cast_as_string('job_id') }} as dag_id
5 | , {{replace_dot_for_colon('settings','name')}} as dag_name
6 | , 'not_implemented_for_databricks_workflow' as dag_description
7 | , 'not_implemented_for_databricks_workflow' as dag_frequency
8 | , {{replace_dot_for_colon('settings','schedule.quartz_cron_expression')}} as timetable_description
9 | , case
10 | when {{replace_dot_for_colon('settings','schedule.pause_status')}} = 'PAUSED' then true
11 | else false
12 | end as is_paused
13 | , case
14 | when {{replace_dot_for_colon('settings','schedule.pause_status')}} != 'PAUSED' then true
15 | else false
16 | end as is_active
17 | , 'not_implemented_for_databricks_workflow' as fileloc
18 | , creator_user_name as owners
19 | , null as ind_extraction_date
20 | , {{replace_dot_for_colon('settings','schedule.pause_status')}} as pause_status
21 | from {{ source('raw_databricks_workflow_monitoring', 'jobs') }}
22 | )
23 | select *
24 | from renamed
25 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 | PR Checklist
7 |
8 | ### PR Structure
9 |
10 | - [ ] This PR has reasonably narrow scope (if not, break it down into smaller PRs).
11 | - [ ] This PR avoids mixing refactoring changes with feature changes (split into two PRs
12 | otherwise).
13 |
14 | ### Thoroughness
15 |
16 | - [ ] This PR adds tests for the most critical parts of the new functionality or fixes.
17 | - [ ] I've updated the docs and README with the added features, breaking changes, new instructions on how to use the repository.
18 |
19 | ### Release planning
20 |
21 | - [ ] I've decided if this PR requires a new major/minor/patch version accordingly to
22 | [semver](https://semver.org/), and I've changed the name of the BRANCH to release/* , feature/* or patch/* .
23 |
24 |
25 | ### What
26 |
27 | [TODO: Short statement about what is changing.]
28 |
29 | ### Why
30 |
31 | [TODO: Why this change is being made. Include any context required to understand the why.]
32 |
33 | ### Known limitations
34 |
35 | [TODO or N/A]
--------------------------------------------------------------------------------
/integration_tests/profiles.yml:
--------------------------------------------------------------------------------
1 | dbt_dag_monitoring_integration_tests:
2 | target: '{{ env_var(''DBT_DEFAULT_TARGET'', ''databricks'')}}'
3 | outputs:
4 | databricks:
5 | ansi_mode: false
6 | catalog: '{{ env_var(''DEV_CATALOG_NAME'')}}'
7 | host: '{{ env_var(''DEV_HOST'') }}'
8 | http_path: '{{ env_var(''DEV_HTTP_PATH'') }}'
9 | schema: '{{ env_var(''DEV_SCHEMA_NAME'')}}'
10 | threads: 16
11 | token: '{{ env_var(''DEV_TOKEN'') }}'
12 | type: databricks
13 |
14 | bigquery:
15 | dataset: "{{ env_var('BIGQUERY_DATASET') }}"
16 | project: "{{ env_var('BIGQUERY_PROJECT') }}"
17 | job_execution_timeout_seconds: "{{ env_var('DBT_JOB_TIMEOUT') | int }}"
18 | threads: "{{ env_var('DBT_THREADS') | int }}"
19 | job_retries: "{{ env_var('DBT_JOB_RETRIES') | int }}"
20 | method: oauth
21 | location: us
22 | priority: interactive
23 | type: bigquery
24 |
25 | snowflake:
26 | type: "snowflake"
27 | account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}"
28 | user: "{{ env_var('SNOWFLAKE_USER') }}"
29 | password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
30 | role: "{{ env_var('SNOWFLAKE_ROLE') }}"
31 | database: "{{ env_var('SNOWFLAKE_DATABASE') }}"
32 | warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}"
33 | schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}"
34 | threads: 10
35 |
--------------------------------------------------------------------------------
/models/marts/dim_dag_monitoring_dag.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | models:
4 | - name: dim_dag_monitoring_dag
5 | description: "Dimension table for Airflow DAGs"
6 | columns:
7 | - name: 'dag_sk'
8 | description: "Surrogate key. Composed of: dag_id"
9 | tests:
10 | - unique
11 | - not_null
12 |
13 | - name: dag_id
14 | description: "Source table ID."
15 | tests:
16 | - unique
17 | - not_null
18 |
19 | - name: dag_name
20 | description: "Descriptive name for DAG."
21 |
22 | - name: dag_description
23 | description: "Description about the DAG."
24 |
25 | - name: dag_frequency
26 | description: "DAG frequency for execution."
27 |
28 | - name: timetable_description
29 | description: "DAGs execution scheduling."
30 |
31 | - name: is_paused
32 | description: "Is the DAG paused."
33 |
34 | - name: is_active
35 | description: "Is the DAG active."
36 |
37 | - name: fileloc
38 | description: "path to file that needs to be imported to load this DAG. `source_code` in source: 11"
39 |
40 | - name: owners
41 | description: "DAG owner."
42 |
43 | - name: source_system
44 | description: "System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow"
45 |
--------------------------------------------------------------------------------
/profiles.yml:
--------------------------------------------------------------------------------
1 | dbt_dag_monitoring:
2 | target: "{{ env_var('DBT_DEFAULT_TARGET', 'databricks')}}"
3 | outputs:
4 | databricks:
5 | type: databricks
6 | catalog: "{{ env_var('DEV_CATALOG_NAME')}}"
7 | schema: "{{ env_var('DEV_SCHEMA_NAME')}}"
8 | host: "{{ env_var('DEV_HOST') }}"
9 | http_path: "{{ env_var('DEV_HTTP_PATH') }}"
10 | token: "{{ env_var('DEV_TOKEN') }}"
11 | threads: 16
12 | ansi_mode: false
13 |
14 | bigquery:
15 | dataset: "{{ env_var('BIGQUERY_DATASET') }}"
16 | project: "{{ env_var('BIGQUERY_PROJECT') }}"
17 | job_execution_timeout_seconds: "{{ env_var('DBT_JOB_TIMEOUT') | int }}"
18 | threads: "{{ env_var('DBT_THREADS') | int }}"
19 | job_retries: "{{ env_var('DBT_JOB_RETRIES') | int }}"
20 | method: oauth
21 | location: us
22 | priority: interactive
23 | type: bigquery
24 |
25 | snowflake:
26 | type: "snowflake"
27 | account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}"
28 | user: "{{ env_var('SNOWFLAKE_USER') }}"
29 | password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
30 | role: "{{ env_var('SNOWFLAKE_ROLE') }}"
31 | database: "{{ env_var('SNOWFLAKE_DATABASE') }}"
32 | warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}"
33 | schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}"
34 | threads: 10
--------------------------------------------------------------------------------
/models/marts/fact_dag_monitoring_dag_run.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | models:
4 | - name: fact_dag_monitoring_dag_run
5 | description: "Events table of Airflow DAG executions"
6 | columns:
7 | - name: 'dag_run_sk'
8 | description: "Surrogate key. Composed of: dag_run_id, execution_start_date and execution_end_date"
9 | tests:
10 | - unique
11 | - not_null
12 |
13 | - name: dag_fk
14 | description: "DAGs ids."
15 | tests:
16 | - relationships:
17 | to: ref('dim_dag_monitoring_dag')
18 | field: dag_sk
19 |
20 | - name: generated_date
21 | description: "date of execution of the DAG."
22 | tests:
23 | - relationships:
24 | to: ref('dbt_utils_day')
25 | field: date_day
26 |
27 | - name: external_trigger
28 | description: "Points out if the DAG execution was triggered externally (True / False)."
29 |
30 | - name: execution_start_date
31 | description: "Data e hora em que iniciou a execução da DAG."
32 |
33 | - name: execution_end_date
34 | description: "Date and hour when the DAG execution ended."
35 |
36 | - name: run_type
37 | description: "Type of execution of the DAG."
38 |
39 | - name: duration
40 | description: "Execution time in seconds."
41 |
42 | - name: source_system
43 | description: "System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow."
44 |
--------------------------------------------------------------------------------
/models/marts/fact_dag_monitoring_task_fail.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | models:
4 | - name: fact_dag_monitoring_task_fail
5 | description: "Events table of Airflow tasks failures"
6 | columns:
7 | - name: task_fail_sk
8 | description: "Surrogate key. Composed of: task_fail_id, execution_end_date and execution_start_date"
9 | tests:
10 | - unique
11 | - not_null
12 |
13 | - name: task_fk
14 | description: "Unique identifier of the task execution."
15 | tests:
16 | - relationships:
17 | to: ref('dim_dag_monitoring_task')
18 | field: task_sk
19 |
20 | - name: dag_fk
21 | description: "Id of the DAG."
22 | tests:
23 | - relationships:
24 | to: ref('dim_dag_monitoring_dag')
25 | field: dag_sk
26 |
27 | - name: generated_date
28 | description: "Date of execution of the DAG."
29 | tests:
30 | - relationships:
31 | to: ref('dbt_utils_day')
32 | field: date_day
33 |
34 | - name: execution_start_date
35 | description: " Date and hour when the DAG execution started."
36 |
37 | - name: execution_end_date
38 | description: "Date and hour when the DAG execution ended."
39 |
40 | - name: duration
41 | description: "Duration of the execution in seconds."
42 |
43 | - name: source_system
44 | description: " System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow."
45 |
--------------------------------------------------------------------------------
/models/staging/airflow_sources/stg_task_instance_airflow.sql:
--------------------------------------------------------------------------------
1 | with
2 | renamed as (
3 | select distinct
4 | {{ cast_as_string('task_id') }} as task_id
5 | , {{ cast_as_string('dag_id') }} as dag_id
6 | , {{ cast_as_string('run_id') }} as run_id
7 | , {{ cast_as_date('start_date') }} as execution_date
8 | , start_date as execution_start_date
9 | , end_date as execution_end_date
10 | , duration
11 | , state as state_task_instance
12 | , try_number
13 | , hostname
14 | , pool as task_pool
15 | , priority_weight
16 | , operator
17 | , case
18 | when map_index = -1 then 'no mapping'
19 | end as map_index
20 | from {{ source('raw_airflow_monitoring', 'task_instance') }}
21 | )
22 | , created_id as (
23 | /*Table does not have a unique identifier, the id was created as the unique identification of records*/
24 | select
25 | {{ dbt_utils.generate_surrogate_key(['task_id', 'dag_id', 'run_id']) }} as task_instance_sk
26 | , task_id
27 | , dag_id
28 | , run_id
29 | , execution_date
30 | , execution_start_date
31 | , execution_end_date
32 | , duration
33 | , state_task_instance
34 | , try_number
35 | , hostname
36 | , task_pool
37 | , priority_weight
38 | , operator
39 | , map_index
40 | from renamed
41 | )
42 | select *
43 | from created_id
44 |
--------------------------------------------------------------------------------
/models/staging/adf_sources/stg_task_instance_adf.sql:
--------------------------------------------------------------------------------
1 | with
2 | renamed as (
3 | select distinct
4 | {{ cast_as_string('activityRunId') }} as task_id
5 | , {{ cast_as_string('pipelineName') }} as dag_id
6 | , {{ cast_as_string('pipelineRunId') }} as run_id
7 | , {{ cast_as_date('activityRunStart') }} as execution_date
8 | , activityRunStart as execution_start_date
9 | , activityRunEnd as execution_end_date
10 | , durationInMs / 1000 as duration
11 | , status as state_task_instance
12 | , retryAttempt as try_number
13 | , "not_implemented_for_adf" as hostname
14 | , "not_implemented_for_adf" as task_pool
15 | , "not_implemented_for_adf" as priority_weight
16 | , activityName as operator
17 | , "not_implemented_for_adf" as map_index
18 | from {{ source('raw_adf_monitoring', 'adf_activity_runs') }}
19 | )
20 | , created_id as (
21 | /* I'm not sure this is necessary for ADF */
22 | select
23 | {{ dbt_utils.generate_surrogate_key(['task_id', 'dag_id', 'run_id']) }} as task_instance_sk
24 | , task_id
25 | , dag_id
26 | , run_id
27 | , execution_date
28 | , execution_start_date
29 | , execution_end_date
30 | , duration
31 | , state_task_instance
32 | , try_number
33 | , hostname
34 | , task_pool
35 | , priority_weight
36 | , operator
37 | , map_index
38 | from renamed
39 | )
40 | select *
41 | from created_id
42 |
--------------------------------------------------------------------------------
/macros/day_of_week.sql:
--------------------------------------------------------------------------------
1 | {% macro day_of_week(column) %}
2 | {{ return(adapter.dispatch('day_of_week')(column)) }}
3 | {%- endmacro %}
4 |
5 | {% macro databricks__day_of_week(column) %}
6 | case
7 | when {{ column }} = 1 then 'Sunday'
8 | when {{ column }} = 2 then 'Monday'
9 | when {{ column }} = 3 then 'Tuesday'
10 | when {{ column }} = 4 then 'Wednesday'
11 | when {{ column }} = 5 then 'Thursday'
12 | when {{ column }} = 6 then 'Friday'
13 | when {{ column }} = 7 then 'Saturday'
14 | end as name_of_day
15 | {% endmacro %}
16 |
17 | {% macro snowflake__day_of_week(column) %}
18 | case
19 | when {{ column }} = 0 then 'Sunday'
20 | when {{ column }} = 1 then 'Monday'
21 | when {{ column }} = 2 then 'Tuesday'
22 | when {{ column }} = 3 then 'Wednesday'
23 | when {{ column }} = 4 then 'Thursday'
24 | when {{ column }} = 5 then 'Friday'
25 | when {{ column }} = 6 then 'Saturday'
26 | end as name_of_day
27 | {% endmacro %}
28 |
29 | {% macro bigquery__day_of_week(column) %}
30 | case
31 | when {{ column }} = 1 then 'Sunday'
32 | when {{ column }} = 2 then 'Monday'
33 | when {{ column }} = 3 then 'Tuesday'
34 | when {{ column }} = 4 then 'Wednesday'
35 | when {{ column }} = 5 then 'Thursday'
36 | when {{ column }} = 6 then 'Friday'
37 | when {{ column }} = 7 then 'Saturday'
38 | end as name_of_day
39 | {% endmacro %}
--------------------------------------------------------------------------------
/integration_tests/dbt_project.yml:
--------------------------------------------------------------------------------
1 | name: 'dbt_dag_monitoring_integration_tests'
2 | version: '0.2.0'
3 |
4 | require-dbt-version: [">=1.0.0", "<2.0.0"]
5 |
6 | config-version: 2
7 |
8 | target-path: "target"
9 | clean-targets: ["target", "dbt_modules", "dbt_packages"]
10 | macro-paths: ["macros"]
11 | log-path: "logs"
12 | seed-paths: ["seeds"]
13 |
14 | profile: dbt_dag_monitoring_integration_tests
15 |
16 | dispatch:
17 | - macro_namespace: 'dbt_utils'
18 | search_order: ['dbt_utils_integration_tests', 'dbt_utils']
19 |
20 | sources:
21 | dbt_dag_monitoring:
22 | staging:
23 | adf_sources:
24 | raw_adf_monitoring:
25 | +enabled: false
26 | databricks_workflow_sources:
27 | raw_databricks_workflow_monitoring:
28 | +enabled: true
29 | airflow_sources:
30 | raw_airflow_monitoring:
31 | +enabled: false
32 |
33 | models:
34 | dbt_dag_monitoring:
35 | marts:
36 | +materialized: table
37 | staging:
38 | adf_sources:
39 | +enabled: false
40 | airflow_sources:
41 | +enabled: false
42 | databricks_workflow_sources:
43 | +enabled: true
44 | +materialized: view
45 |
46 | vars:
47 | dbt_dag_monitoring:
48 | enabled_sources: ['databricks_workflow'] #Possible values: 'airflow', 'adf' or 'databricks_workflow'
49 | dag_monitoring_start_date: cast('2023-01-01' as date)
50 | dag_monitoring_airflow_database: cdi_dev
51 | dag_monitoring_airflow_schema: ci_dbt_dag_monitoring
52 | dag_monitoring_databricks_database: cdi_dev
53 | dag_monitoring_databricks_schema: ci_dbt_dag_monitoring
54 | dag_monitoring_adf_database: cdi_dev
55 | dag_monitoring_adf_schema: ci_dbt_dag_monitoring
--------------------------------------------------------------------------------
/models/marts/bridge_dag_monitoring.sql:
--------------------------------------------------------------------------------
1 | with
2 | fact_dag_run as (
3 | select
4 | dag_run_sk
5 | , dag_fk
6 | , generated_date
7 | from {{ ref('fact_dag_monitoring_dag_run') }}
8 | )
9 | , fact_task_fail as (
10 | select
11 | task_fail_sk
12 | , dag_fk
13 | , task_fk
14 | , generated_date
15 | from {{ ref('fact_dag_monitoring_task_fail') }}
16 | )
17 | , fact_task_instance as (
18 | select
19 | task_instance_sk
20 | , dag_fk
21 | , task_fk
22 | , generated_date
23 | from {{ ref('fact_dag_monitoring_task_instance') }}
24 | )
25 | , bridge as (
26 | select
27 | coalesce(fact_task_instance.generated_date, fact_dag_run.generated_date, fact_task_fail.generated_date) as generated_date
28 | , coalesce(fact_dag_run.dag_fk, fact_task_fail.dag_fk, fact_task_instance.dag_fk) as dag_fk
29 | , coalesce(fact_task_instance.task_fk, fact_task_fail.task_fk) as task_fk
30 | , fact_dag_run.dag_run_sk as dag_run_fk
31 | , fact_task_fail.task_fail_sk as task_fail_fk
32 | , fact_task_instance.task_instance_sk as task_instance_fk
33 | from fact_task_instance
34 | full outer join fact_task_fail
35 | on fact_task_instance.task_fk = fact_task_fail.task_fk
36 | and fact_task_instance.generated_date = fact_task_fail.generated_date
37 | full outer join fact_dag_run
38 | on coalesce(fact_task_instance.dag_fk, fact_task_fail.dag_fk) = fact_dag_run.dag_fk
39 | and fact_task_instance.generated_date = fact_dag_run.generated_date
40 | )
41 | select *
42 | from bridge
--------------------------------------------------------------------------------
/dbt_project.yml:
--------------------------------------------------------------------------------
1 | name: 'dbt_dag_monitoring'
2 | version: '0.2.0'
3 |
4 | require-dbt-version: [">=1.3.0", "<2.0.0"]
5 |
6 | config-version: 2
7 |
8 | target-path: "target"
9 | clean-targets: ["target", "dbt_modules", "dbt_packages"]
10 | macro-paths: ["macros"]
11 | log-path: "logs"
12 | seed-paths: ["seeds"]
13 |
14 | profile: dbt_dag_monitoring
15 |
16 | # When using this package for testing purposes, you can uncomment the blocks below and set to true only the sources, models and vars you want to test
17 |
18 | # sources:
19 | # dbt_dag_monitoring:
20 | # staging:
21 | # adf_sources:
22 | # raw_adf_monitoring:
23 | # +enabled: false
24 | # databricks_workflow_sources:
25 | # raw_databricks_workflow_monitoring:
26 | # +enabled: true
27 | # airflow_sources:
28 | # raw_airflow_monitoring:
29 | # +enabled: false
30 |
31 | # models:
32 | # dbt_dag_monitoring:
33 | # marts:
34 | # +materialized: table
35 | # staging:
36 | # adf_sources:
37 | # +enabled: false
38 | # airflow_sources:
39 | # +enabled: false
40 | # databricks_workflow_sources:
41 | # +enabled: true
42 | # +materialized: view
43 |
44 | # Only one source type can be enabled at a time
45 |
46 | # vars:
47 | # dbt_dag_monitoring:
48 | # enabled_sources: ['databricks_workflow'] #Possible values: 'airflow', 'adf' or 'databricks_workflow'
49 | # dag_monitoring_start_date: cast('2023-01-01' as date)
50 | # dag_monitoring_airflow_database: cdi_dev
51 | # dag_monitoring_airflow_schema: ci_dbt_dag_monitoring
52 | # dag_monitoring_databricks_database: cdi_dev
53 | # dag_monitoring_databricks_schema: ci_dbt_dag_monitoring
54 | # dag_monitoring_adf_database: cdi_dev
55 | # dag_monitoring_adf_schema: ci_dbt_dag_monitoring
--------------------------------------------------------------------------------
/models/marts/fact_dag_monitoring_task_instance.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | models:
4 | - name: fact_dag_monitoring_task_instance
5 | description: "Events table of Airflow task instances"
6 | columns:
7 | - name: task_instance_sk
8 | description: "Surrogate key. Composed of: task_instance_id, execution_end_date, and execution_start_date"
9 | tests:
10 | - unique
11 | - not_null
12 |
13 | - name: task_fk
14 | description: "Id of the tasks"
15 | tests:
16 | - relationships:
17 | to: ref('dim_dag_monitoring_task')
18 | field: task_sk
19 |
20 | - name: dag_fk
21 | description: "Id of the DAG."
22 | tests:
23 | - relationships:
24 | to: ref('dim_dag_monitoring_dag')
25 | field: dag_sk
26 |
27 | - name: generated_date
28 | description: "Date of execution of the DAG."
29 | tests:
30 | - relationships:
31 | to: ref('dbt_utils_day')
32 | field: date_day
33 |
34 | - name: execution_start_date
35 | description: " Date and hour when the DAG execution started."
36 |
37 | - name: execution_end_date
38 | description: "Date and hour when the DAG execution ended."
39 |
40 | - name: duration
41 | description: "Duration of the execution in seconds."
42 |
43 | - name: state_task_instance
44 | description: "The state of the task execution."
45 |
46 | - name: try_number
47 | description: "The number of attempts to execute."
48 |
49 | - name: priority_weight
50 | description: "Task priority."
51 |
52 | - name: source_system
53 | description: " System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow."
54 |
--------------------------------------------------------------------------------
/integration_tests/README.md:
--------------------------------------------------------------------------------
1 | > [!WARNING]
2 | > ADF source and models were NOT tested in Snowflake connection!
3 |
4 | This README is about the integration tests step inside the ci.yml.
5 |
6 | Integration tests work in a similar way to how an user can reference the dbt-dag-monitoring in their project. That is how we start it, running
7 | dbt deps in the packages: local: ../
8 | In that way, we are pulling dbt-dag-monitoring.
9 |
10 | As we are simulating the use of the project somewhere else, when checking dbt_project.yml inside the integration_tests folder, we can see that we configured the sources, the models, and vars of it. Those settings are crucial to run the project.
11 |
12 | When running the continuous integration in the Github actions, where the integration tests are actually analyzed, we are using the dbt_project.yml inside the integration_tests folder as reference.
13 |
14 | By looking at the profiles.yml folder, we can see that we use 3 connections: Databricks, BigQuery and Snowflake. Whatever modifications are done in the project, it must pass successfully in the three data warehouses to be accepted to merge.
15 |
16 | When testing new features in the project, the user can save time by having credentials at each DW to test the changes locally, before passing them to the pull request, due to the fact that each commit that is analyzed by the CI, takes 7 minutes minimum to run.
17 |
18 | If you cannot find the schema to inspect on a DW, that is because the schema is created and then dropped as soon as the CI finishes for that specific DW.
19 | To clarify how the integration test works inside the continuous integration, take a look at the diagram below:
20 |
21 |
22 |
23 | > [!NOTE]
24 | > Databricks works as a DW and as a source for the models.
25 |
26 | As you can see in the image above, the .sh files are used to switch between an origin source and a destination source.
27 |
28 |
29 | Because the sources usually transition in the pattern Databricks to ADF to Airflow, a dedicated shell script, “change_dbt_project_from_databricks_to_airflow.sh”, was added to make the transition directly from Databricks to Airflow, since ADF was not tested on Snowflake.
30 |
31 |
--------------------------------------------------------------------------------
/models/marts/fact_dag_monitoring_dag_run.sql:
--------------------------------------------------------------------------------
1 | with
2 | dim_dag as (
3 | select
4 | dag_id
5 | , dag_sk as dag_fk
6 | from {{ ref('dim_dag_monitoring_dag') }}
7 | )
8 | , util_days as (
9 | select cast(date_day as date) as date_day
10 | from {{ ref('dbt_utils_day') }}
11 | )
12 | , stg_dag_run as (
13 | {% for src in var('enabled_sources') -%}
14 | select
15 | dag_run_id
16 | , dag_id
17 | , run_id
18 | , run_date
19 | , execution_start_date
20 | , execution_end_date
21 | , duration
22 | , dag_state
23 | , external_trigger
24 | , run_type
25 | , '{{ src }}' as source_system
26 | from {{ ref('stg_dag_run_' + src) }}
27 | {% if not loop.last -%} union {% endif -%}
28 | {% endfor -%}
29 | )
30 | , joined as (
31 | select
32 | stg_dag_run.dag_run_id
33 | , dim_dag.dag_fk
34 | , dim_dag.dag_id
35 | , stg_dag_run.run_id
36 | , util_days.date_day
37 | , stg_dag_run.execution_start_date
38 | , stg_dag_run.execution_end_date
39 | , stg_dag_run.dag_state
40 | , stg_dag_run.external_trigger
41 | , stg_dag_run.run_type
42 | , stg_dag_run.duration
43 | , stg_dag_run.source_system
44 | from stg_dag_run
45 | left join dim_dag on stg_dag_run.dag_id = dim_dag.dag_id
46 | left join util_days on {{ cast_as_date('stg_dag_run.run_date') }} = {{ cast_as_date('util_days.date_day') }}
47 | )
48 | , joined_with_sk as (
49 | select
50 | {{ dbt_utils.generate_surrogate_key([
51 | 'dag_run_id'
52 | , 'execution_start_date'
53 | , 'execution_end_date'
54 | , 'run_id']) }} as dag_run_sk
55 | , dag_fk
56 | , date_day as generated_date
57 | , execution_start_date
58 | , execution_end_date
59 | , dag_state
60 | , external_trigger
61 | , run_type
62 | , duration
63 | , source_system
64 | from joined
65 | )
66 | select *
67 | from joined_with_sk
68 |
--------------------------------------------------------------------------------
/models/staging/databricks_workflow_sources/source.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | sources:
4 | - name: raw_databricks_workflow_monitoring
5 |     description: "Raw data extracted from Databricks for Databricks workflow monitoring analysis."
6 |     database: "{{ var('dag_monitoring_databricks_database', '')}}"
7 |     schema: "{{ var('dag_monitoring_databricks_schema', 'raw_databricks_workflow_monitoring') }}"
8 |     tables:
9 |       - name: jobs
10 |         description: "Table that contains information about Databricks jobs (DAGs)."
11 | columns:
12 | - name: job_id
13 | description: "Table unique identifier."
14 | tests:
15 | - not_null
16 | - unique
17 |
18 | - name: settings
19 | description: '{{ doc("settings_doc") }}'
20 |
21 | - name: created_time
22 | description: "Timestamp of the job creation"
23 |
24 | - name: creator_user_name
25 | description: "E-mail of the job creator"
26 |
27 | - name: ind_extraction_date
28 | description: "Date of extraction of the table"
29 |
30 | - name: job_runs
31 |         description: "Table that contains the execution data of the Databricks pipelines (jobs)."
32 | columns:
33 | - name: run_id
34 | description: "Unique identifier of the table. Job run id"
35 | tests:
36 | - not_null
37 | - unique
38 |
39 | - name: job_id
40 | description: "Job id."
41 |
42 | - name: state
43 | description: '{{ doc("state_doc") }}'
44 |
45 | - name: trigger
46 | description: "It tells how the pipeline execution was triggered."
47 |
48 | - name: start_time
49 | description: "Timestamp when the DAG execution started."
50 |
51 | - name: end_time
52 | description: "Timestamp when the DAG execution ended."
53 |
54 | - name: execution_duration
55 |         description: "Duration of the DAG execution in milliseconds."
56 |
57 | - name: run_type
58 | description: "Type of execution"
59 |
60 | - name: tasks
61 | description: '{{ doc("tasks_doc") }}'
62 |
63 | - name: dbt_utils_day
64 | description: "Table that contains data of the dates created by the dbt_utils macro."
65 |
--------------------------------------------------------------------------------
/integration_tests/macros/adf_activity_runs.sql:
--------------------------------------------------------------------------------
1 | {% macro adf_activity_runs() %}
2 | {% set create_table %}
3 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_activity_runs (
4 | activityRunEnd TIMESTAMP,
5 | activityName STRING,
6 | activityRunStart TIMESTAMP,
7 | activityType STRING,
8 | durationInMs INT,
9 | retryAttempt INT,
10 | error_errorCode STRING,
11 | error_message STRING,
12 | error_failureType STRING,
13 | error_target STRING,
14 | activityRunId STRING,
15 | linkedServiceName STRING,
16 | pipelineName STRING,
17 | pipelineRunId STRING,
18 | status STRING,
19 | output_effectiveIntegrationRuntime STRING,
20 | input_source_type STRING
21 | );
22 | {% endset %}
23 |
24 | {% set insert_table %}
25 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.adf_activity_runs VALUES
26 | (
27 | CAST('2024-08-20T03:30:10.973554Z' AS TIMESTAMP),
28 | CAST('Set CurrentDate' AS STRING),
29 | CAST('2024-08-20T03:30:10.742531Z' AS TIMESTAMP),
30 | CAST('SetVariable' AS STRING),
31 | CAST(231 AS INT),
32 | NULL,
33 | NULL,
34 | NULL,
35 | NULL,
36 | CAST('Set CurrentDate' AS STRING),
37 | CAST('f653c43a-6508-42f8-8467-0e10152aa3f9' AS STRING),
38 | NULL,
39 | CAST('PL-FEMFILESCSVTODatalakeLandingZone-N' AS STRING),
40 | CAST('9f81a5eb-a7ca-482e-833e-db6082b73db5' AS STRING),
41 | CAST('Succeeded' AS STRING),
42 | NULL,
43 | NULL
44 | ),
45 | (
46 | CAST('2024-08-20T03:30:11.538784Z' AS TIMESTAMP),
47 | CAST('Set Timestamp' AS STRING),
48 | CAST('2024-08-20T03:30:11.274576Z' AS TIMESTAMP),
49 | CAST('SetVariable' AS STRING),
50 | CAST(264 AS INT),
51 | NULL,
52 | NULL,
53 | NULL,
54 | NULL,
55 | CAST('Set Timestamp' AS STRING),
56 | CAST('b8c48c2f-b0e6-45f0-a502-cee31dffba2e' AS STRING),
57 | NULL,
58 | CAST('PL-FEMFILESCSVTODatalakeLandingZone-N' AS STRING),
59 | CAST('9f81a5eb-a7ca-482e-833e-db6082b73db5' AS STRING),
60 | CAST('Succeeded' AS STRING),
61 | NULL,
62 | NULL
63 | );
64 |
65 | {% endset %}
66 |
67 | {% do run_query(create_table) %}
68 | {% do log("finished creating table adf_activity_runs", info=true) %}
69 |
70 | {% do run_query(insert_table) %}
71 | {% do log("finished insert table adf_activity_runs", info=true) %}
72 | {% endmacro %}
--------------------------------------------------------------------------------
/integration_tests/macros/seed__dag.sql:
--------------------------------------------------------------------------------
1 | {% macro seed__dag() -%}
2 | {{ return(adapter.dispatch('seed__dag')()) }}
3 | {%- endmacro %}
4 |
5 | {% macro default__seed__dag() %}
6 | {% set create_table %}
7 | create or replace table `{{ target.database }}`.{{ target.schema }}.dag (
8 | dag_id STRING,
9 | is_paused BOOLEAN,
10 | is_subdag BOOLEAN,
11 | is_active BOOLEAN,
12 | last_parsed_time TIMESTAMP,
13 | last_pickled TIMESTAMP,
14 | last_expired TIMESTAMP,
15 | scheduler_lock STRING,
16 | pickle_id INT64,
17 | fileloc STRING,
18 | owners STRING,
19 | description STRING,
20 | default_view STRING,
21 | schedule_interval STRING,
22 | root_dag_id STRING,
23 | next_dagrun TIMESTAMP,
24 | next_dagrun_create_after TIMESTAMP,
25 | max_active_tasks INT64,
26 | has_task_concurrency_limits BOOLEAN,
27 | max_active_runs INT64,
28 | next_dagrun_data_interval_start TIMESTAMP,
29 | next_dagrun_data_interval_end TIMESTAMP,
30 | has_import_errors BOOLEAN,
31 | timetable_description STRING,
32 | processor_subdir STRING
33 | );
34 | {% endset %}
35 |
36 | {% set insert_table %}
37 |
38 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.dag VALUES
39 | (
40 | 'test_docker', false, false, false, TIMESTAMP('2022-12-16 09:35:19.433-03'), NULL, NULL, '', NULL,
41 | '/opt/airflow/dags/repo/airflow/dags/test_dag.py', 'Indicium', '', 'grid', '0 6 * * *', '',
42 | TIMESTAMP('2022-12-16 03:00:00.000-03'), TIMESTAMP('2022-12-17 03:00:00.000-03'), 16, false, 16,
43 | TIMESTAMP('2022-12-16 03:00:00.000-03'), TIMESTAMP('2022-12-17 03:00:00.000-03'), false, 'At 06:00', ''
44 | ),
45 | (
46 | 'dbt', true, false, false, TIMESTAMP('2022-11-25 16:12:51.922-03'), NULL, NULL, '', NULL,
47 | '/opt/airflow/dags/repo/airflow/dags/all_dags.py', 'airflow', '', 'grid', '7/15 9-23 * * *', '',
48 | TIMESTAMP('2022-02-01 06:07:00.000-03'), TIMESTAMP('2022-02-01 06:22:00.000-03'), 16, false, 1,
49 | TIMESTAMP('2022-02-01 06:07:00.000-03'), TIMESTAMP('2022-02-01 06:22:00.000-03'), true,
50 | 'Every 15 minutes, starting at 7 minutes past the hour, between 09:00 and 23:59', ''
51 | );
52 |
53 | {% endset %}
54 |
55 | {% do run_query(create_table) %}
56 | {% do log("finished creating table dag", info=true) %}
57 |
58 | {% do run_query(insert_table) %}
59 | {% do log("finished insert table dag", info=true) %}
60 |
61 |
62 | {% endmacro %}
--------------------------------------------------------------------------------
/models/marts/fact_dag_monitoring_task_fail.sql:
--------------------------------------------------------------------------------
1 | with
2 | dim_dag as (
3 | select
4 | dag_id
5 | , dag_sk as dag_fk
6 | from {{ ref('dim_dag_monitoring_dag') }}
7 | )
8 | , dim_task as (
9 | select
10 | task_sk as task_fk
11 | , task_id
12 | , dag_id
13 | from {{ ref('dim_dag_monitoring_task') }}
14 | )
15 | , util_days as (
16 | select cast(date_day as date) as date_day
17 | from {{ ref('dbt_utils_day') }}
18 | )
19 | , stg_task_fail as (
20 | {% for src in var('enabled_sources') -%}
21 | select
22 | task_fail_id
23 | , task_id
24 | , dag_id
25 | , run_id
26 | , execution_start_date
27 | , execution_end_date
28 | , duration
29 | , execution_date
30 | , map_index
31 | , '{{ src }}' as source_system
32 | from {{ ref('stg_task_fail_' + src) }}
33 | {% if not loop.last -%} union {% endif -%}
34 | {% endfor -%}
35 | )
36 | , joined as (
37 | select
38 | stg_task_fail.task_fail_id
39 | , stg_task_fail.task_id
40 | , dim_dag.dag_id
41 | , stg_task_fail.run_id
42 | , dim_dag.dag_fk
43 | , dim_task.task_fk
44 | , util_days.date_day
45 | , stg_task_fail.execution_start_date
46 | , stg_task_fail.execution_end_date
47 | , stg_task_fail.duration
48 | , stg_task_fail.source_system
49 | from stg_task_fail
50 | left join dim_dag on stg_task_fail.dag_id = dim_dag.dag_id
51 | left join dim_task on
52 | stg_task_fail.task_id = dim_task.task_id
53 | and stg_task_fail.dag_id = dim_task.dag_id
54 | left join util_days on {{ cast_as_date('stg_task_fail.execution_date') }} = {{ cast_as_date('util_days.date_day') }}
55 | )
56 | , surrogate_key as (
57 | select
58 | {{ dbt_utils.generate_surrogate_key([
59 | 'task_fail_id'
60 | , 'execution_start_date'
61 | , 'execution_end_date'
62 | , 'run_id']
63 | ) }} as task_fail_sk
64 | , dag_fk
65 | , task_fk
66 | , date_day as generated_date
67 | , execution_start_date
68 | , execution_end_date
69 | , duration
70 | , source_system
71 | from joined
72 | )
73 | select *
74 | from surrogate_key
75 |
--------------------------------------------------------------------------------
/integration_tests/seeds/airflow/task_instance.csv:
--------------------------------------------------------------------------------
1 | "task_id","dag_id","run_id","start_date","end_date","duration","state","try_number","hostname","unixname","job_id","pool","queue","priority_weight","operator","queued_dttm","pid","max_tries","executor_config","pool_slots","queued_by_job_id","external_executor_id","trigger_id","trigger_timeout","next_method","next_kwargs","map_index","updated_at"
2 | dbt_source_test,dbt_bitrix,scheduled__2023-01-12T06:00:00+00:00,2023-01-13 03:01:07.644 -0300,2023-01-13 03:01:27.852 -0300,20.207217,success,1,dbtbitrixdbtsourcetest-32bc3af501374e48913fad10b54fdd67,root,332,default_pool,default,17,DockerOperator,2023-01-13 03:00:58.037 -0300,21,2,�\u0004}�.,1,201,,,,,"",-1,
3 | dump_table1_to_DL,sample_fist,scheduled__2022-02-03T00:00:00+00:00,2022-11-25 15:58:27.688 -0300,2022-11-25 15:58:28.388 -0300,0.700336,success,1,samplefistdumptable1todl-0d85d3eca2b14a58b822dbb5f5c21bec,root,20,default_pool,default,2,BashOperator,2022-11-25 15:57:34.854 -0300,21,1,�\u0004}�.,1,7,,,,,"",-1,
4 | copy_table4_DL_to_DW,sample_fist,scheduled__2022-02-03T00:00:00+00:00,,,,scheduled,0,"",root,,default_pool,default,1,BashOperator,,,1,�\u0004}�.,1,,,,,,"",-1,
5 | dump_table3_to_DL,sample_fist,scheduled__2022-02-03T00:00:00+00:00,2022-11-25 15:58:27.252 -0300,2022-11-25 15:58:27.923 -0300,0.670813,success,1,samplefistdumptable3todl-dfadd4af7fde472593ee7c824e6ca2ae,root,18,default_pool,default,2,BashOperator,2022-11-25 15:57:34.854 -0300,21,1,�\u0004}�.,1,7,,,,,"",-1,
6 | dump_table5_to_DL,sample_fist,scheduled__2022-02-03T00:00:00+00:00,2022-11-25 15:59:17.480 -0300,2022-11-25 15:59:18.020 -0300,0.540106,success,1,samplefistdumptable5todl-54e2543a9e694a63b55e112e99c2053d,root,22,default_pool,default,1,BashOperator,2022-11-25 15:57:34.854 -0300,21,1,�\u0004}�.,1,7,,,,,"",-1,
7 | copy_table3_DL_to_DW,sample_fist,scheduled__2022-02-03T00:00:00+00:00,,,,scheduled,0,"",root,,default_pool,default,1,BashOperator,,,1,�\u0004}�.,1,,,,,,"",-1,
8 | dump_table4_to_DL,sample_fist,scheduled__2022-02-03T00:00:00+00:00,2022-11-25 15:58:27.540 -0300,2022-11-25 15:58:28.263 -0300,0.722546,success,1,samplefistdumptable4todl-de6d057adeeb4f4b94b777491f5e3611,root,19,default_pool,default,2,BashOperator,2022-11-25 15:57:34.854 -0300,20,1,�\u0004}�.,1,7,,,,,"",-1,
9 | copy_table1_DL_to_DW,sample_fist,scheduled__2022-02-03T00:00:00+00:00,,,,scheduled,0,"",root,,default_pool,default,1,BashOperator,,,1,�\u0004}�.,1,,,,,,"",-1,
10 | delay,sample_fist,scheduled__2022-02-02T00:00:00+00:00,2022-11-25 15:55:20.412 -0300,2022-11-25 15:55:20.412 -0300,0.0,success,0,"",root,,default_pool,default,10,DummyOperator,,,1,�\u0004}�.,1,,,,,,"",-1,
11 |
--------------------------------------------------------------------------------
/models/staging/adf_sources/stg_dag_adf.sql:
--------------------------------------------------------------------------------
1 | with exploded_by_pipeline as (
2 | select
3 | *
4 | from
5 | {{ source('raw_adf_monitoring', 'adf_triggers') }}
6 | {{ flatten_data('properties.pipelines') }} as pipelines
7 | ),
8 |
9 | triggers_renamed as (
10 | select
11 | id as trigger_id
12 | , case
13 | when properties.typeProperties.recurrence.frequency = 'Hour' then 'hourly'
14 | when properties.typeProperties.recurrence.frequency = 'Day' then 'daily'
15 | when properties.typeProperties.recurrence.frequency = 'Week' then 'weekly'
16 | when properties.typeProperties.recurrence.frequency = 'Month' then 'monthly'
17 | when properties.typeProperties.recurrence.frequency = 'Minute' then 'minutely'
18 | end as dag_frequency
19 | ,{{ cast_as_string('properties.typeProperties.recurrence.schedule') }} as timetable_description
20 | ,properties.typeProperties.recurrence.frequency as adf_frequency
21 | ,properties.typeProperties.recurrence.startTime as start_time
22 | , case
23 | when properties.runtimeState = 'Started' then 'true'
24 | else 'false'
25 | end as is_active
26 | , case
27 | when properties.runtimeState = 'Started' then 'false'
28 | else 'true'
29 | end as is_paused
30 | ,properties.runtimeState
31 | ,{{adf_pipelines_name('pipelines.pipelineReference.referenceName') }} as pipeline_name
32 |
33 | from exploded_by_pipeline
34 | ),
35 | pipeline_with_row_number as (
36 | select
37 | *,
38 | row_number() over (partition by id order by etag desc) row_number
39 | from {{ source('raw_adf_monitoring', 'adf_pipelines') }}
40 | ),
41 | pipeline_dedup as (
42 | select * from
43 | pipeline_with_row_number
44 | where row_number = 1
45 | ),
46 | pipelines_and_triggers as (
47 | select
48 | pipelines.id as dag_id
49 | ,pipelines.name as dag_name
50 | ,triggers.*
51 | from pipeline_dedup pipelines
52 | left join triggers_renamed triggers
53 | on pipelines.name = triggers.pipeline_name
54 | )
55 | select
56 | {{ cast_as_string('dag_name') }} as dag_id
57 | , {{ cast_as_string('dag_name') }} as dag_name
58 |     , 'not_implemented_for_adf' as dag_description
59 | , dag_frequency
60 | , timetable_description
61 | , is_paused
62 | , is_active
63 | , 'not_implemented_for_adf' as fileloc
64 | , 'not_implemented_for_adf' as owners
65 | , null as ind_extraction_date
66 | from
67 | pipelines_and_triggers
68 |
--------------------------------------------------------------------------------
/integration_tests/macros/seed__dag_run.sql:
--------------------------------------------------------------------------------
1 | {% macro seed__dag_run() -%}
2 | {{ return(adapter.dispatch('seed__dag_run')()) }}
3 | {%- endmacro %}
4 |
5 | {% macro default__seed__dag_run() %}
6 | {% set create_table %}
7 | create or replace table `{{ target.database }}`.{{ target.schema }}.dag_run (
8 | id INT64,
9 | dag_id STRING,
10 | execution_date TIMESTAMP,
11 | state STRING,
12 | run_id STRING,
13 | external_trigger BOOLEAN,
14 | conf STRING,
15 | end_date TIMESTAMP,
16 | start_date TIMESTAMP,
17 | run_type STRING,
18 | last_scheduling_decision TIMESTAMP,
19 | dag_hash STRING,
20 | creating_job_id INT64,
21 | queued_at TIMESTAMP,
22 | data_interval_start TIMESTAMP,
23 | data_interval_end TIMESTAMP,
24 | log_template_id INT64,
25 | updated_at TIMESTAMP
26 | );
27 |
28 | {% endset %}
29 |
30 | {% set insert_table %}
31 |
32 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.dag_run VALUES
33 | (
34 | 87755,
35 | 'airflow_status_monitoring',
36 | TIMESTAMP('2024-07-04 10:56:00.000-03'),
37 | 'success',
38 | 'scheduled__2024-07-04T13:56:00+00:00',
39 | false,
40 | '�\u0004}�.',
41 | TIMESTAMP('2024-07-04 10:57:15.119-03'),
42 | TIMESTAMP('2024-07-04 10:57:00.256-03'),
43 | 'scheduled',
44 | TIMESTAMP('2024-07-04 10:57:15.109-03'),
45 | '2606cfccb8540961ee80c09fe32dcc8d',
46 | 110841,
47 | TIMESTAMP('2024-07-04 10:57:00.210-03'),
48 | TIMESTAMP('2024-07-04 10:56:00.000-03'),
49 | TIMESTAMP('2024-07-04 10:57:00.000-03'),
50 | 1,
51 | TIMESTAMP('2024-07-04 10:57:15.120-03')
52 | ),
53 | (
54 | 84260,
55 | 'dag_tags_rbac',
56 | TIMESTAMP('2024-07-02 05:30:00.000-03'),
57 | 'success',
58 | 'scheduled__2024-07-02T08:30:00+00:00',
59 | false,
60 | '�\u0004}�.',
61 | TIMESTAMP('2024-07-02 06:01:20.844-03'),
62 | TIMESTAMP('2024-07-02 06:00:00.624-03'),
63 | 'scheduled',
64 | TIMESTAMP('2024-07-02 06:01:20.836-03'),
65 | '3616896069a7d5a3b40f4478372f03da',
66 | 110841,
67 | TIMESTAMP('2024-07-02 06:00:00.550-03'),
68 | TIMESTAMP('2024-07-02 05:30:00.000-03'),
69 | TIMESTAMP('2024-07-02 06:00:00.000-03'),
70 | 1,
71 | TIMESTAMP('2024-07-02 06:01:20.848-03')
72 | );
73 |
74 | {% endset %}
75 |
76 | {% do run_query(create_table) %}
77 | {% do log("finished creating table dag_run", info=true) %}
78 |
79 | {% do run_query(insert_table) %}
80 | {% do log("finished insert table dag_run", info=true) %}
81 |
82 |
83 | {% endmacro %}
--------------------------------------------------------------------------------
/models/docs/universal.md:
--------------------------------------------------------------------------------
1 | [comment]: <> (Universal)
2 |
3 | {% docs state_doc %}
4 |     JSON with the state property of the pipeline execution, in the following format:
5 | {
6 | "life_cycle_state": "TERMINATED",
7 | "result_state": "SUCCESS",
8 | "state_message": "",
9 | "user_cancelled_or_timedout": false
10 | }
11 | {% enddocs %}
12 |
13 |
14 | {% docs tasks_doc %}
15 |     List of objects with information about the tasks. Example of a task in JSON:
16 | {
17 | "attempt_number": "0",
18 | "cleanup_duration": "0",
19 | "cluster_instance": {
20 | "cluster_id": "0426-123-kq2r1tew",
21 | "spark_context_id": "123"
22 | },
23 | "dbt_task": null,
24 | "depends_on": null,
25 | "description": null,
26 | "end_time": "1701855074931",
27 | "execution_duration": "110000",
28 | "existing_cluster_id": "0426-123-kq2r1tew",
29 | "git_source": null,
30 | "libraries": null,
31 | "notebook_task": {
32 | "notebook_path": "/notebook",
33 | "source": "WORKSPACE"
34 | },
35 | "run_id": "123",
36 | "setup_duration": "1000",
37 | "start_time": "1701854963851",
38 | "state": {
39 | "life_cycle_state": "TERMINATED",
40 | "result_state": "SUCCESS",
41 | "state_message": "",
42 | "user_cancelled_or_timedout": false
43 | },
44 | "task_key": "ADFafb-123"
45 | }
46 | {% enddocs %}
47 |
48 | {% docs settings_doc %}
49 |     Job configuration JSON, like the following:
50 | {
51 | "email_notifications": {
52 | "no_alert_for_skipped_runs": false,
53 | "on_failure": null,
54 | "on_start": null,
55 | "on_success": null
56 | },
57 | "format": "MULTI_TASK",
58 | "max_concurrent_runs": "1",
59 | "name": "Fact_TransactionProtected_V2",
60 | "notification_settings": null,
61 | "schedule": {
62 | "pause_status": "UNPAUSED",
63 | "quartz_cron_expression": "19 0 9 * * ?",
64 | "timezone_id": "America/Sao_Paulo"
65 | },
66 | "timeout_seconds": "0"
67 | }
68 | {% enddocs %}
69 |
--------------------------------------------------------------------------------
/integration_tests/seeds/airflow/task_fail.csv:
--------------------------------------------------------------------------------
1 | "id","task_id","dag_id","start_date","end_date","duration","map_index","run_id"
2 | 1,dbt_freshness,dbt_bitrix,2022-12-12 14:59:57.480 -0300,2022-12-12 14:59:59.035 -0300,1,-1,scheduled__2022-12-11T06:00:00+00:00
3 | 2,dbt_source_test,dbt_bitrix,2022-12-12 15:01:00.909 -0300,2022-12-12 15:01:02.861 -0300,1,-1,scheduled__2022-12-11T06:00:00+00:00
4 | 3,dbt_freshness,dbt_bitrix,2022-12-15 16:09:59.406 -0300,2022-12-15 16:10:01.164 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00
5 | 4,dbt_freshness,dbt_bitrix,2022-12-15 16:11:33.938 -0300,2022-12-15 16:11:35.532 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00
6 | 5,dbt_freshness,dbt_bitrix,2022-12-15 16:16:44.720 -0300,2022-12-15 16:16:46.632 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00
7 | 6,dbt_freshness,dbt_bitrix,2022-12-15 17:59:26.557 -0300,2022-12-15 17:59:28.156 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00
8 | 7,dbt_freshness,dbt_bitrix,2022-12-15 18:04:37.127 -0300,2022-12-15 18:04:38.722 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00
9 | 8,dbt_freshness,dbt_bitrix,2022-12-15 18:18:55.583 -0300,2022-12-15 18:18:57.274 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00
10 | 9,dbt_freshness,dbt_bitrix,2022-12-15 18:26:31.585 -0300,2022-12-15 18:26:33.287 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00
11 | 10,testing_dockeroperator,test_docker,2022-12-15 18:55:53.329 -0300,2022-12-15 18:55:55.427 -0300,2,-1,manual__2022-12-15T21:55:43.582236+00:00
12 | 11,dbt_freshness,dbt_bitrix,2022-12-16 09:49:25.205 -0300,2022-12-16 09:50:02.437 -0300,37,-1,scheduled__2022-12-15T06:00:00+00:00
13 | 12,dbt_freshness,dbt_bitrix,2022-12-16 11:27:41.768 -0300,2022-12-16 11:27:54.878 -0300,13,-1,scheduled__2022-12-15T06:00:00+00:00
14 | 13,dbt_freshness,dbt_bitrix,2022-12-16 14:14:03.698 -0300,2022-12-16 14:14:17.741 -0300,14,-1,scheduled__2022-12-15T06:00:00+00:00
15 | 14,dbt_freshness,dbt_bitrix,2022-12-16 14:24:27.859 -0300,2022-12-16 14:24:42.344 -0300,14,-1,manual__2022-12-16T17:24:17.778111+00:00
16 | 15,dbt_freshness,dbt_bitrix,2022-12-16 14:55:23.124 -0300,2022-12-16 14:55:36.900 -0300,13,-1,manual__2022-12-16T17:55:14.271759+00:00
17 | 16,dbt_freshness,dbt_bitrix,2022-12-16 15:31:16.824 -0300,2022-12-16 15:31:55.576 -0300,38,-1,manual__2022-12-16T18:31:06.560155+00:00
18 | 17,dbt_freshness,dbt_bitrix,2022-12-17 03:00:17.475 -0300,2022-12-17 03:00:55.782 -0300,38,-1,scheduled__2022-12-16T06:00:00+00:00
19 | 18,dbt_freshness,dbt_bitrix,2022-12-17 03:06:06.742 -0300,2022-12-17 03:06:43.661 -0300,36,-1,scheduled__2022-12-16T06:00:00+00:00
20 | 19,dbt_freshness,dbt_bitrix,2022-12-18 03:00:19.544 -0300,2022-12-18 03:00:57.838 -0300,38,-1,scheduled__2022-12-17T06:00:00+00:00
21 | 20,dbt_freshness,dbt_bitrix,2022-12-18 03:06:07.809 -0300,2022-12-18 03:06:44.903 -0300,37,-1,scheduled__2022-12-17T06:00:00+00:00
--------------------------------------------------------------------------------
/models/marts/dim_dag_monitoring_task.sql:
--------------------------------------------------------------------------------
1 | with
2 | stg_task_instance as (
3 | {% for src in var('enabled_sources') -%}
4 | select distinct
5 | task_id
6 | , dag_id
7 | , hostname
8 | , operator
9 | , task_pool
10 | , map_index
11 | , '{{ src }}' as source_system
12 | from {{ ref('stg_task_instance_' + src) }}
13 | {% if not loop.last -%} union {% endif -%}
14 | {% endfor -%}
15 | )
16 | , stg_task_fail as (
17 | {% for src in var('enabled_sources') -%}
18 | select distinct
19 | task_id
20 | , dag_id
21 | , map_index
22 | , {{ cast_as_string('null') }} as hostname
23 | , {{ cast_as_string('null') }} as operator
24 | , {{ cast_as_string('null') }} as task_pool
25 | , '{{ src }}' as source_system
26 | from {{ ref('stg_task_fail_' + src) }}
27 | {% if not loop.last -%} union {% endif -%}
28 | {% endfor -%}
29 | )
30 | , union_task_instance_with_fail as (
31 | select
32 | task_id
33 | , dag_id
34 | , map_index
35 | , hostname
36 | , operator
37 | , task_pool
38 | , source_system
39 | from stg_task_instance
40 | union all
41 | select
42 | task_id
43 | , dag_id
44 | , map_index
45 | , hostname
46 | , operator
47 | , task_pool
48 | , source_system
49 | from stg_task_fail
50 | )
51 | , dedup_dim_task as (
52 | select
53 | task_id
54 | , dag_id
55 | , map_index
56 | , hostname
57 | , operator
58 | , task_pool
59 | , source_system
60 | , row_number() over(
61 | partition by
62 | task_id
63 | , dag_id
64 | , source_system
65 | order by
66 | task_id
67 | , dag_id
68 | , source_system
69 | ) as dedup
70 | from union_task_instance_with_fail
71 | )
72 | , dim_task_with_sk as (
73 | select
74 | {{ dbt_utils.generate_surrogate_key([
75 | 'task_id'
76 | , 'dag_id']
77 | ) }} as task_sk
78 | , task_id
79 | , dag_id
80 | , map_index
81 | , hostname
82 | , operator
83 | , task_pool
84 | , source_system
85 | from dedup_dim_task
86 | where dedup = 1
87 | )
88 | select *
89 | from dim_task_with_sk
90 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release Drafter and Publisher
2 |
3 | on:
4 | pull_request:
5 | types:
6 | - closed
7 | branches:
8 | - main
9 |
10 |
11 | permissions:
12 | contents: read
13 |
14 | jobs:
15 | new_release:
16 | if: github.event.pull_request.merged == true
17 | permissions:
18 | # write permission is required to create a github release
19 | contents: write
20 | # write permission is required for autolabeler
21 | # otherwise, read permission is required at least
22 | pull-requests: write
23 | runs-on: ubuntu-latest
24 |
25 | steps:
26 | - name: Checkout code
27 | uses: actions/checkout@v2
28 | with:
29 | fetch-depth: 0
30 |
31 | - name: Get branch name
32 | id: getbranch
33 | run: echo ::set-output name=BRANCH::${GITHUB_HEAD_REF}
34 |
35 | # ${{ github.ref }} was not giving v* as tag name, but refs/tags/v* instead, so I had to abbreviate it
36 | - name: Get latest abbreviated tag
37 | id: gettag
38 | run: echo ::set-output name=TAG::$(git describe --tags $(git rev-list --tags --max-count=1)) # get the latest tag across all branches and put it in the output TAG
39 |
40 | - name: Calculate next version
41 | id: nextversion
42 | run: |
43 | BRANCH_NAME="${{ steps.getbranch.outputs.BRANCH }}"
44 | CURRENT_VERSION="${{ steps.gettag.outputs.TAG }}"
45 | IFS='.' read -ra VERSION_PARTS <<< "$CURRENT_VERSION"
46 | if [[ $BRANCH_NAME =~ ^(major|release|Major|Release)/ ]]; then
47 | VERSION_PARTS[0]=$((VERSION_PARTS[0] + 1))
48 | VERSION_PARTS[1]=0
49 | VERSION_PARTS[2]=0
50 | elif [[ $BRANCH_NAME =~ ^(feature|minor|Feature|Minor)/ ]]; then
51 | VERSION_PARTS[1]=$((VERSION_PARTS[1] + 1))
52 | VERSION_PARTS[2]=0
53 | elif [[ $BRANCH_NAME =~ ^(patch|fix|hotfix|bugfix|Patch|Fix|Hotfix|Bugfix)/ ]]; then
54 | VERSION_PARTS[2]=$((VERSION_PARTS[2] + 1))
55 | fi
56 | NEXT_VERSION="${VERSION_PARTS[0]}.${VERSION_PARTS[1]}.${VERSION_PARTS[2]}"
57 | echo ::set-output name=NEXT_VERSION::"$NEXT_VERSION"
58 |
59 | - name: Create and publish new tag
60 | run: |
61 | git tag ${{ steps.nextversion.outputs.NEXT_VERSION }}
62 | git push origin ${{ steps.nextversion.outputs.NEXT_VERSION }}
63 |
64 | - uses: release-drafter/release-drafter@v5
65 | with:
66 | commitish: main
67 | name: "dbt-dag-monitoring ${{ steps.nextversion.outputs.NEXT_VERSION }}"
68 | tag: ${{ steps.nextversion.outputs.NEXT_VERSION }}
69 | publish: true
70 | env:
71 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
72 |
--------------------------------------------------------------------------------
/integration_tests/seeds/adf/adf_activity_runs.csv:
--------------------------------------------------------------------------------
1 | activityRunEnd,activityName,activityRunStart,activityType,durationInMs,retryAttempt,error_errorCode,error_message,error_failureType,error_target,activityRunId,linkedServiceName,pipelineName,pipelineRunId,status,output_effectiveIntegrationRuntime,input_source_type
2 | 2024-08-20T03:30:10.9735549Z,Set CurrentDate,2024-08-20T03:30:10.742531Z,SetVariable,231,,,,,Set CurrentDate,f653c43a-6508-42f8-8467-0e10152aa3f9,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,,
3 | 2024-08-20T03:30:11.5387841Z,Set Timestamp,2024-08-20T03:30:11.2745768Z,SetVariable,264,,,,,Set Timestamp,b8c48c2f-b0e6-45f0-a502-cee31dffba2e,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,,
4 | 2024-08-20T03:38:09.0201429Z,Restart LoadingControl,2024-08-20T03:30:11.9547107Z,DatabricksNotebook,477065,,,,,Restart LoadingControl,af8e3927-c2e3-4c54-9b07-b4c0df7d6564,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,AutoResolveIntegrationRuntime (Brazil South),
5 | 2024-08-20T03:39:13.7734401Z,Get All Tables,2024-08-20T03:38:10.4390219Z,Lookup,63334,,,,,Get All Tables,8df489d3-f7d3-4462-9080-6e5557e78638,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,AutoResolveIntegrationRuntime (Brazil South),AzureDatabricksDeltaLakeSource
6 | 2024-08-20T03:42:27.2972053Z,For Each Tables,2024-08-20T03:39:15.8346054Z,ForEach,191462,,,,,For Each Tables,35a4c708-cda1-470e-b202-ae76aa743c0d,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,,
7 | 2024-08-20T03:39:16.8973044Z,Set CurrentDate foreach,2024-08-20T03:39:16.6507636Z,SetVariable,246,,,,,Set CurrentDate foreach,913afaa0-b40b-4c8c-b95a-48011c5e0e1c,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,,
8 | 2024-08-20T03:39:16.907916Z,Set CurrentDate foreach,2024-08-20T03:39:16.652598Z,SetVariable,255,,,,,Set CurrentDate foreach,9316bfbf-e4e8-4c3a-a214-474524a71eac,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,,
9 | 2024-08-20T03:39:16.8983084Z,Set CurrentDate foreach,2024-08-20T03:39:16.6742498Z,SetVariable,224,,,,,Set CurrentDate foreach,98aaf33e-86eb-4b32-98c4-7af526d677c5,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,,
10 | 2024-08-20T03:39:16.9416579Z,Set CurrentDate foreach,2024-08-20T03:39:16.673797Z,SetVariable,267,,,,,Set CurrentDate foreach,1d466d96-8210-4f9a-94b9-d25405dae8a7,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,,
11 | 2024-08-20T03:40:37.1476554Z,Update StartDate,2024-08-20T03:39:17.2774453Z,DatabricksNotebook,79870,,,,,Update StartDate,6e608cd1-4444-4061-8384-cb36946508a2,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,AutoResolveIntegrationRuntime (Brazil South),
12 |
--------------------------------------------------------------------------------
/models/marts/fact_dag_monitoring_task_instance.sql:
--------------------------------------------------------------------------------
1 | with
2 | dim_dag as (
3 | select
4 | dag_id
5 | , dag_sk as dag_fk
6 | from {{ ref('dim_dag_monitoring_dag') }}
7 | )
8 | , dim_task as (
9 | select
10 | task_sk as task_fk
11 | , task_id
12 | , dag_id
13 | from {{ ref('dim_dag_monitoring_task') }}
14 | )
15 | , util_days as (
16 | select cast(date_day as date) as date_day
17 | from {{ ref('dbt_utils_day') }}
18 | )
19 | , stg_task_instance as (
20 | {% for src in var('enabled_sources') -%}
21 | select
22 | task_instance_sk
23 | , task_id
24 | , dag_id
25 | , run_id
26 | , execution_date
27 | , execution_start_date
28 | , execution_end_date
29 | , duration
30 | , state_task_instance
31 | , try_number
32 | , priority_weight
33 | , '{{ src }}' as source_system
34 | from {{ ref('stg_task_instance_' + src) }}
35 | {% if not loop.last -%} union {% endif -%}
36 | {% endfor -%}
37 | )
38 | , joined as (
39 | select
40 | stg_task_instance.task_instance_sk
41 | , stg_task_instance.task_id
42 | , stg_task_instance.dag_id
43 | , stg_task_instance.run_id
44 | , dim_dag.dag_fk
45 | , dim_task.task_fk
46 | , util_days.date_day
47 | , stg_task_instance.execution_start_date
48 | , stg_task_instance.execution_end_date
49 | , stg_task_instance.duration
50 | , stg_task_instance.state_task_instance
51 | , stg_task_instance.try_number
52 | , stg_task_instance.priority_weight
53 | , stg_task_instance.source_system
54 | from stg_task_instance
55 | left join dim_dag on stg_task_instance.dag_id = dim_dag.dag_id
56 | left join dim_task on
57 | stg_task_instance.task_id = dim_task.task_id
58 | and stg_task_instance.dag_id = dim_task.dag_id
59 | left join util_days on {{ cast_as_date('stg_task_instance.execution_date') }} = {{ cast_as_date('util_days.date_day') }}
60 | )
61 | , surrogate_key as (
62 | select
63 | {{ dbt_utils.generate_surrogate_key([
64 | 'task_instance_sk'
65 | , 'execution_start_date'
66 | , 'execution_end_date'
67 | , 'run_id']) }} as task_instance_sk
68 | , dag_fk
69 | , task_fk
70 | , date_day as generated_date
71 | , execution_start_date
72 | , execution_end_date
73 | , duration
74 | , state_task_instance
75 | , try_number
76 | , priority_weight
77 | , source_system
78 | from joined
79 | )
80 | select *
81 | from surrogate_key
82 |
--------------------------------------------------------------------------------
/integration_tests/macros/seed__task_instance.sql:
--------------------------------------------------------------------------------
1 | {% macro seed__task_instance() %}
2 | {% set create_table %}
3 | CREATE OR REPLACE TABLE `{{ target.database }}`.{{ target.schema }}.task_instance (
4 | TASK_ID STRING,
5 | DAG_ID STRING,
6 | RUN_ID STRING,
7 | START_DATE TIMESTAMP,
8 | END_DATE TIMESTAMP,
9 | DURATION FLOAT64,
10 | STATE STRING,
11 | TRY_NUMBER INT64,
12 | HOSTNAME STRING,
13 | UNIXNAME STRING,
14 | JOB_ID NUMERIC,
15 | POOL STRING,
16 | QUEUE STRING,
17 | PRIORITY_WEIGHT INT64,
18 | OPERATOR STRING,
19 | QUEUED_DTTM TIMESTAMP,
20 | PID INT64,
21 | MAX_TRIES INT64,
22 | EXECUTOR_CONFIG STRING,
23 | POOL_SLOTS INT64,
24 | QUEUED_BY_JOB_ID NUMERIC,
25 | EXTERNAL_EXECUTOR_ID NUMERIC,
26 | TRIGGER_ID NUMERIC,
27 | TRIGGER_TIMEOUT INT64,
28 | NEXT_METHOD INT64,
29 | NEXT_KWARGS INT64,
30 | MAP_INDEX INT64,
31 | UPDATED_AT TIMESTAMP
32 | );
33 |
34 | {% endset %}
35 |
36 | {% set insert_table %}
37 |
38 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.task_instance (
39 | TASK_ID,
40 | DAG_ID,
41 | RUN_ID,
42 | START_DATE,
43 | END_DATE,
44 | DURATION,
45 | STATE,
46 | TRY_NUMBER,
47 | HOSTNAME,
48 | UNIXNAME,
49 | JOB_ID,
50 | POOL,
51 | QUEUE,
52 | PRIORITY_WEIGHT,
53 | OPERATOR,
54 | QUEUED_DTTM,
55 | PID,
56 | MAX_TRIES,
57 | EXECUTOR_CONFIG,
58 | POOL_SLOTS,
59 | QUEUED_BY_JOB_ID,
60 | EXTERNAL_EXECUTOR_ID,
61 | TRIGGER_ID,
62 | TRIGGER_TIMEOUT,
63 | NEXT_METHOD,
64 | NEXT_KWARGS,
65 | MAP_INDEX,
66 | UPDATED_AT
67 | )
68 | VALUES
69 | (
70 | 'dbt_source_test',
71 | 'dbt_bitrix',
72 | 'scheduled__2023-01-12T06:00:00+00:00',
73 | TIMESTAMP('2023-12-27T02:30:16.714366Z'),
74 | TIMESTAMP('2023-12-21T09:01:57.631415Z'),
75 | 20.207217,
76 | 'success',
77 | 1,
78 | 'dbtbitrixdb1374e48913fad10b54fdd67',
79 | 'root',
80 | 332,
81 | 'default_pool',
82 | 'default',
83 | 17,
84 | 'DockerOperator',
85 | '2024-02-02T11:01:54.071588Z',
86 | 21,
87 | 2,
88 | '�\u0004}�.',
89 | 1,
90 | 201,
91 | NULL,
92 | NULL,
93 | NULL,
94 | NULL,
95 | NULL,
96 | -1,
97 | NULL
98 | ),
99 | (
100 | 'dump_table1_to_DL',
101 | 'sample_fist',
102 | 'scheduled__2022-02-03T00:00:00+00:00',
103 | TIMESTAMP('2023-12-21T09:01:57.631415Z'),
104 | TIMESTAMP('2023-12-23T08:30:25.791135Z'),
105 | 0.700336,
106 | 'success',
107 | 1,
108 | 'samplefistdumpa58b822dbb5f5c21bec',
109 | 'root',
110 | 20,
111 | 'default_pool',
112 | 'default',
113 | 2,
114 | 'BashOperator',
115 | '2024-02-02T11:02:10.162511Z',
116 | 21,
117 | 1,
118 | '�\u0004}�.',
119 | 1,
120 | 7,
121 | NULL,
122 | NULL,
123 | NULL,
124 | NULL,
125 | NULL,
126 | -1,
127 | NULL
128 | );
129 |
130 |
131 | {% endset %}
132 |
133 | {% do run_query(create_table) %}
134 | {% do log("finished creating table task_instance", info=true) %}
135 |
136 | {% do run_query(insert_table) %}
137 | {% do log("finished insert table task_instance", info=true) %}
138 |
139 | {% endmacro %}
--------------------------------------------------------------------------------
/integration_tests/seeds/airflow/dag.csv:
--------------------------------------------------------------------------------
1 | "dag_id","is_paused","is_subdag","is_active","last_parsed_time","last_pickled","last_expired","scheduler_lock","pickle_id","fileloc","owners","description","default_view","schedule_interval","root_dag_id","next_dagrun","next_dagrun_create_after","max_active_tasks","has_task_concurrency_limits","max_active_runs","next_dagrun_data_interval_start","next_dagrun_data_interval_end","has_import_errors","timetable_description","processor_subdir"
2 | test_docker,false,false,false,2022-12-16 09:35:19.433 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/test_dag.py,Indicium,,grid,"""0 6 * * * """,,2022-12-16 03:00:00.000 -0300,2022-12-17 03:00:00.000 -0300,16,false,16,2022-12-16 03:00:00.000 -0300,2022-12-17 03:00:00.000 -0300,false,At 06:00,
3 | dbt,true,false,false,2022-11-25 16:12:51.922 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,airflow,,grid,"""7/15 9-23 * * *""",,2022-02-01 06:07:00.000 -0300,2022-02-01 06:22:00.000 -0300,16,false,1,2022-02-01 06:07:00.000 -0300,2022-02-01 06:22:00.000 -0300,true,"Every 15 minutes, starting at 7 minutes past the hour, between 09:00 and 23:59",
4 | enterprise_sync_marketing,true,false,false,2024-03-19 17:41:15.253 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,DSaaS,,grid,"""0 18 * * *""",,2024-03-18 15:00:00.000 -0300,2024-03-19 15:00:00.000 -0300,16,false,1,2024-03-18 15:00:00.000 -0300,2024-03-19 15:00:00.000 -0300,true,At 18:00,/opt/airflow/dags/b50e21f1f72af1012e31506b48198ba61244fd4f/airflow/dags
5 | sample_fist,true,false,false,2022-11-25 16:12:51.927 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,airflow,,grid,"""@daily""",,2022-02-02 21:00:00.000 -0300,,16,false,1,2022-02-02 21:00:00.000 -0300,2022-02-03 21:00:00.000 -0300,true,At 00:00,
6 | sheets-projects,false,false,false,2023-08-17 16:48:17.895 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/sheets-projects.py,DSaaS,Dag to run xxx pipeline,grid,"""0 6 * * * """,,2023-08-17 03:00:00.000 -0300,2023-08-18 03:00:00.000 -0300,16,false,16,2023-08-17 03:00:00.000 -0300,2023-08-18 03:00:00.000 -0300,true,At 06:00,/opt/airflow/dags/7d473a2591c8679c8529ce9b4600489ee12e7c62/airflow/dags
7 | dbt_snowflake_enterprise_dsaas,true,false,false,2024-04-16 10:33:35.408 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,airflow,,grid,null,,,,16,false,1,,,false,"Never, external triggers only",/opt/airflow/dags/172ebb0521cadde76475236c3412f732339c590b/airflow/dags
8 | enterprise_sync,true,false,false,2024-04-16 10:33:35.411 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,DSaaS,,grid,"""0 17 * * *""",,2024-04-14 14:00:00.000 -0300,2024-04-15 14:00:00.000 -0300,16,false,1,2024-04-14 14:00:00.000 -0300,2024-04-15 14:00:00.000 -0300,false,At 17:00,/opt/airflow/dags/172ebb0521cadde76475236c3412f732339c590b/airflow/dags
9 | enterprise_sync_public,true,false,false,2024-04-16 10:33:35.417 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,DSaaS,,grid,"""0 16 * * *""",,2024-04-14 13:00:00.000 -0300,2024-04-15 13:00:00.000 -0300,16,false,1,2024-04-14 13:00:00.000 -0300,2024-04-15 13:00:00.000 -0300,false,At 16:00,/opt/airflow/dags/172ebb0521cadde76475236c3412f732339c590b/airflow/dags
10 | bitrix_projects_validate_dag,true,false,false,2023-12-19 19:51:48.161 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/bitrix_projects validate_dag.py,DSaaS,Dag to run hours extractions pipeline,grid,"""0 0/12 * * *""",,2023-12-18 21:00:00.000 -0300,2023-12-19 09:00:00.000 -0300,16,false,16,2023-12-18 21:00:00.000 -0300,2023-12-19 09:00:00.000 -0300,false,Every 12 hours,/opt/airflow/dags/fb3710f58ae5f8df430f2bbe40ef211ccebc012b/airflow/dags
11 |
--------------------------------------------------------------------------------
/integration_tests/seeds/airflow/dag_run.csv:
--------------------------------------------------------------------------------
1 | "id","dag_id","execution_date","state","run_id","external_trigger","conf","end_date","start_date","run_type","last_scheduling_decision","dag_hash","creating_job_id","queued_at","data_interval_start","data_interval_end","log_template_id","updated_at"
2 | 87755,airflow_status_monitoring,2024-07-04 10:56:00.000 -0300,success,scheduled__2024-07-04T13:56:00+00:00,false,�\u0004}�.,2024-07-04 10:57:15.119 -0300,2024-07-04 10:57:00.256 -0300,scheduled,2024-07-04 10:57:15.109 -0300,"2606cfccb8540961ee80c09fe32dcc8d",110841,2024-07-04 10:57:00.210 -0300,2024-07-04 10:56:00.000 -0300,2024-07-04 10:57:00.000 -0300,1,2024-07-04 10:57:15.120 -0300
3 | 84260,dag_tags_rbac,2024-07-02 05:30:00.000 -0300,success,scheduled__2024-07-02T08:30:00+00:00,false,�\u0004}�.,2024-07-02 06:01:20.844 -0300,2024-07-02 06:00:00.624 -0300,scheduled,2024-07-02 06:01:20.836 -0300,"3616896069a7d5a3b40f4478372f03da",110841,2024-07-02 06:00:00.550 -0300,2024-07-02 05:30:00.000 -0300,2024-07-02 06:00:00.000 -0300,1,2024-07-02 06:01:20.848 -0300
4 | 84507,airflow_status_monitoring,2024-07-02 09:40:00.000 -0300,success,scheduled__2024-07-02T12:40:00+00:00,false,�\u0004}�.,2024-07-02 09:41:17.534 -0300,2024-07-02 09:41:00.866 -0300,scheduled,2024-07-02 09:41:17.528 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 09:41:00.832 -0300,2024-07-02 09:40:00.000 -0300,2024-07-02 09:41:00.000 -0300,1,2024-07-02 09:41:17.536 -0300
5 | 84775,airflow_status_monitoring,2024-07-02 13:48:00.000 -0300,success,scheduled__2024-07-02T16:48:00+00:00,false,�\u0004}�.,2024-07-02 13:49:15.733 -0300,2024-07-02 13:49:00.347 -0300,scheduled,2024-07-02 13:49:15.728 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 13:49:00.289 -0300,2024-07-02 13:48:00.000 -0300,2024-07-02 13:49:00.000 -0300,1,2024-07-02 13:49:15.742 -0300
6 | 84508,airflow_status_monitoring,2024-07-02 09:41:00.000 -0300,success,scheduled__2024-07-02T12:41:00+00:00,false,�\u0004}�.,2024-07-02 09:42:14.676 -0300,2024-07-02 09:42:00.647 -0300,scheduled,2024-07-02 09:42:14.666 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 09:42:00.591 -0300,2024-07-02 09:41:00.000 -0300,2024-07-02 09:42:00.000 -0300,1,2024-07-02 09:42:14.678 -0300
7 | 84509,airflow_status_monitoring,2024-07-02 09:42:00.000 -0300,success,scheduled__2024-07-02T12:42:00+00:00,false,�\u0004}�.,2024-07-02 09:43:16.490 -0300,2024-07-02 09:43:00.343 -0300,scheduled,2024-07-02 09:43:16.485 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 09:43:00.262 -0300,2024-07-02 09:42:00.000 -0300,2024-07-02 09:43:00.000 -0300,1,2024-07-02 09:43:16.491 -0300
8 | 84346,airflow_status_monitoring,2024-07-02 07:14:00.000 -0300,success,scheduled__2024-07-02T10:14:00+00:00,false,�\u0004}�.,2024-07-02 07:15:17.620 -0300,2024-07-02 07:15:00.566 -0300,scheduled,2024-07-02 07:15:17.614 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 07:15:00.528 -0300,2024-07-02 07:14:00.000 -0300,2024-07-02 07:15:00.000 -0300,1,2024-07-02 07:15:17.621 -0300
9 | 84510,airflow_status_monitoring,2024-07-02 09:43:00.000 -0300,success,scheduled__2024-07-02T12:43:00+00:00,false,�\u0004}�.,2024-07-02 09:44:16.026 -0300,2024-07-02 09:44:00.704 -0300,scheduled,2024-07-02 09:44:16.020 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 09:44:00.665 -0300,2024-07-02 09:43:00.000 -0300,2024-07-02 09:44:00.000 -0300,1,2024-07-02 09:44:16.027 -0300
10 | 84776,bitrix_refresh_access_token,2024-07-02 13:00:00.000 -0300,success,scheduled__2024-07-02T16:00:00+00:00,false,�\u0004}�.,2024-07-02 13:50:19.597 -0300,2024-07-02 13:50:00.487 -0300,scheduled,2024-07-02 13:50:19.592 -0300,b71d3629e5a26934dd20b6e9a3335f84,110841,2024-07-02 13:50:00.361 -0300,2024-07-02 13:00:00.000 -0300,2024-07-02 13:50:00.000 -0300,1,2024-07-02 13:50:19.599 -0300
11 |
--------------------------------------------------------------------------------
/macros/model_task_instance_databricks_workflow.sql:
--------------------------------------------------------------------------------
1 | {% macro model_task_instance_databricks_workflow() -%}
2 | {{ return(adapter.dispatch('model_task_instance_databricks_workflow')()) }}
3 | {%- endmacro %}
4 |
5 |
6 | {% macro default__model_task_instance_databricks_workflow() -%}
7 | with
8 | flatten_data as (
9 | select
10 | job_runs.job_id
11 | , job_runs.inserteddate as inserted_date
12 | , exploded_tasks.*
13 | from
14 | {{ source('raw_databricks_workflow_monitoring', 'job_runs') }} as job_runs
15 | {{ flatten_data('tasks') }} as exploded_tasks
16 | )
17 | , renamed as (
18 | select
19 | {{ cast_as_string("flatten_data.task_key") }} as task_id
20 | , {{ cast_as_string("flatten_data.job_id") }} as dag_id
21 | , {{ cast_as_string("flatten_data.run_id") }} as run_id
22 | , {{cast_as_timestamp('flatten_data.start_time')}} as execution_date
23 | , {{cast_as_timestamp('flatten_data.start_time')}} as execution_start_date
24 | , {{cast_as_timestamp('flatten_data.end_time')}} as execution_end_date
25 | , (flatten_data.execution_duration / 1000) as duration
26 | , {{replace_dot_for_colon('state','result_state')}} as state_task_instance
27 | , attempt_number as try_number
28 | , {{replace_dot_for_colon('notebook_task','notebook_path')}} as hostname
29 | , 'not_implemented_for_databricks_workflow' as task_pool
30 | , 'not_implemented_for_databricks_workflow' as priority_weight
31 | , case
32 | when {{replace_dot_for_colon('notebook_task','notebook_path')}} is not null then
33 | {{replace_dot_for_colon('notebook_task','notebook_path')}}
34 | else flatten_data.task_key
35 | end as operator
36 | , 'not_implemented_for_databricks_workflow' as map_index
37 | from flatten_data
38 | )
39 | select
40 | {{ dbt_utils.generate_surrogate_key(['task_id', 'dag_id', 'run_id']) }} as task_instance_sk
41 | , *
42 | from renamed
43 | {%- endmacro %}
44 |
45 | {% macro snowflake__model_task_instance_databricks_workflow() -%}
46 | with
47 | flatten_data as (
48 | select *
49 | from
50 | {{ source('raw_databricks_workflow_monitoring', 'job_runs') }} as job_runs
51 | {{ flatten_data('"tasks"') }} as exploded_tasks
52 | )
53 | , renamed as (
54 | select
55 | {{ cast_as_string("value:task_key") }} as task_id
56 | , {{ cast_as_string("job_id") }} as dag_id
57 | , {{ cast_as_string("run_id") }} as run_id
58 | , {{cast_as_timestamp('start_time')}} as execution_date
59 | , {{cast_as_timestamp('start_time')}} as execution_start_date
60 | , {{cast_as_timestamp('end_time')}} as execution_end_date
61 | , (execution_duration / 1000) as duration
62 | , {{replace_dot_for_colon('state','result_state')}} as state_task_instance
63 | , {{replace_dot_for_colon('value','attempt_number')}} as try_number
64 | , {{replace_dot_for_colon('value','notebook_task.notebook_path')}} as hostname
65 | , 'not_implemented_for_databricks_workflow' as task_pool
66 | , 'not_implemented_for_databricks_workflow' as priority_weight
67 | , case
68 | when {{replace_dot_for_colon('value','notebook_task.notebook_path')}} is not null then
69 | {{replace_dot_for_colon('value','notebook_task.notebook_path')}}
70 | else {{replace_dot_for_colon('value','task_key')}}
71 | end as operator
72 | , 'not_implemented_for_databricks_workflow' as map_index
73 | from flatten_data
74 | )
75 | select
76 | {{ dbt_utils.generate_surrogate_key(['task_id', 'dag_id', 'run_id']) }} as task_instance_sk
77 | , *
78 | from renamed
79 | {%- endmacro %}
80 |
--------------------------------------------------------------------------------
/models/staging/adf_sources/source.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | sources:
4 | - name: raw_adf_monitoring
5 |     description: "Raw data extracted from ADF for ADF monitoring analysis."
6 |     database: "{{ var('dag_monitoring_adf_database', '')}}"
7 |     schema: "{{ var('dag_monitoring_adf_schema', '') }}"
8 |     tables:
9 |       - name: adf_pipeline_runs
10 |         description: "Table that contains data from ADF pipeline runs."
11 | columns:
12 | - name: id
13 | description: "Table id."
14 | tests:
15 | - not_null
16 | - unique
17 |
18 |           - name: pipelineName
19 |             description: "Name of the pipeline of the DAG run."
20 |             tests:
21 |               - not_null
22 |
23 |           - name: status
24 |             description: "DAG run state."
25 |
26 |           - name: invokedBy
27 |             description: "Indicates whether the DAG run was triggered externally (True / False)."
28 |
29 |           - name: runStart
30 |             description: "Date and time when the DAG run started (used as the execution date)."
31 |
32 |           - name: runEnd
33 |             description: "Date and time when the DAG run ended."
34 |
35 |           - name: durationInMs
36 |             description: "DAG run duration in milliseconds."
40 |
41 | - name: adf_activity_runs
42 |         description: "Table that contains data from ADF activity runs."
43 |         columns:
44 |           - name: activityRunId
45 |             description: "Task (activity run) id."
46 | tests:
47 | - not_null
48 |
49 | - name: pipelineName
50 | description: "Pipeline id to which this activity belongs."
51 | tests:
52 | - not_null
53 |
54 | - name: pipelineRunId
55 | description: "Pipeline execution id to which this activity belongs."
56 | tests:
57 | - not_null
58 |
59 | - name: activityRunStart
60 | description: " Date and time when the execution started."
61 |
62 | - name: activityRunEnd
63 | description: "Date and time when the execution ended."
64 |
65 | - name: durationInMs
66 |             description: "Duration of the execution in milliseconds."
67 |
68 |           - name: map_index
69 |             description: "Mapping index."
70 |
71 | - name: adf_pipelines
72 | description: "Table that contains information about ADF pipelines."
73 | columns:
74 | - name: id
75 |             description: "Table id."
76 |             tests:
77 |               - not_null
78 |               - unique
79 |
80 |           - name: is_paused
81 |             description: "If the DAG is paused."
82 |
83 | - name: is_active
84 | description: "If the DAG is active."
85 |
86 | - name: description
87 | description: "DAG description"
88 |
89 | - name: fileloc
90 | description: "File path that needs to be imported to load this DAG."
91 |
92 | - name: owners
93 | description: "DAG owner."
94 |
95 | - name: timetable_description
96 |             description: "Description of the DAG's schedule (timetable)."
97 |
98 | - name: ind_extraction_date
99 | description: "Date of extraction of the table"
100 |
101 | - name: adf_triggers
102 | description: "Table that contains information about ADF triggers."
103 | columns:
104 | - name: id
105 | description: "Identification of the table."
106 | tests:
107 | - not_null
108 | - unique
109 |
110 | - name: properties.runtimeState
111 | description: "If the trigger is active or not."
112 |
113 | - name: properties.annotations
114 | description: "Annotations in the trigger."
115 |
116 | - name: properties.pipelines
117 | description: "Pipelines that are executed by this trigger."
118 |
119 | - name: properties.typeProperties.recurrence.frequency
120 |             description: "Frequency with which the pipeline is executed, e.g. Hour, Day, Week, Month."
121 |
122 |           - name: properties.typeProperties.recurrence.interval
123 |             description: "Number of frequency units between executions of this trigger, e.g. every 1 Day or every 2 Weeks (1 and 2 being the interval)."
124 |
125 |           - name: properties.typeProperties.recurrence.schedule
126 |             description: "Schedule defined for the recurrence."
127 |
128 |           - name: properties.typeProperties.recurrence.startTime
129 |             description: "Timestamp of the first execution."
130 |
131 | - name: properties.typeProperties.recurrence.timeZone
132 | description: "Time zone of the trigger"
133 |
134 | - name: dbt_utils_day
135 | description: "Table that contains data from the dates created from the dbt_utils macro."
136 |
--------------------------------------------------------------------------------
/models/staging/airflow_sources/source.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | sources:
4 | - name: raw_airflow_monitoring
5 | description: "Raw data extracted from Airflow for Airflow monitoring analysis."
6 | database: "{{ var('dag_monitoring_airflow_database', '')}}"
7 | schema: "{{ var('dag_monitoring_airflow_schema', '') }}"
8 | tables:
9 | - name: dag_run
10 | description: "Table that contains data from Airflow DAG runs."
11 | columns:
12 | - name: id
13 | description: "Id of the table"
14 | tests:
15 | - not_null
16 | - unique
17 |
18 | - name: dag_id
19 |             description: "Id of the DAG this run belongs to."
20 | tests:
21 | - not_null
22 |
23 | - name: execution_date
24 | description: "Date of execution."
25 |
26 | - name: state
27 |             description: "State of the DAG run."
28 |
29 |           - name: external_trigger
30 |             description: "Indicates whether the DAG run was triggered externally (True / False)."
31 |
32 | - name: start_date
33 | description: "Date and time when the DAG run started."
34 |
35 | - name: end_date
36 | description: "Date and time when the DAG run ended."
37 |
38 | - name: run_type
39 | description: "Type of DAG run."
40 |
41 | - name: task_instance
42 | description: "Table that contains data from Airflow task instances."
43 | columns:
44 | - name: task_id
45 | description: "Id of the executed task"
46 | tests:
47 | - not_null
48 |
49 | - name: dag_id
50 |             description: "Id of the DAG."
51 | tests:
52 | - not_null
53 |
54 | - name: run_id
55 | description: "Identification of the run."
56 |
57 | - name: start_date
58 | description: " Date and time when the execution started."
59 |
60 | - name: end_date
61 | description: "Date and time when the execution ended."
62 |
63 | - name: duration
64 | description: "Duration of the execution in seconds."
65 |
66 | - name: state
67 | description: "The state of the task execution."
68 |
69 | - name: try_number
70 | description: "Number of execution attempts."
71 |
72 | - name: hostname
73 | description: "Task hostname."
74 |
75 | - name: pool
76 | description: "The airflow pool in which the task should be executed."
77 |
78 | - name: priority_weight
79 | description: "Priority of the task."
80 |
81 | - name: operator
82 |             description: "Operator used by the task (e.g. BashOperator)."
83 |
84 | - name: queue
85 | description: "Task queue."
86 |
87 | - name: pool_slots
88 | description: "Pool slots quantity."
89 |
90 | - name: map_index
91 | description: "Mapping index."
92 |
93 | - name: task_fail
94 | description: "Table that contains data from Airflow tasks with failures."
95 | columns:
96 | - name: id
97 | description: "Table id."
98 | tests:
99 | - not_null
100 | - unique
101 |
102 | - name: task_id
103 | description: "Task id."
104 | tests:
105 | - not_null
106 |
107 | - name: dag_id
108 | description: "Dag id."
109 | tests:
110 | - not_null
111 |
112 | - name: start_date
113 | description: "Date and time when the execution started."
114 |
115 | - name: end_date
116 | description: "Date and time when the execution ended."
117 |
118 | - name: duration
119 | description: "Duration of the execution in seconds."
120 |
121 | - name: map_index
122 | description: "Mapping index"
123 |
124 | - name: dag
125 | description: "Table that contains information about Airflow DAGs."
126 | columns:
127 | - name: dag_id
128 | description: "Dag id."
129 | tests:
130 | - not_null
131 | - unique
132 |
133 | - name: is_paused
134 |               description: "Whether the DAG is paused."
135 |
136 | - name: is_active
137 |               description: "Whether the DAG is active."
138 |
139 | - name: description
140 | description: "DAG description."
141 |
142 | - name: fileloc
143 | description: "File path that needs to be imported to load this DAG."
144 |
145 | - name: owners
146 | description: "DAG owner."
147 |
148 | - name: timetable_description
149 |               description: "Description of the DAG's schedule (timetable)."
150 |
151 | - name: dbt_utils_day
152 | description: "Table that contains data from dates created from the dbt_utils macro."
153 |
--------------------------------------------------------------------------------
/models/calendar/dim_dag_monitoring_dates.sql:
--------------------------------------------------------------------------------
1 | {% set end_date_query %}
2 | select {{ date_add("year", "100", "current_date()") }}
3 | {% endset %}
4 |
5 | {% if execute %}
6 |     {% set end_date = run_query(end_date_query).columns[0].values()[0] %}
7 | {% else %}
8 | {% set end_date = ' ' %}
9 | {% endif %}
10 |
11 | /* generating dates using a dbt-utils macro */
12 | with
13 | dates_raw as (
14 | {{ dbt_utils.date_spine(
15 | datepart="day",
16 | start_date="cast('1970-01-01' as date)",
17 | end_date="cast('" ~ end_date ~ "' as date)"
18 | )
19 | }}
20 | )
21 |
22 | /* extracting some date information */
23 | , days_info as (
24 | select
25 | cast(date_day as date) as date_day
26 | , extract(DAYOFWEEK from date_day) as week_day
27 | , extract(month from date_day) as month_number
28 | , extract(quarter from date_day) as quarter_number
29 | , {{ day_of_year("date_day") }} as day_of_year
30 | , extract(year from date_day) as year_date
31 | , {{ month_day('date_day') }} as month_day
32 | from dates_raw
33 | )
34 |
35 | /* naming days, months, quarters and semesters */
36 | , days_named as (
37 | select
38 | *
39 | , {{ day_of_week('week_day') }}
40 | , case
41 | when month_number = 1 then 'January'
42 | when month_number = 2 then 'February'
43 | when month_number = 3 then 'March'
44 | when month_number = 4 then 'April'
45 | when month_number = 5 then 'May'
46 | when month_number = 6 then 'June'
47 | when month_number = 7 then 'July'
48 | when month_number = 8 then 'August'
49 | when month_number = 9 then 'September'
50 | when month_number = 10 then 'October'
51 | when month_number = 11 then 'November'
52 | else 'December'
53 | end as month_name
54 | , case
55 | when month_number = 1 then 'Jan'
56 | when month_number = 2 then 'Feb'
57 | when month_number = 3 then 'Mar'
58 | when month_number = 4 then 'Apr'
59 | when month_number = 5 then 'May'
60 | when month_number = 6 then 'Jun'
61 | when month_number = 7 then 'Jul'
62 | when month_number = 8 then 'Aug'
63 | when month_number = 9 then 'Sep'
64 | when month_number = 10 then 'Oct'
65 | when month_number = 11 then 'Nov'
66 | else 'Dec'
67 | end as month_short
68 | , case
69 | when quarter_number = 1 then '1º quarter'
70 | when quarter_number = 2 then '2º quarter'
71 | when quarter_number = 3 then '3º quarter'
72 | else '4º quarter'
73 | end as quarter_name
74 | , case
75 | when quarter_number in(1,2) then 1
76 | else 2
77 | end as semester
78 | , case
79 | when quarter_number in(1,2) then '1º Semester'
80 | else '2º Semester'
81 | end as semester_name
82 | from days_info
83 | )
84 |
85 | , flags_cte as (
86 | /*flags related to holidays and business days*/
87 | select
88 | *
89 | , case
90 | when month_day = '01-01' then true
91 | when month_day = '21-04' then true
92 | when month_day = '01-05' then true
93 | when month_day = '07-09' then true
94 | when month_day = '12-10' then true
95 | when month_day = '02-11' then true
96 | when month_day = '15-11' then true
97 | when month_day = '25-12' then true
98 | else false
99 | end as fl_holiday
100 | , case
101 | when week_day in(6, 0) then false
102 | when month_day = '01-01' then false
103 | when month_day = '21-04' then false
104 | when month_day = '01-05' then false
105 | when month_day = '07-09' then false
106 | when month_day = '12-10' then false
107 | when month_day = '02-11' then false
108 | when month_day = '15-11' then false
109 | when month_day = '25-12' then false
110 | else true
111 | end as fl_business_day
112 | , coalesce(week_day in(6, 0), false) as fl_weekends
113 | from days_named
114 | )
115 |
116 | /* reorganizing the columns */
117 | , final_cte as (
118 | select
119 | date_day
120 | , week_day
121 | , name_of_day
122 | , month_number
123 | , month_name
124 | , month_short
125 | , quarter_number
126 | , quarter_name
127 | , semester
128 | , semester_name
129 | , fl_holiday
130 | , fl_business_day
131 | , fl_weekends
132 | , day_of_year
133 | , year_date
134 | from flags_cte
135 | )
136 |
137 | select *
138 | from final_cte
--------------------------------------------------------------------------------
/integration_tests/macros/adf_pipeline_runs.sql:
--------------------------------------------------------------------------------
1 | {% macro adf_pipeline_runs() -%}
2 | {{ return(adapter.dispatch('adf_pipeline_runs')()) }}
3 | {%- endmacro %}
4 |
5 | {%- macro default__adf_pipeline_runs() -%}
6 | {% set create_table %}
7 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_pipeline_runs(
8 | id STRING,
9 | runId STRING,
10 | debugRunId STRING,
11 | runGroupId STRING,
12 | pipelineName STRING,
13 | parameters STRUCT<
14 | ENVIRONMENT STRING,
15 | RESET_TYPE STRING,
16 | DAYS_BEFORE STRING
17 | >,
18 | invokedBy STRUCT<
19 | id STRING,
20 | name STRING,
21 | invokedByType STRING,
22 | pipelineName STRING,
23 | pipelineRunId STRING
24 | >,
25 | runStart TIMESTAMP,
26 | runEnd TIMESTAMP,
27 | durationInMs BIGINT,
28 | status STRING,
29 | message STRING,
30 |         pipelineReturnValue MAP<STRING, STRING>, -- element types assumed from the sample data below
31 |         lastUpdated TIMESTAMP,
32 |         annotations ARRAY<STRING>,
33 |         runDimension MAP<STRING, STRING>,
34 | isLatest BOOLEAN
35 | );
36 | {% endset %}
37 |
38 | {% set insert_table %}
39 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.adf_pipeline_runs
40 | VALUES
41 | (
42 | '/SUBSCRIPTIONS/9F075ORIES/TLO-DATASTUDIO-ADF-D/pipe082b73db5',
43 | '9f81a5eb-db6082b73db5',
44 | NULL,
45 | '9f81a5eb-a7c82b73db5',
46 | 'PL-FEMFILESCSLandingZone-N',
47 | NAMED_STRUCT(
48 | 'ENVIRONMENT', 'FILESCSV',
49 | 'RESET_TYPE', 'FULL',
50 | 'DAYS_BEFORE', NULL
51 | ),
52 | NAMED_STRUCT(
53 | 'id', 'cf613b7b-e0c4cfbe8',
54 | 'name', 'cf613b7b-e0ce924dc0cfbe8',
55 | 'invokedByType', 'PipelineActivity',
56 | 'pipelineName', 'PL-OrquestraZeroLoadingControl',
57 | 'pipelineRunId', '598ec8ee-6e049c4b8c558'
58 | ),
59 | '2024-08-20T03:30:06.6061079Z',
60 | '2024-08-20T03:47:03.9865228Z',
61 | 1017380,
62 | 'Succeeded',
63 | NULL,
64 | MAP(),
65 | '2024-08-20T03:47:03.9879388Z',
66 | ARRAY(),
67 | MAP(),
68 | true
69 | ),
70 | (
71 | '/SUBSCRIPTIONS/9//pipelinerua1-9313-73fa5c0a3f0e',
72 | '64c7a8c7--73fa5c0a3f0e',
73 | NULL,
74 | '64c7a8c7-30e23fa5c0a3f0e',
75 | 'NET_REC_DAILY_PRICE_UPDATE',
76 | NAMED_STRUCT(
77 | 'ENVIRONMENT', NULL,
78 | 'RESET_TYPE', NULL,
79 | 'DAYS_BEFORE', '1'
80 | ),
81 | NAMED_STRUCT(
82 | 'id', '0858477451681969CU22',
83 | 'name', 'NET_REC_DAILY_UPDATE',
84 | 'invokedByType', 'ScheduleTrigger',
85 | 'pipelineName', NULL,
86 | 'pipelineRunId', NULL
87 | ),
88 | '2024-08-20T12:00:31.2728264Z',
89 | '2024-08-20T13:15:52.6545498Z',
90 | 4521381,
91 | 'Succeeded',
92 | NULL,
93 | MAP(),
94 | '2024-08-20T13:15:52.6550273Z',
95 | ARRAY(
96 | 'ted'
97 | ),
98 | MAP(),
99 | true
100 | );
101 | {% endset %}
102 |
103 | {% do run_query(create_table) %}
104 | {% do log("finished creating table adf_pipeline_runs", info=true) %}
105 |
106 | {% do run_query(insert_table) %}
107 | {% do log("finished insert table adf_pipeline_runs", info=true) %}
108 | {%- endmacro -%}
109 |
110 |
111 | {%- macro bigquery__adf_pipeline_runs() -%}
112 | {% set create_table %}
113 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_pipeline_runs(
114 | id STRING,
115 | runId STRING,
116 | debugRunId STRING,
117 | runGroupId STRING,
118 | pipelineName STRING,
119 | parameters STRUCT<
120 | ENVIRONMENT STRING,
121 | RESET_TYPE STRING,
122 | DAYS_BEFORE STRING
123 | >,
124 | invokedBy STRUCT<
125 | id STRING,
126 | name STRING,
127 | invokedByType STRING,
128 | pipelineName STRING,
129 | pipelineRunId STRING
130 | >,
131 | runStart TIMESTAMP,
132 | runEnd TIMESTAMP,
133 | durationInMs BIGINT,
134 | status STRING,
135 | message STRING,
136 |         pipelineReturnValue ARRAY<STRUCT<key STRING, value STRING>>, -- struct field names assumed
137 |         lastUpdated TIMESTAMP,
138 |         annotations ARRAY<STRING>,
139 |         runDimension ARRAY<STRUCT<key STRING, value STRING>>,
140 | isLatest BOOLEAN
141 | );
142 |
143 | {% endset %}
144 |
145 | {% set insert_table %}
146 | INSERT INTO `{{ target.database }}.{{ target.schema }}.adf_pipeline_runs`
147 | VALUES
148 | (
149 | '/SUBSCRIPTIONS/9FFACTORIES/TLO-DATASTUDIO-ADF-D/pipe082b73db5',
150 | '9f81a5eb-db6082b73db5',
151 | NULL,
152 | '9f81a5eb-a73e-db6082b73db5',
153 | 'PL-FEMFILESCSingZone-N',
154 | STRUCT(
155 | 'FILESCSV' AS ENVIRONMENT,
156 | 'FULL' AS RESET_TYPE,
157 | NULL AS DAYS_BEFORE
158 | ),
159 | STRUCT(
160 | 'cf613b7b-e04dc0cfbe8' AS id,
161 | 'cf613b7b-e0c4924dc0cfbe8' AS name,
162 | 'PipelineActivity' AS invokedByType,
163 | 'PL-OrquestradorSooLoadingControl' AS pipelineName,
164 | '598ec8ee-604c-47c7-a3c0-e049c4b8c558' AS pipelineRunId
165 | ),
166 | TIMESTAMP('2024-08-20T03:30:06.606107Z'),
167 | TIMESTAMP('2024-08-20T03:47:03.986522Z'),
168 | 1017380,
169 | 'Succeeded',
170 | NULL,
171 |         ARRAY<STRUCT<key STRING, value STRING>>[],
172 |         TIMESTAMP('2024-08-20T03:47:03.987938Z'),
173 |         ARRAY<STRING>[],
174 |         ARRAY<STRUCT<key STRING, value STRING>>[],
175 | TRUE
176 | ),
177 | (
178 | '/SUBSCRIPTIONS/9/PROVIDERS/MO-ADF-D/pipelinerua1-9313-73fa5c0a3f0e',
179 | '64c7a8c7-30313-73fa5c0a3f0e',
180 | NULL,
181 | '64c7a8c7-30313-73fa5c0a3f0e',
182 | 'NET_REC_DAILY_PRICE_UPDATE',
183 | STRUCT(
184 | NULL AS ENVIRONMENT,
185 | NULL AS RESET_TYPE,
186 | '1' AS DAYS_BEFORE
187 | ),
188 | STRUCT(
189 | '08584774516819036014561066769CU22' AS id,
190 | 'NET_REC_DAILY_UPDATE' AS name,
191 | 'ScheduleTrigger' AS invokedByType,
192 | NULL AS pipelineName,
193 | NULL AS pipelineRunId
194 | ),
195 | TIMESTAMP('2024-08-20T12:00:31.272826Z'),
196 | TIMESTAMP('2024-08-20T13:15:52.654549Z'),
197 | 4521381,
198 | 'Succeeded',
199 | NULL,
200 |         ARRAY<STRUCT<key STRING, value STRING>>[],
201 |         TIMESTAMP('2024-08-20T13:15:52.655027Z'),
202 |         ARRAY['ted'],
203 |         ARRAY<STRUCT<key STRING, value STRING>>[],
204 | TRUE
205 | );
206 |
207 |
208 | {% endset %}
209 |
210 | {% do run_query(create_table) %}
211 | {% do log("finished creating table adf_pipeline_runs", info=true) %}
212 |
213 | {% do run_query(insert_table) %}
214 | {% do log("finished insert table adf_pipeline_runs", info=true) %}
215 | {%- endmacro -%}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Dag Monitoring
2 | This package allows you to easily monitor your DAGs from well-known orchestration tools, providing helpful information to improve your data pipelines.
3 |
4 | # Table of Contents
5 |
6 | - [Before creating a branch](#Before-creating-a-branch)
7 | - [Revisions](#revisions)
8 | - [Tools supported](#tools-supported)
9 | - [Quickstart](#:running:-Quickstart)
10 | - [requirements](#requirements)
11 | - [Profiles](#profiles)
12 | - [Installation](#installation)
13 | - [Configuring models package](#Configuring-models-package)
14 | - [Airflow metadata](#Airflow-metadata)
15 | - [ADF metadata](#ADF-metadata)
16 | - [Databricks Workflow Data](#Databricks-Workflow-Data)
17 | - [Integration tests](#Integration-tests)
18 |
19 | # Before creating a branch
20 |
21 | Before creating a branch, it is very important to know whether your modification to this repository is a release/major (breaking changes), a feature/minor (new functionality) or a patch (bug fixes). With that information, name your branch with one of the prefixes below (see the example command after the list):
22 |
23 | - `release/` or `major/` or `Release/` or `Major/`
24 | - `feature/` or `minor/` (capitalised variants also work)
25 | - `patch/` or `fix/` or `hotfix/` (capitalised variants also work)
26 |
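For example, to start work on new functionality you could create a branch like this (the name after the prefix is just illustrative):

```
git checkout -b feature/add-new-source
```
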
27 | # Revisions
28 | - 0.3.0 - For Snowflake warehouses
29 | - 0.3.1 - For Redshift warehouses
30 |
31 | ## Tools supported:
32 |
33 | - Azure Data Factory
34 | - Apache Airflow
35 | - Databricks Workflows
36 |
37 | If you are cloning this repository, we recommend cloning via an SSH key.
38 |
39 | # :running: Quickstart
40 |
41 | New to dbt packages? Read more about them [here](https://docs.getdbt.com/docs/building-a-dbt-project/package-management/).
42 |
43 | ## Requirements
44 | dbt version
45 | * ```dbt version >= 1.3.0```
46 |
47 | dbt_utils package. Read more about it [here](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/).
48 | * ```dbt-labs/dbt_utils version: 1.1.1```
49 |
50 | This package works with most EL processes and depends on the metadata generated by the respective platform.
51 |
52 | ## Profiles
53 | The profile below uses Databricks as an example. When testing the repository, rename `example.env` to `.env` and fill in its variables with the appropriate values; an example sketch is shown after the setup commands below.
54 |
55 | ```
56 | dbt_dag_monitoring:
57 | target: "{{ env_var('DBT_DEFAULT_TARGET', 'dev')}}"
58 | outputs:
59 | dev:
60 | type: databricks
61 | catalog: "{{ env_var('DEV_CATALOG_NAME')}}"
62 | schema: "{{ env_var('DEV_SCHEMA_NAME')}}"
63 | host: "{{ env_var('DEV_HOST') }}"
64 | http_path: "{{ env_var('DEV_HTTP_PATH') }}"
65 | token: "{{ env_var('DEV_TOKEN') }}"
66 | threads: 16
67 | ansi_mode: false
68 | ```
69 |
70 | Once that is done, run these two commands to work locally without issues:
71 |
72 | `chmod +x setup.sh`
73 |
74 | and
75 |
76 | `source setup.sh`
77 |
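For reference, a minimal `.env` might look like the sketch below. The variable names come from the profile above; the values are placeholders that you must replace with your own workspace details:

```
DBT_DEFAULT_TARGET=dev
DEV_CATALOG_NAME=my_dev_catalog
DEV_SCHEMA_NAME=my_dev_schema
DEV_HOST=adb-1234567890123456.7.azuredatabricks.net
DEV_HTTP_PATH=/sql/1.0/warehouses/abcdef1234567890
DEV_TOKEN=dapiXXXXXXXXXXXXXXXXXXXX
```
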
78 | ## Installation
79 |
80 | 1. Include this package in your `packages.yml` file.
81 | ```yaml
82 | packages:
83 | - git: "https://github.com/techindicium/dbt-dag-monitoring.git"
84 | revision: # 0.3.0 or 0.3.1
85 | ```
86 |
87 | 2. Run `dbt deps` to install the package.
88 |
89 | ## Configuring models package
90 |
91 | ### Models:
92 | The package's behaviour on the desired platform depends on the configuration in `dbt_project.yml`. To define which platform's metadata the package transforms, set the `enabled` field to `true` for the desired platform and `false` for all others.
93 |
94 | ### Vars:
95 | Then we define the variables: the vars are scoped to the `dbt_dag_monitoring` package, `enabled_sources` determines which platform the monitoring will be based on, and the following lines define which database and schema will be used, according to the platform defined above.
96 | ```
97 | models:
98 | dbt_dag_monitoring:
99 | marts:
100 | +materialized: table
101 | staging:
102 | +materialized: view
103 | airflow_sources:
104 | +enabled: true
105 | adf_sources:
106 | +enabled: false
107 | databricks_workflow_sources:
108 | +enabled: false
109 |
110 | sources:
111 | dbt_dag_monitoring:
112 | staging:
113 | adf_sources:
114 | raw_adf_monitoring:
115 | +enabled: false
116 | databricks_workflow_sources:
117 | raw_databricks_workflow_monitoring:
118 | +enabled: false
119 | airflow_sources:
120 | raw_airflow_monitoring:
121 | +enabled: true
122 | ```
123 | ...
124 |
125 | Adding the vars below to `dbt_project.yml` prevents dbt compilation errors (a typical first run is shown after the block):
126 | ```
127 | vars:
128 | dbt_dag_monitoring:
129 | enabled_sources: ['airflow'] #Possible values: 'airflow', 'adf' or 'databricks_workflow'
130 | dag_monitoring_start_date: cast('2023-01-01' as date)
131 | dag_monitoring_airflow_database: #landing_zone
132 | dag_monitoring_airflow_schema: #airflow_metadata
133 | dag_monitoring_databricks_database: #raw_catalog
134 | dag_monitoring_databricks_schema: #databricks_metadata
135 | dag_monitoring_adf_database: #raw
136 | dag_monitoring_adf_schema: #adf_metadata
137 | ```
138 |
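With the vars in place, a typical first run of the package could look like this (a sketch, assuming your profile and target are already configured):

```
dbt deps
dbt build --select package:dbt_dag_monitoring
```
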
139 | ## Airflow metadata
140 |
141 | The Airflow sources are based on the Airflow metadata database; any form of extraction from it should suffice.
142 |
143 | The package works with any type of EL process, as long as the data warehouse contains the following tables (a quick check is shown after the list):
144 | - dag_run
145 | - task_instance
146 | - task_fail
147 | - dag
148 |
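Once these tables are loaded and the Airflow vars point to their database and schema, you can verify that the raw data matches the package's expectations by testing the source, for example:

```
dbt test --select source:raw_airflow_monitoring
```
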
149 | ## ADF Metadata
150 |
151 | The ADF models rely on sources extracted by our ADF tap:
152 |
153 | https://bitbucket.org/indiciumtech/platform_meltano_el/src/6b9c9e970518db1e21086ec75a7442d1b6978c93/plugins/custom/tap-azuredatafactory/?at=featuer%2Fadd_adf_extractor
154 |
155 | ## Databricks Workflow Data
156 | The Databricks workflow models rely on sources extracted by our Databricks tap:
157 |
158 | https://bitbucket.org/indiciumtech/platform_meltano_el/src/main/plugins/custom/tap-databricksops/
159 |
160 | specifically the streams listed below (a configuration sketch follows the list):
161 |
162 | - jobs
163 | - job_runs
164 |
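To monitor Databricks workflows instead of Airflow, flip the corresponding flags and vars in your `dbt_project.yml`. A minimal sketch, mirroring the configuration shown earlier (remember to switch the `sources:` block in the same way):

```yaml
models:
  dbt_dag_monitoring:
    staging:
      airflow_sources:
        +enabled: false
      adf_sources:
        +enabled: false
      databricks_workflow_sources:
        +enabled: true

vars:
  dbt_dag_monitoring:
    enabled_sources: ['databricks_workflow']
    dag_monitoring_databricks_database: # e.g. raw_catalog
    dag_monitoring_databricks_schema: # e.g. databricks_metadata
```
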
165 | ## Integration tests
166 |
167 | > [!IMPORTANT]
168 | > When using the integration tests folder, so that the continuous integration run works seamlessly, your pull request must NOT change the default values of the vars, models and sources (which target Databricks) inside `integration_tests/dbt_project.yml`. Following the source pattern is important.
169 |
170 | More information is available in the README.md inside the integration_tests folder.
171 |
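If you want to reproduce part of the CI locally against a Databricks target, the steps below are a sketch based on the CI workflow (they assume your profile and `.env` are already configured):

```
cd integration_tests
dbt deps --target databricks
dbt run-operation create_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target databricks
dbt seed --target databricks
dbt run-operation jobs --target databricks
dbt run-operation job_runs --target databricks
dbt test -s "source:*" --target databricks
dbt build --target databricks
```
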
172 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | pull_request:
5 | types:
6 | - opened
7 | - synchronize
8 | branches:
9 | - main
10 |
11 | env:
12 | DBT_PROFILES_DIR: ${{ github.workspace }}/integration_tests
13 | DBT_PROJECT_DIR: ${{ github.workspace }}/integration_tests
14 | DBT_DEFAULT_TARGET: databricks
15 | DEV_CATALOG_NAME: cdi_dev
16 | DEV_SCHEMA_NAME: ci_dbt_dag_monitoring
17 | DEV_HOST: ${{ secrets.DATABRICKS_HOST }}
18 | DEV_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
19 | DEV_HTTP_PATH: ${{ secrets.DATABRICKS_HTTP_PATH }}
20 |
21 | BIGQUERY_DATASET: ci_dbt_dag_monitoring
22 | BIGQUERY_PROJECT: indicium-sandbox
23 | DBT_JOB_TIMEOUT: 300
24 | DBT_THREADS: 16
25 | DBT_JOB_RETRIES: 1
26 |
27 | SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_TEST_ACCOUNT}}
28 | SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_TEST_USER }}
29 | SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_TEST_PASSWORD}}
30 | SNOWFLAKE_ROLE: INTERNAL_PRODUCTS_CICD
31 | SNOWFLAKE_DATABASE: SANDBOX
32 | SNOWFLAKE_WAREHOUSE: SANDBOX_WAREHOUSE
33 | SNOWFLAKE_SCHEMA: ci_dbt_dag_monitoring
34 |
35 |
36 | jobs:
37 | dbt-checks:
38 | runs-on: ubuntu-latest
39 |
40 | steps:
41 | - name: Checkout repository
42 | uses: actions/checkout@v2
43 |
44 | - name: Set up Python
45 | uses: actions/setup-python@v2
46 | with:
47 | python-version: '3.8'
48 |
49 | - name: Install dependencies
50 | run: |
51 | python -m pip install --upgrade pip
52 | pip install -r requirements.txt
53 |
54 | - name: Authenticate to GCP
55 | uses: "google-github-actions/auth@v2"
56 | with:
57 | credentials_json: "${{ secrets.BIGQUERY_AUTH }}"
58 |
59 | - name: Run dbt debug for Databricks
60 | run: dbt debug
61 |
62 | - name: Run dbt debug for BigQuery
63 | run: dbt debug --target bigquery
64 |
65 | - name: Run dbt debug for Snowflake
66 | run: dbt debug --target snowflake
67 |
68 | - name: dbt deps
69 | run: dbt deps
70 |
71 | - name: dbt compile
72 | run: dbt compile
73 |
74 | integration-test:
75 | runs-on: ubuntu-latest
76 | steps:
77 | - name: Checkout repository
78 | uses: actions/checkout@v2
79 |
80 | - name: Set up Python
81 | uses: actions/setup-python@v2
82 | with:
83 | python-version: '3.8'
84 |
85 | - name: Install dependencies
86 | run: |
87 | python -m pip install --upgrade pip
88 | pip install -r requirements.txt
89 |
90 | - name: enter integration tests
91 | run: |
92 | cd integration_tests/
93 |
94 | - name: Authenticate to GCP
95 | uses: "google-github-actions/auth@v2"
96 | with:
97 | credentials_json: "${{ secrets.BIGQUERY_AUTH }}"
98 |
99 | - name: Run dbt integration tests Databricks source in Databricks connection
100 | run: |
101 | dbt deps --target databricks
102 |
103 | dbt run-operation create_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target databricks
104 |
105 | dbt seed --target databricks
106 |
107 | dbt run-operation jobs --target databricks
108 | dbt run-operation job_runs --target databricks
109 |
110 | dbt test -s source:* --target databricks
111 |
112 | dbt build --target databricks
113 |
114 | - name: switch enabled sources for adf source
115 |         run: |
116 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_adf_source.sh
117 |
118 | - name: Run dbt tasks for ADF source in Databricks connection
119 | run: |
120 | dbt deps
121 |
122 | dbt seed --target databricks
123 |
124 | dbt run-operation adf_pipeline_runs --target databricks
125 | dbt run-operation adf_triggers --target databricks
126 |
127 | dbt test -s source:* --target databricks
128 |
129 | dbt build --target databricks
130 |
131 | - name: switch enabled sources for airflow source
132 | run: |
133 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_airflow_source.sh
134 |
135 | - name: Run dbt tasks for Airflow source in Databricks connection
136 | run: |
137 | dbt deps
138 |
139 | dbt seed --target databricks
140 |
141 | dbt test -s source:* --target databricks
142 |
143 | dbt build --target databricks
144 |
145 | dbt run-operation drop_schema --args '{schema_name: ci_dbt_dag_monitoring}'
146 |
147 | - name: change databricks database to bigquery database
148 | run: |
149 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_databricks_source.sh
150 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh databricks cdi_dev indicium-sandbox
151 |
152 | - name: Run dbt integration tests Databricks source in BigQuery connection
153 | run: |
154 | dbt deps --target bigquery
155 |
156 | dbt run-operation create_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target bigquery
157 |
158 | dbt run-operation jobs --target bigquery
159 | dbt run-operation job_runs --target bigquery
160 |
161 | dbt test -s source:* --target bigquery
162 |
163 | dbt build --exclude-resource-type seed --target bigquery
164 |
165 | - name: switch enabled sources for adf source
166 | run: |
167 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh adf cdi_dev indicium-sandbox
168 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_adf_source.sh
169 |
170 | - name: Run dbt integration tests ADF source in BigQuery connection
171 | run: |
172 | dbt deps
173 |
174 | dbt seed -s adf_pipelines --target bigquery
175 |
176 | dbt run-operation adf_activity_runs --target bigquery
177 | dbt run-operation adf_pipeline_runs --target bigquery
178 | dbt run-operation adf_triggers --target bigquery
179 |
180 | dbt test -s source:* --target bigquery
181 |
182 | dbt build --exclude-resource-type seed --target bigquery
183 |
184 | - name: switch enabled sources for airflow source
185 | run: |
186 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_airflow_source.sh
187 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh airflow cdi_dev indicium-sandbox
188 |
189 | - name: Run dbt tasks for Airflow source in BigQuery connection
190 | run: |
191 | dbt deps
192 |
193 | dbt run-operation seed__dag_run --target bigquery
194 | dbt run-operation seed__dag --target bigquery
195 | dbt run-operation seed__task_fail --target bigquery
196 | dbt run-operation seed__task_instance --target bigquery
197 |
198 | dbt test -s source:* --target bigquery
199 |
200 | dbt build --exclude-resource-type seed --target bigquery
201 |
202 | dbt run-operation drop_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target bigquery
203 |
204 | - name: change BigQuery database to Snowflake database
205 | run: |
206 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh databricks indicium-sandbox sandbox
207 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_databricks_source.sh
208 |
209 | - name: Run dbt integration tests Databricks source in Snowflake connection
210 | run: |
211 | dbt deps
212 |
213 | dbt run-operation create_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target snowflake
214 |
215 | dbt run-operation jobs --target snowflake
216 | dbt run-operation job_runs --target snowflake
217 |
218 | dbt test -s source:* --target snowflake
219 |
220 | dbt build --exclude-resource-type seed --target snowflake
221 |
222 | - name: switch enabled sources for airflow source
223 | run: |
224 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh airflow indicium-sandbox sandbox
225 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_from_databricks_to_airflow.sh
226 |
227 | - name: Run dbt integration tests Airflow source in Snowflake connection
228 | env:
229 | DBT_PROFILES_DIR: ${{ github.workspace }}/integration_tests
230 | DBT_PROJECT_DIR: ${{ github.workspace }}/integration_tests
231 | run: |
232 | dbt deps
233 |
234 | dbt seed -s seeds/airflow/* --target snowflake
235 |
236 | dbt test -s source:* --target snowflake
237 |
238 | dbt build --exclude-resource-type seed --target snowflake
239 |
240 | dbt run-operation drop_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target snowflake
241 |
242 |
--------------------------------------------------------------------------------
/integration_tests/macros/adf_triggers.sql:
--------------------------------------------------------------------------------
1 | {% macro adf_triggers() -%}
2 | {{ return(adapter.dispatch('adf_triggers')()) }}
3 | {%- endmacro %}
4 |
5 | {%- macro default__adf_triggers() -%}
6 | {% set create_table %}
7 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_triggers (
8 | id STRING,
9 | name STRING,
10 | type STRING,
11 | properties STRUCT<
12 |         annotations ARRAY<STRING>,
13 | pipelines ARRAY<
14 | STRUCT<
15 | pipelineReference STRUCT<
16 | referenceName STRING,
17 | type STRING
18 | >,
19 | parameters STRUCT<
20 | days_before STRING,
21 | environment STRING,
22 | reset_type STRING,
23 | Job_ID STRING,
24 | DatabricksWorkspaceID STRING,
25 | WaitRecheckSeconds INT
26 | >
27 | >
28 | >,
29 | type STRING,
30 | typeProperties STRUCT<
31 | recurrence STRUCT<
32 | frequency STRING,
33 | interval INT,
34 | startTime STRING,
35 | timeZone STRING,
36 | schedule STRUCT<
37 |                     minutes ARRAY<INT>,
38 |                     hours ARRAY<INT>,
39 |                     weekDays ARRAY<STRING>,
40 |                     monthDays ARRAY<INT>
41 | >
42 | >,
43 | parentTrigger STRING,
44 | requestedStartTime STRING,
45 | requestedEndTime STRING,
46 | rerunConcurrency INT
47 | >,
48 | runtimeState STRING
49 | >
50 | );
51 | {% endset %}
52 |
53 | {% set insert_table %}
54 | insert into `{{ target.database }}`.{{ target.schema }}.adf_triggers VALUES
55 | (
56 | '/subscriptions/9f07555crvices-atastudio-adf-d/triggers/TR-fd-prod-duration_estimation-monthly',
57 | 'TR-fd-prod-dion-monthly',
58 | 'Microsoft.Dataes/triggers',
59 | NAMED_STRUCT(
60 | 'annotations', ARRAY('fraud-detection', 'prod', 'duration-estimation'),
61 | 'pipelines', ARRAY(
62 | NAMED_STRUCT(
63 | 'pipelineReference', NAMED_STRUCT(
64 | 'referenceName', 'fd-prod-duration_estimation',
65 | 'type', 'PipelineReference'
66 | ),
67 | 'parameters', NAMED_STRUCT(
68 | 'days_before', NULL,
69 | 'environment', NULL,
70 | 'reset_type', NULL,
71 | 'Job_ID', NULL,
72 | 'DatabricksWorkspaceID', NULL,
73 | 'WaitRecheckSeconds', NULL
74 | )
75 | )
76 | ),
77 | 'type', 'ScheduleTrigger',
78 | 'typeProperties', NAMED_STRUCT(
79 | 'recurrence', NAMED_STRUCT(
80 | 'frequency', 'Month',
81 | 'interval', 1,
82 | 'startTime', '2020-10-14T04:30:00',
83 | 'timeZone', 'E. South America Standard Time',
84 | 'schedule', NAMED_STRUCT(
85 | 'minutes', ARRAY(30),
86 | 'hours', ARRAY(4),
87 | 'weekDays', NULL,
88 | 'monthDays', ARRAY(14)
89 | )
90 | ),
91 | 'parentTrigger', NULL,
92 | 'requestedStartTime', NULL,
93 | 'requestedEndTime', NULL,
94 | 'rerunConcurrency', NULL
95 | ),
96 | 'runtimeState', NULL
97 | )
98 | ),
99 | (
100 | '/subscriptions/TR-fd-dev-predict-main',
101 | 'TR-fd-dev-predict-main',
102 | 'Microsoft.Dats/triggers',
103 | NAMED_STRUCT(
104 | 'annotations', ARRAY('fraud-detection', 'dev', 'predict'),
105 | 'pipelines', ARRAY(
106 | NAMED_STRUCT(
107 | 'pipelineReference', NAMED_STRUCT(
108 | 'referenceName', 'fd-dev-predict-main',
109 | 'type', 'PipelineReference'
110 | ),
111 | 'parameters', NAMED_STRUCT(
112 | 'days_before', NULL,
113 | 'environment', NULL,
114 | 'reset_type', NULL,
115 | 'Job_ID', NULL,
116 | 'DatabricksWorkspaceID', NULL,
117 | 'WaitRecheckSeconds', NULL
118 | )
119 | )
120 | ),
121 | 'type', 'ScheduleTrigger',
122 | 'typeProperties', NAMED_STRUCT(
123 | 'recurrence', NAMED_STRUCT(
124 | 'frequency', 'Week',
125 | 'interval', 1,
126 | 'startTime', '2021-01-26T21:50:00',
127 | 'timeZone', 'E. South America Standard Time',
128 | 'schedule', NAMED_STRUCT(
129 | 'minutes', ARRAY(0),
130 | 'hours', ARRAY(5),
131 | 'weekDays', ARRAY('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'),
132 | 'monthDays', NULL
133 | )
134 | ),
135 | 'parentTrigger', NULL,
136 | 'requestedStartTime', NULL,
137 | 'requestedEndTime', NULL,
138 | 'rerunConcurrency', NULL
139 | ),
140 | 'runtimeState', 'Stopped'
141 | )
142 | );
143 | {% endset %}
144 |
145 | {% do run_query(create_table) %}
146 | {% do log("finished creating table triggers", info=true) %}
147 |
148 | {% do run_query(insert_table) %}
149 | {% do log("finished insert table triggers", info=true) %}
150 | {%- endmacro -%}
151 |
152 |
153 | {%- macro bigquery__adf_triggers() -%}
154 | {% set create_table %}
155 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_triggers (
156 | id STRING,
157 | name STRING,
158 | type STRING,
159 | properties STRUCT<
160 |             annotations ARRAY<STRING>,
161 | pipelines ARRAY<
162 | STRUCT<
163 | pipelineReference STRUCT<
164 | referenceName STRING,
165 | type STRING
166 | >,
167 | parameters STRUCT<
168 | days_before STRING,
169 | environment STRING,
170 | reset_type STRING,
171 | Job_ID STRING,
172 | DatabricksWorkspaceID STRING,
173 | WaitRecheckSeconds INT
174 | >
175 | >
176 | >,
177 | type STRING,
178 | typeProperties STRUCT<
179 | recurrence STRUCT<
180 | frequency STRING,
181 | `interval` INT,
182 | startTime STRING,
183 | timeZone STRING,
184 | schedule STRUCT<
185 |                     minutes ARRAY<INT64>,
186 |                     hours ARRAY<INT64>,
187 |                     weekDays ARRAY<STRING>,
188 |                     monthDays ARRAY<INT64>
189 | >
190 | >,
191 | parentTrigger STRING,
192 | requestedStartTime STRING,
193 | requestedEndTime STRING,
194 | rerunConcurrency INT
195 | >,
196 | runtimeState STRING
197 | >
198 | );
199 |
200 | {% endset %}
201 |
202 | {% set insert_table %}
203 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.adf_triggers
204 | VALUES
205 | (
206 | '/subscriptions/9f0755tories/tlo-datastudio-adf-d/triggers/TR-fd-prod-duration_estimation-monthly',
207 | 'TR-fd-prod-ation-monthly',
208 | 'Microsoft.Dattories/triggers',
209 | STRUCT(
210 | ARRAY['fraud-detection', 'prod', 'duration-estimation'],
211 |             ARRAY<STRUCT<
212 |                 pipelineReference STRUCT<
213 |                     referenceName STRING,
214 |                     type STRING
215 |                 >,
216 | parameters STRUCT<
217 | days_before STRING,
218 | environment STRING,
219 | reset_type STRING,
220 | Job_ID STRING,
221 | DatabricksWorkspaceID STRING,
222 | WaitRecheckSeconds INT64
223 | >
224 | >>[
225 | STRUCT(
226 | STRUCT(
227 | 'fd-prod-duration_estimation',
228 | 'PipelineReference'
229 | ),
230 | STRUCT(
231 | NULL,
232 | NULL,
233 | NULL,
234 | NULL,
235 | NULL,
236 | NULL
237 | )
238 | )
239 | ],
240 | 'ScheduleTrigger',
241 | STRUCT(
242 | STRUCT(
243 | 'Month',
244 | 1,
245 | '2020-10-14T04:30:00',
246 | 'E. South America Standard Time',
247 | STRUCT(
248 | ARRAY[30],
249 | ARRAY[4],
250 | NULL,
251 | ARRAY[14]
252 | )
253 | ),
254 | NULL,
255 | NULL,
256 | NULL,
257 | NULL
258 | ),
259 | NULL
260 | )
261 | ),
262 | (
263 | '/subscriptions/y/factories/tlo-datastudio-adf-d/triggers/TR-fd-dev-predict-main',
264 | 'TR-fd-dev-predict-main',
265 | 'Microsoft.DataFactory/factories/triggers',
266 | STRUCT(
267 | ARRAY['fraud-detection', 'dev', 'predict'],
268 |             ARRAY<STRUCT<
269 |                 pipelineReference STRUCT<
270 |                     referenceName STRING,
271 |                     type STRING
272 |                 >,
273 | parameters STRUCT<
274 | days_before STRING,
275 | environment STRING,
276 | reset_type STRING,
277 | Job_ID STRING,
278 | DatabricksWorkspaceID STRING,
279 | WaitRecheckSeconds INT64
280 | >
281 | >>[
282 | STRUCT(
283 | STRUCT(
284 | 'fd-dev-predict-main',
285 | 'PipelineReference'
286 | ),
287 | STRUCT(
288 | NULL,
289 | NULL,
290 | NULL,
291 | NULL,
292 | NULL,
293 | NULL
294 | )
295 | )
296 | ],
297 | 'ScheduleTrigger',
298 | STRUCT(
299 | STRUCT(
300 | 'Week',
301 | 1,
302 | '2021-01-26T21:50:00',
303 | 'E. South America Standard Time',
304 | STRUCT(
305 | ARRAY[0],
306 | ARRAY[5],
307 | ARRAY['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'],
308 | NULL
309 | )
310 | ),
311 | NULL,
312 | NULL,
313 | NULL,
314 | NULL
315 | ),
316 | 'Stopped'
317 | )
318 | );
319 |
320 | {% endset %}
321 |
322 | {% do run_query(create_table) %}
323 | {% do log("finished creating table triggers", info=true) %}
324 |
325 | {% do run_query(insert_table) %}
326 | {% do log("finished insert table triggers", info=true) %}
327 | {%- endmacro -%}
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/integration_tests/macros/jobs.sql:
--------------------------------------------------------------------------------
1 | {% macro jobs() -%}
2 | {{ return(adapter.dispatch('jobs')()) }}
3 | {%- endmacro %}
4 |
5 | {% macro databricks__jobs() %}
6 | {% set create_table %}
7 | create or replace table `{{ target.database }}`.{{ target.schema }}.jobs (
8 | created_time BIGINT,
9 | creator_user_name STRING,
10 | job_id BIGINT,
11 | settings STRUCT<
12 | email_notifications STRUCT<
13 |             on_failure ARRAY<STRING>,
14 | no_alert_for_skipped_runs BOOLEAN
15 | >,
16 | format STRING,
17 | max_concurrent_runs BIGINT,
18 | name STRING,
19 | schedule STRUCT<
20 | pause_status STRING,
21 | quartz_cron_expression STRING,
22 | timezone_id STRING
23 | >,
24 | tags STRUCT<
25 | dev STRING,
26 | env STRING
27 | >,
28 | timeout_seconds bigint,
29 | trigger STRUCT<
30 | file_arrival STRUCT<
31 | url STRING
32 | >,
33 | pause_status STRING
34 | >
35 | >,
36 | insertedDate TIMESTAMP
37 | );
38 | {% endset %}
39 |
40 | {% set insert_table %}
41 |
42 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.jobs VALUES
43 | (
44 | CAST(1722606667504 AS BIGINT),
45 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING),
46 | CAST(466340877826952 AS BIGINT),
47 | NAMED_STRUCT(
48 | 'email_notifications', NAMED_STRUCT(
49 | 'on_failure',ARRAY('produtos_horizontais@indicium1.opsgenie.net'),
50 | 'no_alert_for_skipped_runs', NULL
51 | ),
52 | 'format', 'MULTI_TASK',
53 | 'max_concurrent_runs', 1,
54 | 'name', '[prod] core_dag_monitoring_data_transformation_dbt_job',
55 | 'schedule', NAMED_STRUCT(
56 | 'pause_status', NULL,
57 | 'quartz_cron_expression', NULL,
58 | 'timezone_id', NULL
59 | ),
60 | 'tags',NAMED_STRUCT(
61 | 'dev', NULL,
62 | 'env','prod'
63 | ),
64 | 'timeout_seconds', 0,
65 | 'trigger',NAMED_STRUCT(
66 | 'file_arrival',NAMED_STRUCT(
67 | 'url',NULL
68 | ),
69 |                 'pause_status',NULL
70 | )
71 | ),
72 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP)
73 | ),
74 | (
75 | CAST(1722544845800 AS BIGINT),
76 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING),
77 | CAST(823250232903490 AS BIGINT),
78 | NAMED_STRUCT(
79 | 'email_notifications',NAMED_STRUCT(
80 | 'on_failure', ARRAY('produtos_horizontais@indicium1.opsgenie.net'),
81 | 'no_alert_for_skipped_runs', NULL
82 | ),
83 | 'format','MULTI_TASK',
84 | 'max_concurrent_runs',1,
85 | 'name','[prod] core_dag_monitoring_extraction_meltano_job',
86 | 'schedule',NAMED_STRUCT(
87 | 'pause_status','UNPAUSED',
88 | 'quartz_cron_expression','0 0 0/3 * * ? *',
89 | 'timezone_id','UTC'
90 | ),
91 | 'tags',NAMED_STRUCT(
92 | 'dev', NULL,
93 | 'env','prod'
94 | ),
95 | 'timeout_seconds',0,
96 | 'trigger',NAMED_STRUCT(
97 | 'file_arrival',NAMED_STRUCT(
98 | 'url',NULL
99 | ),
100 |                 'pause_status',NULL
101 | )
102 | ),
103 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP)
104 | ),
105 | (
106 | CAST(1722538441265 AS BIGINT),
107 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING),
108 | CAST(790689006770532 AS BIGINT),
109 | NAMED_STRUCT(
110 | 'email_notifications',NAMED_STRUCT(
111 | 'on_failure',ARRAY('produtos_horizontais@indicium1.opsgenie.net'),
112 | 'no_alert_for_skipped_runs',NULL
113 | ),
114 | 'format','MULTI_TASK',
115 | 'max_concurrent_runs',1,
116 | 'name','[prod] investment_postgres_extraction_spark_job',
117 | 'schedule',NAMED_STRUCT(
118 | 'pause_status','UNPAUSED',
119 | 'quartz_cron_expression','0 0 0/4 * * ? *',
120 | 'timezone_id','UTC'
121 | ),
122 | 'tags',NAMED_STRUCT(
123 | 'dev', NULL,
124 | 'env','prod'
125 | ),
126 | 'timeout_seconds',0,
127 | 'trigger',NAMED_STRUCT(
128 | 'file_arrival',NAMED_STRUCT(
129 | 'url',NULL
130 | ),
131 |                 'pause_status',NULL
132 | )
133 | ),
134 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP)
135 | );
136 |
137 | {% endset %}
138 |
139 | {% do run_query(create_table) %}
140 | {% do log("finished creating table jobs", info=true) %}
141 |
142 | {% do run_query(insert_table) %}
143 | {% do log("finished insert table jobs ", info=true) %}
144 |
145 |
146 | {% endmacro %}
147 |
148 | {% macro bigquery__jobs() %}
149 | {% set create_table %}
150 | create or replace table `{{ target.database }}`.{{ target.schema }}.jobs (
151 | created_time BIGINT,
152 | creator_user_name STRING,
153 | job_id BIGINT,
154 | settings STRUCT<
155 | email_notifications STRUCT<
156 |             on_failure ARRAY<STRING>,
157 | no_alert_for_skipped_runs BOOLEAN
158 | >,
159 | format STRING,
160 | max_concurrent_runs BIGINT,
161 | name STRING,
162 | schedule STRUCT<
163 | pause_status STRING,
164 | quartz_cron_expression STRING,
165 | timezone_id STRING
166 | >,
167 | tags STRUCT<
168 | dev STRING,
169 | env STRING
170 | >,
171 | timeout_seconds bigint,
172 | trigger STRUCT<
173 | file_arrival STRUCT<
174 | url STRING
175 | >,
176 | pause_status STRING
177 | >
178 | >,
179 | insertedDate TIMESTAMP
180 | );
181 | {% endset %}
182 |
183 | {% set insert_table %}
184 |
185 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.jobs VALUES
186 | (
187 | CAST(1722606667504 AS INT64),
188 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING),
189 | CAST(466340877826952 AS INT64),
190 | STRUCT(
191 | STRUCT(
192 | ARRAY['produtos_horizontais@indicium1.opsgenie.net'] AS on_failure,
193 | NULL AS no_alert_for_skipped_runs
194 | ) AS email_notifications,
195 | 'MULTI_TASK' AS format,
196 | 1 AS max_concurrent_runs,
197 | '[prod] core_dag_monitoring_data_transformation_dbt_job' AS name,
198 | STRUCT(
199 | NULL AS pause_status,
200 | NULL AS quartz_cron_expression,
201 | NULL AS timezone_id
202 | ) AS schedule,
203 | STRUCT(
204 | NULL AS dev,
205 | 'prod' AS env
206 | ) AS tags,
207 | 0 AS timeout_seconds,
208 | STRUCT(
209 | STRUCT(
210 | NULL AS url
211 | ) AS file_arrival,
212 |             NULL AS pause_status
213 | ) AS trigger
214 | ),
215 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP)
216 | ),
217 | (
218 | CAST(1722544845800 AS INT64),
219 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING),
220 | CAST(823250232903490 AS INT64),
221 | STRUCT(
222 | STRUCT(
223 | ARRAY['produtos_horizontais@indicium1.opsgenie.net'] AS on_failure,
224 | NULL AS no_alert_for_skipped_runs
225 | ) AS email_notifications,
226 | 'MULTI_TASK' AS format,
227 | 1 AS max_concurrent_runs,
228 | '[prod] core_dag_monitoring_extraction_meltano_job' AS name,
229 | STRUCT(
230 | 'UNPAUSED' AS pause_status,
231 | '0 0 0/3 * * ? *' AS quartz_cron_expression,
232 | 'UTC' AS timezone_id
233 | ) AS schedule,
234 | STRUCT(
235 | NULL AS dev,
236 | 'prod' AS env
237 | ) AS tags,
238 | 0 AS timeout_seconds,
239 | STRUCT(
240 | STRUCT(
241 | NULL AS url
242 | ) AS file_arrival,
243 |             NULL AS pause_status
244 | ) AS trigger
245 | ),
246 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP)
247 | ),
248 | (
249 | CAST(1722538441265 AS INT64),
250 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING),
251 | CAST(790689006770532 AS INT64),
252 | STRUCT(
253 | STRUCT(
254 | ARRAY['produtos_horizontais@indicium1.opsgenie.net'] AS on_failure,
255 | NULL AS no_alert_for_skipped_runs
256 | ) AS email_notifications,
257 | 'MULTI_TASK' AS format,
258 | 1 AS max_concurrent_runs,
259 | '[prod] investment_postgres_extraction_spark_job' AS name,
260 | STRUCT(
261 | 'UNPAUSED' AS pause_status,
262 | '0 0 0/4 * * ? *' AS quartz_cron_expression,
263 | 'UTC' AS timezone_id
264 | ) AS schedule,
265 | STRUCT(
266 | NULL AS dev,
267 | 'prod' AS env
268 | ) AS tags,
269 | 0 AS timeout_seconds,
270 | STRUCT(
271 | STRUCT(
272 | NULL AS url
273 | ) AS file_arrival,
274 |             NULL AS pause_status
275 | ) AS trigger
276 | ),
277 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP)
278 | );
279 |
280 | {% endset %}
281 |
282 | {% do run_query(create_table) %}
283 | {% do log("finished creating table jobs", info=true) %}
284 |
285 | {% do run_query(insert_table) %}
286 | {% do log("finished insert table jobs ", info=true) %}
287 |
288 |
289 | {% endmacro %}
290 |
291 | {% macro snowflake__jobs() %}
292 | {% set create_table %}
293 | CREATE OR REPLACE TABLE {{ target.database }}.{{ target.schema }}.jobs (
294 | created_time BIGINT,
295 | creator_user_name VARCHAR,
296 | job_id BIGINT,
297 | settings VARIANT,
298 | insertedDate TIMESTAMP
299 | );
300 | {% endset %}
301 |
302 | {% set insert_table %}
303 |
304 | INSERT INTO {{ target.database }}.{{ target.schema }}.jobs SELECT
305 | CAST(1722606667504 AS BIGINT),
306 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS VARCHAR),
307 | CAST(466340877826952 AS BIGINT),
308 | OBJECT_CONSTRUCT(
309 | 'email_notifications', OBJECT_CONSTRUCT(
310 | 'on_failure', ARRAY_CONSTRUCT('produtosgenie.net'),
311 | 'no_alert_for_skipped_runs', NULL
312 | ),
313 | 'format', 'MULTI_TASK',
314 | 'max_concurrent_runs', 1,
315 | 'name', '[prod] coreion_dbt_job',
316 | 'schedule', OBJECT_CONSTRUCT(
317 | 'pause_status', NULL,
318 | 'quartz_cron_expression', NULL,
319 | 'timezone_id', NULL
320 | ),
321 | 'tags', OBJECT_CONSTRUCT(
322 | 'dev', NULL,
323 | 'env', 'prod'
324 | ),
325 | 'timeout_seconds', 0,
326 | 'trigger', OBJECT_CONSTRUCT(
327 | 'file_arrival', OBJECT_CONSTRUCT(
328 | 'url', NULL
329 | ),
330 |             'pause_status', NULL
331 | )
332 | ),
333 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP)
334 | UNION ALL
335 | SELECT
336 | CAST(1722544845800 AS BIGINT),
337 | CAST('13bc3f4b44571518ae' AS VARCHAR),
338 | CAST(823250232903490 AS BIGINT),
339 | OBJECT_CONSTRUCT(
340 | 'email_notifications', OBJECT_CONSTRUCT(
341 | 'on_failure', ARRAY_CONSTRUCT('prod.opsgenie.net'),
342 | 'no_alert_for_skipped_runs', NULL
343 | ),
344 | 'format', 'MULTI_TASK',
345 | 'max_concurrent_runs', 1,
346 | 'name', '[prod] cltano_job',
347 | 'schedule', OBJECT_CONSTRUCT(
348 | 'pause_status', 'UNPAUSED',
349 | 'quartz_cron_expression', '0 0 0/3 * * ? *',
350 | 'timezone_id', 'UTC'
351 | ),
352 | 'tags', OBJECT_CONSTRUCT(
353 | 'dev', NULL,
354 | 'env', 'prod'
355 | ),
356 | 'timeout_seconds', 0,
357 | 'trigger', OBJECT_CONSTRUCT(
358 | 'file_arrival', OBJECT_CONSTRUCT(
359 | 'url', NULL
360 | ),
361 |             'pause_status', NULL
362 | )
363 | ),
364 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP)
365 | UNION ALL
366 | SELECT
367 | CAST(1722538441265 AS BIGINT),
368 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS VARCHAR),
369 | CAST(790689006770532 AS BIGINT),
370 | OBJECT_CONSTRUCT(
371 | 'email_notifications', OBJECT_CONSTRUCT(
372 | 'on_failure', ARRAY_CONSTRUCT('produtosopsgenie.net'),
373 | 'no_alert_for_skipped_runs', NULL
374 | ),
375 | 'format', 'MULTI_TASK',
376 | 'max_concurrent_runs', 1,
377 | 'name', '[prod] invspark_job',
378 | 'schedule', OBJECT_CONSTRUCT(
379 | 'pause_status', 'UNPAUSED',
380 | 'quartz_cron_expression', '0 0 0/4 * * ? *',
381 | 'timezone_id', 'UTC'
382 | ),
383 | 'tags', OBJECT_CONSTRUCT(
384 | 'dev', NULL,
385 | 'env', 'prod'
386 | ),
387 | 'timeout_seconds', 0,
388 | 'trigger', OBJECT_CONSTRUCT(
389 | 'file_arrival', OBJECT_CONSTRUCT(
390 | 'url', NULL
391 | ),
392 |             'pause_status', NULL
393 | )
394 | ),
395 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP)
396 | ;
397 |
398 |
399 | {% endset %}
400 |
401 | {% do run_query(create_table) %}
402 | {% do log("finished creating table jobs", info=true) %}
403 |
404 | {% do run_query(insert_table) %}
405 | {% do log("finished insert table jobs ", info=true) %}
406 | {% endmacro %}
--------------------------------------------------------------------------------
/integration_tests/seeds/adf/adf_pipelines.csv:
--------------------------------------------------------------------------------
1 | id,name,type,properties_description,properties_activities_0_name,properties_activities_0_type,properties_activities_1_name,properties_activities_1_type,properties_activities_1_typeProperties_items_value,properties_activities_1_typeProperties_items_type,properties_activities_1_typeProperties_isSequential,properties_activities_1_typeProperties_activities_0_name,properties_activities_1_typeProperties_activities_0_type,properties_activities_1_typeProperties_activities_1_name,properties_activities_1_typeProperties_activities_1_type,properties_activities_1_typeProperties_activities_2_name,properties_activities_1_typeProperties_activities_2_type,properties_activities_1_typeProperties_activities_2_typeProperties_source_type,properties_activities_1_typeProperties_activities_2_typeProperties_sink_type,properties_activities_1_typeProperties_activities_2_inputs_0_referenceName,properties_activities_1_typeProperties_activities_2_inputs_0_type,properties_activities_1_typeProperties_activities_2_outputs_0_referenceName,properties_activities_1_typeProperties_activities_2_outputs_0_type,properties_activities_1_typeProperties_activities_3_name,properties_activities_1_typeProperties_activities_3_type,properties_activities_1_typeProperties_activities_4_name,properties_activities_1_typeProperties_activities_4_type,properties_activities_1_typeProperties_activities_5_name,properties_activities_1_typeProperties_activities_5_type,properties_activities_2_name,properties_activities_2_type,properties_activities_3_name,properties_activities_3_type,properties_activities_4_name,properties_activities_4_type,properties_activities_5_name,properties_activities_5_type,properties_activities_6_name,properties_activities_6_type,etag,properties_activities_1_typeProperties_activities_6_name,properties_activities_1_typeProperties_activities_6_type,properties_activities_1_typeProperties_activities_7_name,properties_activities_1_typeProperties_activities_7_type,properties_activities_5_typeProperties_items_value,properties_activities_5_typeProperties_items_type,properties_activities_5_typeProperties_isSequential,properties_activities_5_typeProperties_activities_0_name,properties_activities_5_typeProperties_activities_0_type,properties_activities_5_typeProperties_activities_0_typeProperties_source_type,properties_activities_5_typeProperties_activities_0_typeProperties_sink_type,properties_activities_5_typeProperties_activities_0_inputs_0_referenceName,properties_activities_5_typeProperties_activities_0_inputs_0_type,properties_activities_5_typeProperties_activities_0_outputs_0_referenceName,properties_activities_5_typeProperties_activities_0_outputs_0_type,properties_activities_7_name,properties_activities_7_type,properties_activities_8_name,properties_activities_8_type,properties_activities_9_name,properties_activities_9_type,properties_activities_10_name,properties_activities_10_type,properties_activities_11_name,properties_activities_11_type,properties_activities_12_name,properties_activities_12_type,properties_activities_13_name,properties_activities_13_type,properties_activities_0_typeProperties_items_value,properties_activities_0_typeProperties_items_type,properties_activities_0_typeProperties_activities_0_name,properties_activities_0_typeProperties_activities_0_type,properties_activities_0_typeProperties_activities_0_typeProperties_source_type,properties_activities_0_typeProperties_activities_0_typeProperties_sink_type,properties_activities_0_typeProperties_activities_0_inputs_0_referenceName,properties_activities_0_typeProperties_activities_0_inputs_0_type,properties_activities_0_typeProperties_activities_0_outputs_0_referenceName,properties_activities_0_typeProperties_activities_0_outputs_0_type,properties_activities_1_typeProperties_activities_0_typeProperties_source_type,properties_activities_1_typeProperties_activities_0_typeProperties_sink_type,properties_activities_1_typeProperties_activities_0_inputs_0_referenceName,properties_activities_1_typeProperties_activities_0_inputs_0_type,properties_activities_1_typeProperties_activities_0_outputs_0_referenceName,properties_activities_1_typeProperties_activities_0_outputs_0_type,properties_activities_0_typeProperties_activities_1_name,properties_activities_0_typeProperties_activities_1_type,properties_activities_0_typeProperties_activities_2_name,properties_activities_0_typeProperties_activities_2_type,properties_activities_14_name,properties_activities_14_type,properties_activities_15_name,properties_activities_15_type,properties_activities_16_name,properties_activities_16_type,properties_activities_17_name,properties_activities_17_type,properties_activities_18_name,properties_activities_18_type,properties_activities_19_name,properties_activities_19_type,properties_activities_20_name,properties_activities_20_type,properties_activities_21_name,properties_activities_21_type,properties_activities_22_name,properties_activities_22_type,properties_activities_23_name,properties_activities_23_type,properties_activities_24_name,properties_activities_24_type,properties_activities_25_name,properties_activities_25_type,properties_activities_26_name,properties_activities_26_type,properties_activities_27_name,properties_activities_27_type,properties_activities_28_name,properties_activities_28_type,properties_activities_29_name,properties_activities_29_type,properties_activities_30_name,properties_activities_30_type,properties_activities_31_name,properties_activities_31_type,properties_activities_32_name,properties_activities_32_type,properties_activities_33_name,properties_activities_33_type,properties_activities_34_name,properties_activities_34_type,properties_activities_35_name,properties_activities_35_type,properties_activities_36_name,properties_activities_36_type,properties_activities_37_name,properties_activities_37_type,properties_activities_38_name,properties_activities_38_type,properties_activities_39_name,properties_activities_39_type,properties_activities_3_typeProperties_items_value,properties_activities_3_typeProperties_items_type,properties_activities_3_typeProperties_isSequential,properties_activities_3_typeProperties_activities_0_name,properties_activities_3_typeProperties_activities_0_type,properties_activities_3_typeProperties_activities_0_typeProperties_source_type,properties_activities_3_typeProperties_activities_1_name,properties_activities_3_typeProperties_activities_1_type,properties_activities_1_typeProperties_activities_1_typeProperties_source_type,properties_activities_1_typeProperties_activities_1_typeProperties_sink_type,properties_activities_1_typeProperties_activities_1_inputs_0_referenceName,properties_activities_1_typeProperties_activities_1_inputs_0_type,properties_activities_1_typeProperties_activities_1_outputs_0_referenceName,properties_activities_1_typeProperties_activities_1_outputs_0_type,properties_activities_0_typeProperties_isSequential,properties_activities_0_typeProperties_activities_2_typeProperties_source_type,properties_activities_0_typeProperties_activities_2_typeProperties_sink_type,properties_activities_0_typeProperties_activities_2_inputs_0_referenceName,properties_activities_0_typeProperties_activities_2_inputs_0_type,properties_activities_0_typeProperties_activities_2_outputs_0_referenceName,properties_activities_0_typeProperties_activities_2_outputs_0_type,properties_activities_0_typeProperties_activities_3_name,properties_activities_0_typeProperties_activities_3_type,properties_activities_0_typeProperties_activities_4_name,properties_activities_0_typeProperties_activities_4_type,properties_activities_0_typeProperties_activities_5_name,properties_activities_0_typeProperties_activities_5_type,properties_activities_0_typeProperties_activities_6_name,properties_activities_0_typeProperties_activities_6_type,properties_activities_2_typeProperties_items_value,properties_activities_2_typeProperties_items_type,properties_activities_2_typeProperties_activities_0_name,properties_activities_2_typeProperties_activities_0_type,properties_activities_2_typeProperties_activities_0_typeProperties_source_type,properties_activities_2_typeProperties_activities_0_typeProperties_sink_type,properties_activities_2_typeProperties_activities_0_inputs_0_referenceName,properties_activities_2_typeProperties_activities_0_inputs_0_type,properties_activities_2_typeProperties_activities_0_outputs_0_referenceName,properties_activities_2_typeProperties_activities_0_outputs_0_type,properties_activities_4_typeProperties_items_value,properties_activities_4_typeProperties_items_type,properties_activities_4_typeProperties_isSequential,properties_activities_4_typeProperties_activities_0_name,properties_activities_4_typeProperties_activities_0_type,properties_activities_4_typeProperties_activities_1_name,properties_activities_4_typeProperties_activities_1_type,properties_activities_4_typeProperties_activities_2_name,properties_activities_4_typeProperties_activities_2_type,properties_activities_4_typeProperties_activities_3_name,properties_activities_4_typeProperties_activities_3_type,properties_activities_1_typeProperties_activities_8_name,properties_activities_1_typeProperties_activities_8_type,properties_activities_1_typeProperties_activities_6_typeProperties_source_type,properties_activities_2_typeProperties_isSequential,properties_activities_1_typeProperties_activities_9_name,properties_activities_1_typeProperties_activities_9_type,properties_activities_1_typeProperties_activities_10_name,properties_activities_1_typeProperties_activities_10_type,properties_activities_1_typeProperties_activities_11_name,properties_activities_1_typeProperties_activities_11_type,properties_activities_1_typeProperties_activities_12_name,properties_activities_1_typeProperties_activities_12_type,properties_activities_1_typeProperties_activities_13_name,properties_activities_1_typeProperties_activities_13_type,properties_activities_1_typeProperties_activities_14_name,properties_activities_1_typeProperties_activities_14_type,properties_activities_1_typeProperties_activities_15_name,properties_activities_1_typeProperties_activities_15_type,properties_activities_0_typeProperties_activities_1_typeProperties_source_type,properties_activities_0_typeProperties_activities_1_typeProperties_sink_type,properties_activities_0_typeProperties_activities_1_inputs_0_referenceName,properties_activities_0_typeProperties_activities_1_inputs_0_type,properties_activities_0_typeProperties_activities_1_outputs_0_referenceName,properties_activities_0_typeProperties_activities_1_outputs_0_type,properties_activities_0_typeProperties_activities_7_name,properties_activities_0_typeProperties_activities_7_type,properties_activities_0_typeProperties_activities_8_name,properties_activities_0_typeProperties_activities_8_type,properties_activities_0_typeProperties_activities_9_name,properties_activities_0_typeProperties_activities_9_type,properties_activities_0_typeProperties_activities_10_name,properties_activities_0_typeProperties_activities_10_type,properties_activities_0_typeProperties_activities_11_name,properties_activities_0_typeProperties_activities_11_type,properties_activities_0_typeProperties_activities_12_name,properties_activities_0_typeProperties_activities_12_type,properties_activities_2_typeProperties_activities_1_name,properties_activities_2_typeProperties_activities_1_type,properties_activities_2_typeProperties_activities_1_typeProperties_source_type,properties_activities_2_typeProperties_activities_2_name,properties_activities_2_typeProperties_activities_2_type,properties_activities_2_typeProperties_activities_3_name,properties_activities_2_typeProperties_activities_3_type,properties_activities_3_typeProperties_activities_2_name,properties_activities_3_typeProperties_activities_2_type,properties_activities_3_typeProperties_activities_3_name,properties_activities_3_typeProperties_activities_3_type
2 | /subscriptions/9f0755ices-dev-rg/providers/Microsoft.DataFactory/factories/tlo-datastudio-adf-d/pipelines/PL-ESPPIFLEXTODatalakeLandingZone-N,PL-ESPPIFLEXTODatalakeLandingZone-N,Microsoft.DataFactory/factories/pipelines,This pipeline copies data from all IFLEX environment tables listed in the LoadingControl. ,Get All Tables,Lookup,For Each Tables,ForEach,@activity('Get All Tables').output.value,Expression,false,Update StartDate,DatabricksNotebook,Set CurrentDate foreach,SetVariable,Copy Data,Copy,SqlServerSource,ParquetSink,DS__ESPP__Generic,DatasetReference,DS__FEM__Equinix_DatalakeParquet,DatasetReference,Update EndDate Success,DatabricksNotebook,Set CurrentDate foreach end,SetVariable,Update EndDate Success Error,DatabricksNotebook,Set CurrentDate,SetVariable,Set Timestamp,SetVariable,Restart LoadingControl,DatabricksNotebook,Load Landing to Bronze,DatabricksNotebook,FInalizacaoPipeline,Wait,c501a33a-0000-0b00-0000-65fde01e0000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3 | /subscriptions/9f07555c-2taplatformfm-services-dev-rg/providers/Microsoft.DataFactory/factories/tlo-datastudio-adf-d/pipelines/PL-RepomSQLPROD3TODatalakeLandingZone-N,PL-RepomSQLPROD3TODatalakeLandingZone-N,Microsoft.DataFactory/factories/pipelines,This pipeline copies data from all SQLPROD3 environment tables listed in the LoadingControl. ,Get All Tables,Lookup,For Each Tables,ForEach,@activity('Get All Tables').output.value,Expression,false,Update StartDate,DatabricksNotebook,Set CurrentDate foreach,SetVariable,Copy Data,Copy,SqlServerSource,ParquetSink,DS__Repom__Generic,DatasetReference,DS__FEM__UolDiveo_DatalakeParquet,DatasetReference,Update EndDate Success,DatabricksNotebook,Set CurrentDate foreach end,SetVariable,Update EndDate Success Error,DatabricksNotebook,Set CurrentDate,SetVariable,Set Timestamp,SetVariable,Restart LoadingControl,DatabricksNotebook,Load Landing to Bronze,DatabricksNotebook,FInalizacaoPipeline,Wait,c501a23a-0000-0b00-0000-65fde01e0000,FilterType,IfCondition,Update EndDate Success Error CopyData,DatabricksNotebook,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4 |
--------------------------------------------------------------------------------