├── .github ├── CODEOWNERS ├── release-drafter.yml ├── dependabot.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── pull_request_template.md └── workflows │ ├── release.yml │ └── ci.yml ├── integration_tests ├── packages.yml ├── requirements.txt ├── docs │ └── integration_tests_diagram.png ├── for_CI │ ├── change_of_database.sh │ ├── change_dbt_project_airflow_source.sh │ ├── change_dbt_project_adf_source.sh │ ├── change_dbt_project_from_databricks_to_airflow.sh │ └── change_dbt_project_databricks_source.sh ├── macros │ ├── create_schema.sql │ ├── drop_schema.sql │ ├── seed__task_fail.sql │ ├── adf_activity_runs.sql │ ├── seed__dag.sql │ ├── seed__dag_run.sql │ ├── seed__task_instance.sql │ ├── adf_pipeline_runs.sql │ ├── adf_triggers.sql │ └── jobs.sql ├── profiles.yml ├── dbt_project.yml ├── README.md └── seeds │ ├── airflow │ ├── task_instance.csv │ ├── task_fail.csv │ ├── dag.csv │ └── dag_run.csv │ └── adf │ ├── adf_activity_runs.csv │ └── adf_pipelines.csv ├── .gitignore ├── packages.yml ├── models ├── staging │ ├── databricks_workflow_sources │ │ ├── stg_task_instance_databricks_workflow.sql │ │ ├── stg_task_fail_databricks_workflow.sql │ │ ├── stg_dag_run_databricks_workflow.sql │ │ ├── stg_dag_databricks_workflow.sql │ │ └── source.yml │ ├── dbt_utils_day.sql │ ├── airflow_sources │ │ ├── stg_dag_run_airflow.sql │ │ ├── stg_task_fail_airflow.sql │ │ ├── stg_dag_airflow.sql │ │ ├── stg_task_instance_airflow.sql │ │ └── source.yml │ └── adf_sources │ │ ├── stg_dag_run_adf.sql │ │ ├── stg_task_fail_adf.sql │ │ ├── stg_task_instance_adf.sql │ │ ├── stg_dag_adf.sql │ │ └── source.yml ├── marts │ ├── dim_dag_monitoring_dag.sql │ ├── bridge_dag_monitoring.yml │ ├── dim_dag_monitoring_task.yml │ ├── dim_dag_monitoring_dag.yml │ ├── fact_dag_monitoring_dag_run.yml │ ├── fact_dag_monitoring_task_fail.yml │ ├── bridge_dag_monitoring.sql │ ├── fact_dag_monitoring_task_instance.yml │ ├── fact_dag_monitoring_dag_run.sql │ ├── fact_dag_monitoring_task_fail.sql │ ├── dim_dag_monitoring_task.sql │ └── fact_dag_monitoring_task_instance.sql ├── docs │ └── universal.md └── calendar │ └── dim_dag_monitoring_dates.sql ├── package-lock.yml ├── requirements.txt ├── macros ├── cast_as_date.sql ├── day_of_year.sql ├── cast_as_timestamp.sql ├── adf_pipelines_name.sql ├── flatten_data.sql ├── date_diff.sql ├── replace_dot_for_colon_notation.sql ├── date_add.sql ├── date_format.sql ├── cast_as_string.sql ├── day_of_week.sql └── model_task_instance_databricks_workflow.sql ├── example.env ├── setup.sh ├── catalog-dag-monitoring.yaml ├── profiles.yml ├── dbt_project.yml ├── README.md └── LICENSE /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @techindicium/central-de-dados -------------------------------------------------------------------------------- /integration_tests/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - local: ../ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | dbt_packages 3 | target 4 | *logs 5 | .env 6 | env 7 | .user.yml -------------------------------------------------------------------------------- /packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 1.1.1 
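The integration tests pull this package through a local path (`packages: - local: ../` in integration_tests/packages.yml), while the root packages.yml above only pins dbt_utils. As a minimal sketch, a downstream project would instead reference the package from its own packages.yml, for example via a git pin (the revision shown is hypothetical; use the tag or branch you actually need, and dbt_utils is resolved transitively):

```yaml
# illustrative consumer packages.yml, not a file in this repository
packages:
  - git: "https://github.com/techindicium/dbt-dag-monitoring.git"
    revision: 0.2.0   # hypothetical pin; match the release you want
```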
-------------------------------------------------------------------------------- /integration_tests/requirements.txt: -------------------------------------------------------------------------------- 1 | dbt-snowflake==1.8.3 2 | dbt-databricks==v1.8.5 3 | databricks-sdk==0.17.0 4 | dbt-core==1.8.5 -------------------------------------------------------------------------------- /models/staging/databricks_workflow_sources/stg_task_instance_databricks_workflow.sql: -------------------------------------------------------------------------------- 1 | {{ model_task_instance_databricks_workflow() }} -------------------------------------------------------------------------------- /package-lock.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 1.1.1 4 | sha1_hash: b0e601a7edf623823e7381fcbae7d8a2d0999fe4 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dbt-core==1.8.5 2 | dbt-databricks==v1.8.5 3 | databricks-sdk==0.17.0 4 | dbt-snowflake==1.8.3 5 | google-cloud==0.34.0 6 | dbt-bigquery==1.8.2 -------------------------------------------------------------------------------- /integration_tests/docs/integration_tests_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techindicium/dbt-dag-monitoring/HEAD/integration_tests/docs/integration_tests_diagram.png -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | template: | 2 | ## What's Changed 3 | $CHANGES 4 | 5 | **Full Changelog**: https://github.com/$OWNER/$REPOSITORY/compare/$PREVIOUS_TAG...v$RESOLVED_VERSION 6 | -------------------------------------------------------------------------------- /integration_tests/for_CI/change_of_database.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source=$1 4 | 5 | sed -i "s/dag_monitoring_${source}_database: $2/dag_monitoring_${source}_database: $3/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 6 | -------------------------------------------------------------------------------- /integration_tests/macros/create_schema.sql: -------------------------------------------------------------------------------- 1 | {% macro create_schema(schema_name) %} 2 | {% set sql %} 3 | CREATE SCHEMA IF NOT EXISTS {{ schema_name }} 4 | {% endset %} 5 | {{ run_query(sql) }} 6 | {% endmacro %} -------------------------------------------------------------------------------- /macros/cast_as_date.sql: -------------------------------------------------------------------------------- 1 | {% macro cast_as_date(column) -%} 2 | {{ return(adapter.dispatch('cast_as_date')(column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__cast_as_date(column) -%} 7 | cast({{ column }} as date) 8 | {%- endmacro %} 9 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | rebase-strategy: "disabled" 8 | ignore: 9 | - dependency-name: "*" 10 | update-types: 11 | - version-update:semver-patch 12 
| -------------------------------------------------------------------------------- /example.env: -------------------------------------------------------------------------------- 1 | # this is an example of how to fill the information in each variable 2 | 3 | 4 | # default configurations, you don't have to change it unless you have a specific need 5 | export DBT_DEFAULT_TARGET="dev"; 6 | export DEV_CATALOG_NAME=""; 7 | export DEV_SCHEMA_NAME=""; 8 | export DEV_HOST=""; 9 | export DEV_HTTP_PATH=""; 10 | export DEV_TOKEN=""; -------------------------------------------------------------------------------- /macros/day_of_year.sql: -------------------------------------------------------------------------------- 1 | {% macro day_of_year(column) -%} 2 | {{ return(adapter.dispatch('day_of_year')(column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__day_of_year(column) -%} 7 | extract(dayofyear from {{ column }}) 8 | {%- endmacro %} 9 | 10 | 11 | {% macro databricks__day_of_year(column) -%} 12 | extract(doy from {{ column }}) 13 | {%- endmacro %} -------------------------------------------------------------------------------- /integration_tests/macros/drop_schema.sql: -------------------------------------------------------------------------------- 1 | {% macro drop_schema(schema_name) %} 2 | 3 | {% set drop_schema_query %} 4 | DROP SCHEMA IF EXISTS {{ schema_name }} CASCADE; 5 | {% endset %} 6 | 7 | {% if execute %} 8 | {{ run_query(drop_schema_query) }} 9 | {% endif %} 10 | 11 | {% do log("Dropped schema " ~ schema_name, info = true) %} 12 | 13 | {% endmacro %} -------------------------------------------------------------------------------- /macros/cast_as_timestamp.sql: -------------------------------------------------------------------------------- 1 | {% macro cast_as_timestamp(column, n=1000) -%} 2 | {{ return(adapter.dispatch('cast_as_timestamp')(column, n=1000)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__cast_as_timestamp(column, n=1000) -%} 7 | to_timestamp({{ column }} / {{ n }} ) 8 | {%- endmacro %} 9 | 10 | {% macro bigquery__cast_as_timestamp(column, n=1000) -%} 11 | TIMESTAMP_SECONDS(cast({{ column }} / {{ n }} as int)) 12 | {%- endmacro %} -------------------------------------------------------------------------------- /macros/adf_pipelines_name.sql: -------------------------------------------------------------------------------- 1 | {% macro adf_pipelines_name(column) -%} 2 | {{ return(adapter.dispatch('adf_pipelines_name')(column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__adf_pipelines_name(column) -%} 7 | {{ column }} 8 | {%- endmacro %} 9 | 10 | {% macro bigquery__adf_pipelines_name(column) -%} 11 | {% if column == 'pipelines.pipelineReference.referenceName' -%} 12 | {{ 'pipelineReference.referenceName' }} 13 | {% endif %} 14 | {%- endmacro %} -------------------------------------------------------------------------------- /macros/flatten_data.sql: -------------------------------------------------------------------------------- 1 | {% macro flatten_data(column) -%} 2 | {{ return(adapter.dispatch('flatten_data')(column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro databricks__flatten_data(column) -%} 7 | lateral view explode ({{ column }}) 8 | {%- endmacro %} 9 | 10 | {% macro snowflake__flatten_data(column) -%} 11 | , lateral flatten(input => {{ column }}) 12 | {%- endmacro %} 13 | 14 | {% macro bigquery__flatten_data(column) -%} 15 | , unnest({{ column }}) 16 | {%- endmacro %} 17 | 
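The adapter-dispatch pattern above is what lets the same staging models run on Databricks, Snowflake and BigQuery. A minimal sketch of a call site, assuming the `job_runs` source and its `tasks` array column from this package's Databricks source definition; the per-adapter expansions in the comments are taken verbatim from the implementations above, while the query itself is illustrative and a column alias may still be required by your warehouse:

```sql
-- illustrative only: how a model might invoke the dispatched macro
select *
from {{ source('raw_databricks_workflow_monitoring', 'job_runs') }}
{{ flatten_data('tasks') }}

-- flatten_data('tasks') renders as, depending on the active adapter:
--   databricks:  lateral view explode (tasks)
--   snowflake:   , lateral flatten(input => tasks)
--   bigquery:    , unnest(tasks)
```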
-------------------------------------------------------------------------------- /macros/date_diff.sql: -------------------------------------------------------------------------------- 1 | {% macro date_diff(datepart, start_date, end_date) -%} 2 | {{ return(adapter.dispatch('date_diff')(datepart, start_date, end_date)) }} 3 | {%- endmacro %} 4 | 5 | {% macro default__date_diff(datepart, start_date, end_date) -%} 6 | datediff({{ datepart }}, {{ start_date }}, {{ end_date }}) 7 | {%- endmacro %} 8 | 9 | {% macro bigquery__date_diff(datepart, start_date, end_date) -%} 10 | date_diff({{ end_date }}, {{ start_date }}, {{ datepart }}) 11 | {%- endmacro %} -------------------------------------------------------------------------------- /macros/replace_dot_for_colon_notation.sql: -------------------------------------------------------------------------------- 1 | {% macro replace_dot_for_colon(struct_column, column_item) -%} 2 | {{ return(adapter.dispatch('replace_dot_for_colon')(struct_column, column_item)) }} 3 | {%- endmacro %} 4 | 5 | {% macro default__replace_dot_for_colon(struct_column, column_item) -%} 6 | {{ struct_column }}.{{ column_item }} 7 | {%- endmacro %} 8 | 9 | {% macro snowflake__replace_dot_for_colon(struct_column, column_item) -%} 10 | {{ struct_column }}:{{ column_item }} 11 | {%- endmacro %} -------------------------------------------------------------------------------- /macros/date_add.sql: -------------------------------------------------------------------------------- 1 | {% macro date_add(datepart, interval, column, default='INTERVAL') -%} 2 | {{ return(adapter.dispatch('date_add')(datepart, interval, column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__date_add(datepart, interval, column, default='INTERVAL') -%} 7 | dateadd({{ datepart }}, {{ interval }}, {{ column }} ) 8 | {%- endmacro %} 9 | 10 | {% macro bigquery__date_add(datepart, interval, column, default='INTERVAL') -%} 11 | date_add({{ column }}, {{ default }} {{ interval }} {{ datepart }} ) 12 | {%- endmacro %} -------------------------------------------------------------------------------- /models/staging/dbt_utils_day.sql: -------------------------------------------------------------------------------- 1 | {% set my_query %} 2 | select cast({{current_timestamp()}} as date) 3 | {% endset %} 4 | 5 | {% if execute %} 6 | {% set today = run_query(my_query).columns[0].values()[0] %} 7 | {% set tomorrow = dateadd('day', 1, "'" ~ today ~ "'") %} 8 | {% set start_date = var('dbt_dag_monitoring')['dag_monitoring_start_date'] %} 9 | {% else %} 10 | {% set tomorrow = ' ' %} 11 | {% set start_date = ' ' %} 12 | {% endif %} 13 | 14 | {{ dbt_utils.date_spine( 15 | datepart="day", 16 | start_date=start_date, 17 | end_date=tomorrow 18 | ) 19 | }} -------------------------------------------------------------------------------- /macros/date_format.sql: -------------------------------------------------------------------------------- 1 | {% macro month_day(column, format='') -%} 2 | {{ return(adapter.dispatch('month_day')(column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro databricks__month_day(column, format='dd-MM') -%} 7 | date_format({{ column }}, '{{ format }}') 8 | {%- endmacro %} 9 | 10 | {% macro snowflake__month_day(column, format='dd-MM') -%} 11 | to_char(cast({{ column }} as date), '{{ format }}') 12 | {%- endmacro %} 13 | 14 | 15 | {% macro bigquery__month_day(column, format='%d-%m') -%} 16 | cast(parse_date('{{ format }}', cast({{ column }} as string)) as string) 17 | {%- endmacro %} 
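The date helpers follow the same dispatch approach. A short usage sketch, where the `date_diff` call mirrors what stg_dag_run_airflow does and the remaining columns are illustrative only (note that `month_day` has no `default__` implementation, so it resolves only on Databricks, Snowflake and BigQuery):

```sql
-- illustrative only: combining the date macros in a staging-style select
select
    {{ date_diff('second', 'start_date', 'end_date') }} as duration         -- run time in seconds
  , {{ date_add('day', 1, 'start_date') }}              as next_day         -- start_date shifted by one day
  , {{ month_day('start_date') }}                       as month_day_label  -- 'dd-MM'-style label
from {{ source('raw_airflow_monitoring', 'dag_run') }}
```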
-------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #if something goes wrong, stop the script 4 | set -e 5 | 6 | # exports all variables in .env file. Any variable defined in the script will be available in the environment. 7 | set -a 8 | 9 | # Install virtualenv if not installed 10 | pip install virtualenv 11 | 12 | # Create a new virtual environment 13 | virtualenv env 14 | 15 | # It gives permission to activate the virtual environment 16 | chmod +x env/bin/activate 17 | 18 | # Activate the virtual environment 19 | source env/bin/activate # On Windows, use `env\Scripts\activate` 20 | 21 | # # Load the environment variables 22 | source .env 23 | 24 | # Install requirements 25 | pip install -r ./requirements.txt 26 | 27 | dbt deps -------------------------------------------------------------------------------- /models/staging/airflow_sources/stg_dag_run_airflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ cast_as_string('id') }} as dag_run_id 5 | , {{ cast_as_string('dag_id') }} as dag_id 6 | , {{ cast_as_date('start_date') }} as run_date 7 | , state as dag_state 8 | , external_trigger 9 | , start_date as execution_start_date 10 | , end_date as execution_end_date 11 | , {{ date_diff('second', 'start_date', 'end_date') }} as duration 12 | , run_type 13 | , {{ cast_as_string('run_id') }} as run_id 14 | from {{ source('raw_airflow_monitoring', 'dag_run') }} 15 | ) 16 | select * 17 | from renamed 18 | -------------------------------------------------------------------------------- /models/staging/databricks_workflow_sources/stg_task_fail_databricks_workflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select 4 | {{ cast_as_string('task_id') }} as task_fail_id 5 | , {{ cast_as_string('task_id') }} as task_id 6 | , {{ cast_as_string('dag_id') }} as dag_id 7 | , run_id 8 | , execution_date 9 | , execution_start_date 10 | , execution_end_date 11 | , duration 12 | , 'not_implemented_for_databricks_workflow' as map_index 13 | from {{ ref('stg_task_instance_databricks_workflow') }} 14 | where state_task_instance in ('MAXIMUM_CONCURRENT_RUNS_REACHED', 'CANCELED', 'FAILED', 'UPSTREAM_FAILED') 15 | ) 16 | select * 17 | from renamed 18 | -------------------------------------------------------------------------------- /models/staging/adf_sources/stg_dag_run_adf.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ cast_as_string('id') }} as dag_run_id 5 | , {{ cast_as_string('pipelineName') }} as dag_id 6 | , {{ cast_as_date('runStart') }} as run_date 7 | , status as dag_state 8 | , {{ cast_as_string('invokedBy') }} as external_trigger 9 | , runStart as execution_start_date 10 | , runEnd as execution_end_date 11 | , durationInMs / 1000 as duration 12 | , "not_implemented_by_adf" as run_type 13 | , {{ cast_as_string('runId') }} as run_id 14 | from {{ source('raw_adf_monitoring', 'adf_pipeline_runs') }} 15 | ) 16 | select * 17 | from renamed 18 | -------------------------------------------------------------------------------- /models/staging/airflow_sources/stg_task_fail_airflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ 
cast_as_string('id') }} as task_fail_id 5 | , {{ cast_as_string('task_id') }} as task_id 6 | , {{ cast_as_string('dag_id') }} as dag_id 7 | , {{ cast_as_string('run_id') }} as run_id 8 | , {{ cast_as_date('start_date') }} as execution_date 9 | , start_date as execution_start_date 10 | , end_date as execution_end_date 11 | , duration 12 | , case 13 | when map_index = -1 then 'no mapping' 14 | end as map_index 15 | from {{ source('raw_airflow_monitoring', 'task_fail') }} 16 | ) 17 | select * 18 | from renamed 19 | -------------------------------------------------------------------------------- /models/marts/dim_dag_monitoring_dag.sql: -------------------------------------------------------------------------------- 1 | with 2 | stg_dag as ( 3 | {% for src in var('enabled_sources') -%} 4 | select 5 | dag_id 6 | , dag_name 7 | , dag_description 8 | , dag_frequency 9 | , timetable_description 10 | , is_paused 11 | , is_active 12 | , fileloc 13 | , owners 14 | , '{{ src }}' as source_system 15 | from 16 | {{ ref('stg_dag_' + src) }} 17 | {% if not loop.last -%} union {% endif -%} 18 | {% endfor -%} 19 | ) 20 | , stg_dag_with_sk as ( 21 | select 22 | {{ dbt_utils.generate_surrogate_key(['dag_id']) }} as dag_sk 23 | , * 24 | from stg_dag 25 | ) 26 | select * 27 | from stg_dag_with_sk -------------------------------------------------------------------------------- /models/staging/adf_sources/stg_task_fail_adf.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ cast_as_string('activityRunId') }} as task_fail_id 5 | , {{ cast_as_string('activityRunId') }} as task_id 6 | , {{ cast_as_string('pipelineName') }} as dag_id 7 | , pipelineRunId as run_id 8 | , {{ cast_as_date('activityRunStart') }} as execution_date 9 | , activityRunStart as execution_start_date 10 | , activityRunEnd as execution_end_date 11 | , durationInMs / 1000 as duration 12 | , "not_implemented_for_adf" as map_index 13 | from {{ source('raw_adf_monitoring', 'adf_activity_runs') }} 14 | where status in ('TimedOut', 'Cancelled', 'Failed') 15 | ) 16 | select * 17 | from renamed 18 | 19 | -------------------------------------------------------------------------------- /models/staging/databricks_workflow_sources/stg_dag_run_databricks_workflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select 4 | {{ cast_as_string('run_id') }} as dag_run_id 5 | , {{ cast_as_string('job_id') }} as dag_id 6 | , {{cast_as_timestamp('start_time')}} as run_date 7 | , {{replace_dot_for_colon('state','result_state')}} as dag_state 8 | , "trigger" as external_trigger 9 | , {{cast_as_timestamp('start_time')}} as execution_start_date 10 | , {{cast_as_timestamp('end_time')}} as execution_end_date 11 | , execution_duration / 1000 as duration 12 | , run_type 13 | , {{ cast_as_string('run_id') }} as run_id 14 | from {{ source('raw_databricks_workflow_monitoring', 'job_runs') }} 15 | ) 16 | select * 17 | from renamed 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. 
Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Operational System (please complete the following information):** 27 | - OS: 28 | - Version: 29 | 30 | **Do you use WSL? Which version? (please complete the following information):** 31 | 32 | **Additional context** 33 | Add any other context about the problem here. -------------------------------------------------------------------------------- /macros/cast_as_string.sql: -------------------------------------------------------------------------------- 1 | {% macro cast_as_string(column) -%} 2 | {{ return(adapter.dispatch('cast_as_string')(column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro databricks__cast_as_string(column) -%} 7 | cast({{ column }} as string) 8 | {%- endmacro %} 9 | 10 | {% macro bigquery__cast_as_string(column) -%} 11 | {% if column == 'invokedBy' or column == 'properties.typeProperties.recurrence.schedule' -%} 12 | {{ column }} 13 | {% else -%} 14 | cast({{ column }} as string) 15 | {% endif -%} 16 | {%- endmacro %} 17 | 18 | {% macro snowflake__cast_as_string(column) -%} 19 | {% if column == 'null' -%} 20 | {{ column }} 21 | {% else -%} 22 | cast({{ column }} as string) 23 | {% endif -%} 24 | {%- endmacro %} 25 | 26 | 27 | {% macro redshift__cast_as_string(column) -%} 28 | cast({{ column }} as varchar) 29 | {%- endmacro %} 30 | -------------------------------------------------------------------------------- /integration_tests/for_CI/change_dbt_project_airflow_source.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Enable adf sources in dbt_project.yml 4 | sed -i '/raw_adf_monitoring:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 5 | 6 | # Disable databricks sources in dbt_project.yml 7 | sed -i 's/\(raw_airflow_monitoring:\s*\n\s*+enabled:\s*\)false/\1true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 8 | 9 | # Enable adf models in dbt_project.yml 10 | sed -i '/adf_sources:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 11 | 12 | # Disable databricks_workflow models in dbt_project.yml 13 | sed -i '/airflow_sources:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 14 | 15 | # Update the enabled_sources in dbt_project.yml 16 | sed -i "s/enabled_sources: \[.*\]/enabled_sources: \['airflow'\]/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Propose a feature request, new capability or improvement. 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 
18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | 22 | ## Constraints and Assumptions 23 | Call out any constraint and/or assumption relevant for the development and use of this feature. 24 | 25 | ## Tests 26 | Describe here any new test requirement for this feature. 27 | 28 | ## References 29 | -------------------------------------------------------------------------------- /integration_tests/for_CI/change_dbt_project_adf_source.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Enable adf sources in dbt_project.yml 4 | sed -i '/raw_adf_monitoring:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 5 | 6 | # Disable databricks sources in dbt_project.yml 7 | sed -i 's/\(raw_databricks_workflow_monitoring:\s*\n\s*+enabled:\s*\)true/\1false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 8 | 9 | # Enable adf models in dbt_project.yml 10 | sed -i '/adf_sources:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 11 | 12 | # Disable databricks_workflow models in dbt_project.yml 13 | sed -i '/databricks_workflow_sources:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 14 | 15 | # Update the enabled_sources in dbt_project.yml 16 | sed -i "s/enabled_sources: \[.*\]/enabled_sources: \['adf'\]/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 17 | -------------------------------------------------------------------------------- /models/marts/bridge_dag_monitoring.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: bridge_dag_monitoring 5 | description: "Bridge table used to make relationships between marts." 6 | columns: 7 | - name: 'generated_date' 8 | description: "Date of execution of the DAG." 9 | 10 | - name: dag_fk 11 | description: "Foreign key for each dag." 12 | 13 | - name: task_fk 14 | description: "Foreign key for each task." 15 | 16 | - name: dag_run_fk 17 | description: "Foreign key for fact_dag_monitoring_dag_run. Composed of: dag_run_id, execution_start_date and execution_end_date" 18 | 19 | - name: task_fail_fk 20 | description: "Foreign key for fact_dag_monitoring_task_fail. Composed of: task_fail_id, execution_end_date and execution_start_date" 21 | 22 | - name: task_instance_fk 23 | description: "Foreign key for fact_dag_monitoring_task_instance. 
Composed of: task_instance_id, execution_end_date, and execution_start_date" -------------------------------------------------------------------------------- /integration_tests/for_CI/change_dbt_project_from_databricks_to_airflow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Disable databricks sources in dbt_project.yml 4 | sed -i 's/\(raw_databricks_workflow_monitoring:\s*\n\s*+enabled:\s*\)true/\1false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 5 | 6 | # Enable airflow sources in dbt_project.yml 7 | sed -i '/raw_airflow_monitoring:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 8 | 9 | # Enable airflow models in dbt_project.yml 10 | sed -i '/airflow_sources:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 11 | 12 | # Disable databricks_workflow models in dbt_project.yml 13 | sed -i '/databricks_workflow_sources:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 14 | 15 | # Update the enabled_sources in dbt_project.yml 16 | sed -i "s/enabled_sources: \[.*\]/enabled_sources: \['airflow'\]/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 17 | -------------------------------------------------------------------------------- /integration_tests/for_CI/change_dbt_project_databricks_source.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Enable databricks sources in dbt_project.yml 4 | sed -i '/raw_databricks_workflow_monitoring:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 5 | 6 | # Disable airflow sources in dbt_project.yml 7 | sed -i 's/\(raw_airflow_monitoring:\s*\n\s*+enabled:\s*\)true/\1false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 8 | 9 | # Disable airflow_workflow models in dbt_project.yml 10 | sed -i '/airflow_sources:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 11 | 12 | # Enable databricks_workflow models in dbt_project.yml 13 | sed -i '/databricks_workflow_sources:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 14 | 15 | # Update the enabled_sources in dbt_project.yml 16 | sed -i "s/enabled_sources: \[.*\]/enabled_sources: \['databricks_workflow'\]/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" -------------------------------------------------------------------------------- /catalog-dag-monitoring.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: backstage.io/v1alpha1 2 | kind: Component 3 | metadata: 4 | name: dbt_dag_monitoring 5 | description: | 6 | DAG Monitoring is a product designed to monitor orquestration metadata, formed by a tap, a dbt package and a Power BI dashboard. 
7 | tags: 8 | - product 9 | - airflow 10 | - monitoring 11 | - powerbi 12 | links: 13 | - title: Wiki 14 | url: https://wiki.indicium.tech/en/central_dados/squad_produtos_horizontais/squad_produtos_horizontais/dag-monitoring 15 | - title: Repository 16 | url: https://github.com/techindicium/dbt-dag-monitoring 17 | annotations: 18 | indicium.tech/product-url: https://app.powerbi.com/groups/1c5de32c-67f7-493c-ad6d-1d1c574b98bb/reports/132e0228-08ba-4f24-b6c4-a4974414e4b8/ReportSection?experience=power-bi 19 | spec: 20 | title: DAG Monitoring 21 | team: Produtos Horizontais 22 | class: Técnico 23 | vertical: Monitoring 24 | businessUnit: TI 25 | interface: Dashboard 26 | language: Portuguese 27 | owner: Indicium 28 | type: product 29 | lifecycle: experimental 30 | system: public-websites -------------------------------------------------------------------------------- /models/staging/airflow_sources/stg_dag_airflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ cast_as_string('dag_id') }} as dag_id 5 | , {{ cast_as_string('dag_id') }} as dag_name 6 | , description as dag_description 7 | , case 8 | when timetable_description like '% hour, between %' then 'hourly' 9 | when timetable_description like 'Between %' then 'hourly' 10 | when timetable_description like '% on day % month' then 'monthly' 11 | when timetable_description like '% in %' then 'monthly' 12 | when timetable_description like '%:% on %' then 'weekly' 13 | when timetable_description like '%:%' then 'daily' 14 | else timetable_description 15 | end as dag_frequency 16 | , timetable_description 17 | , is_paused 18 | , is_active 19 | , fileloc 20 | , owners 21 | from {{ source('raw_airflow_monitoring', 'dag') }} 22 | ) 23 | select * 24 | from renamed 25 | -------------------------------------------------------------------------------- /models/marts/dim_dag_monitoring_task.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: dim_dag_monitoring_task 5 | description: "dimensions table for Airflow tasks" 6 | 7 | columns: 8 | - name: task_sk 9 | description: "Surrogate key. Composed of: task_id and dag_id" 10 | tests: 11 | - unique 12 | - not_null 13 | 14 | - name: task_id 15 | description: "task id." 16 | tests: 17 | - not_null 18 | 19 | - name: dag_id 20 | description: "DAG id." 21 | tests: 22 | - not_null 23 | 24 | - name: map_index 25 | description: "Index for mapping." 26 | 27 | - name: hostname 28 | description: "Task hostname." 29 | 30 | - name: operator 31 | description: " Task operator model." 32 | 33 | - name: task_pool 34 | description: " Airflow's pool in which the task should be executed." 35 | 36 | - name: source_system 37 | description: " System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow." 
38 | -------------------------------------------------------------------------------- /integration_tests/macros/seed__task_fail.sql: -------------------------------------------------------------------------------- 1 | {% macro seed__task_fail() %} 2 | {% set create_table %} 3 | CREATE OR REPLACE TABLE `{{ target.database }}`.{{ target.schema }}.task_fail ( 4 | id INT64, 5 | task_id STRING, 6 | dag_id STRING, 7 | start_date TIMESTAMP, 8 | end_date TIMESTAMP, 9 | duration INT64, 10 | map_index INT64, 11 | run_id STRING 12 | ); 13 | {% endset %} 14 | 15 | {% set insert_table %} 16 | 17 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.task_fail ( 18 | id, task_id, dag_id, start_date, end_date, duration, map_index, run_id 19 | ) VALUES 20 | (1, 'dbt_freshness', 'dbt_bitrix', TIMESTAMP('2023-12-27T02:30:16.714366Z'), TIMESTAMP('2023-12-21T09:01:57.631415Z'), 1, -1, 'scheduled__2022-12-11T06:00:00+00:00'), 21 | (2, 'dbt_source_test', 'dbt_bitrix', TIMESTAMP('2023-12-21T09:01:57.073097Z'), TIMESTAMP('2023-12-23T08:30:25.791135Z'), 1, -1, 'scheduled__2022-12-11T06:00:00+00:00'); 22 | 23 | {% endset %} 24 | 25 | {% do run_query(create_table) %} 26 | {% do log("finished creating table task_fail", info=true) %} 27 | 28 | {% do run_query(insert_table) %} 29 | {% do log("finished insert table task_fail", info=true) %} 30 | 31 | {% endmacro %} -------------------------------------------------------------------------------- /models/staging/databricks_workflow_sources/stg_dag_databricks_workflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select 4 | {{ cast_as_string('job_id') }} as dag_id 5 | , {{replace_dot_for_colon('settings','name')}} as dag_name 6 | , 'not_implemented_for_databricks_workflow' as dag_description 7 | , 'not_implemented_for_databricks_workflow' as dag_frequency 8 | , {{replace_dot_for_colon('settings','schedule.quartz_cron_expression')}} as timetable_description 9 | , case 10 | when {{replace_dot_for_colon('settings','schedule.pause_status')}} = 'PAUSED' then true 11 | else false 12 | end as is_paused 13 | , case 14 | when {{replace_dot_for_colon('settings','schedule.pause_status')}} != 'PAUSED' then true 15 | else false 16 | end as is_active 17 | , 'not_implemented_for_databricks_workflow' as fileloc 18 | , creator_user_name as owners 19 | , null as ind_extraction_date 20 | , {{replace_dot_for_colon('settings','schedule.pause_status')}} as pause_status 21 | from {{ source('raw_databricks_workflow_monitoring', 'jobs') }} 22 | ) 23 | select * 24 | from renamed 25 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 4 | 5 |
6 | PR Checklist 7 | 8 | ### PR Structure 9 | 10 | - [ ] This PR has reasonably narrow scope (if not, break it down into smaller PRs). 11 | - [ ] This PR avoids mixing refactoring changes with feature changes (split into two PRs 12 | otherwise). 13 | 14 | ### Thoroughness 15 | 16 | - [ ] This PR adds tests for the most critical parts of the new functionality or fixes. 17 | - [ ] I've updated the docs and README with the added features, breaking changes, new instructions on how to use the repository. 18 | 19 | ### Release planning 20 | 21 | - [ ] I've decided if this PR requires a new major/minor/patch version accordingly to 22 | [semver](https://semver.org/), and I've changed the name of the BRANCH to release/* , feature/* or patch/* . 23 |
24 | 25 | ### What 26 | 27 | [TODO: Short statement about what is changing.] 28 | 29 | ### Why 30 | 31 | [TODO: Why this change is being made. Include any context required to understand the why.] 32 | 33 | ### Known limitations 34 | 35 | [TODO or N/A] -------------------------------------------------------------------------------- /integration_tests/profiles.yml: -------------------------------------------------------------------------------- 1 | dbt_dag_monitoring_integration_tests: 2 | target: '{{ env_var(''DBT_DEFAULT_TARGET'', ''databricks'')}}' 3 | outputs: 4 | databricks: 5 | ansi_mode: false 6 | catalog: '{{ env_var(''DEV_CATALOG_NAME'')}}' 7 | host: '{{ env_var(''DEV_HOST'') }}' 8 | http_path: '{{ env_var(''DEV_HTTP_PATH'') }}' 9 | schema: '{{ env_var(''DEV_SCHEMA_NAME'')}}' 10 | threads: 16 11 | token: '{{ env_var(''DEV_TOKEN'') }}' 12 | type: databricks 13 | 14 | bigquery: 15 | dataset: "{{ env_var('BIGQUERY_DATASET') }}" 16 | project: "{{ env_var('BIGQUERY_PROJECT') }}" 17 | job_execution_timeout_seconds: "{{ env_var('DBT_JOB_TIMEOUT') | int }}" 18 | threads: "{{ env_var('DBT_THREADS') | int }}" 19 | job_retries: "{{ env_var('DBT_JOB_RETRIES') | int }}" 20 | method: oauth 21 | location: us 22 | priority: interactive 23 | type: bigquery 24 | 25 | snowflake: 26 | type: "snowflake" 27 | account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}" 28 | user: "{{ env_var('SNOWFLAKE_USER') }}" 29 | password: "{{ env_var('SNOWFLAKE_PASSWORD') }}" 30 | role: "{{ env_var('SNOWFLAKE_ROLE') }}" 31 | database: "{{ env_var('SNOWFLAKE_DATABASE') }}" 32 | warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}" 33 | schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}" 34 | threads: 10 35 | -------------------------------------------------------------------------------- /models/marts/dim_dag_monitoring_dag.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: dim_dag_monitoring_dag 5 | description: "Dimension table for Airflow DAGs" 6 | columns: 7 | - name: 'dag_sk' 8 | description: "Surrogate key. Composed of: dag_id" 9 | tests: 10 | - unique 11 | - not_null 12 | 13 | - name: dag_id 14 | description: "Source table ID." 15 | tests: 16 | - unique 17 | - not_null 18 | 19 | - name: dag_name 20 | description: "Descriptive name for DAG." 21 | 22 | - name: dag_description 23 | description: "Description about the DAG." 24 | 25 | - name: dag_frequency 26 | description: "DAG frequency for execution." 27 | 28 | - name: timetable_description 29 | description: "DAGs execution scheduling." 30 | 31 | - name: is_paused 32 | description: "Is the DAG paused." 33 | 34 | - name: is_active 35 | description: "Is the DAG active." 36 | 37 | - name: fileloc 38 | description: "path to file that needs to be imported to load this DAG. `source_code` in source: 11" 39 | 40 | - name: owners 41 | description: "DAG owner." 
42 | 43 | - name: source_system 44 | description: "System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow" 45 | -------------------------------------------------------------------------------- /profiles.yml: -------------------------------------------------------------------------------- 1 | dbt_dag_monitoring: 2 | target: "{{ env_var('DBT_DEFAULT_TARGET', 'databricks')}}" 3 | outputs: 4 | databricks: 5 | type: databricks 6 | catalog: "{{ env_var('DEV_CATALOG_NAME')}}" 7 | schema: "{{ env_var('DEV_SCHEMA_NAME')}}" 8 | host: "{{ env_var('DEV_HOST') }}" 9 | http_path: "{{ env_var('DEV_HTTP_PATH') }}" 10 | token: "{{ env_var('DEV_TOKEN') }}" 11 | threads: 16 12 | ansi_mode: false 13 | 14 | bigquery: 15 | dataset: "{{ env_var('BIGQUERY_DATASET') }}" 16 | project: "{{ env_var('BIGQUERY_PROJECT') }}" 17 | job_execution_timeout_seconds: "{{ env_var('DBT_JOB_TIMEOUT') | int }}" 18 | threads: "{{ env_var('DBT_THREADS') | int }}" 19 | job_retries: "{{ env_var('DBT_JOB_RETRIES') | int }}" 20 | method: oauth 21 | location: us 22 | priority: interactive 23 | type: bigquery 24 | 25 | snowflake: 26 | type: "snowflake" 27 | account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}" 28 | user: "{{ env_var('SNOWFLAKE_USER') }}" 29 | password: "{{ env_var('SNOWFLAKE_PASSWORD') }}" 30 | role: "{{ env_var('SNOWFLAKE_ROLE') }}" 31 | database: "{{ env_var('SNOWFLAKE_DATABASE') }}" 32 | warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}" 33 | schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}" 34 | threads: 10 -------------------------------------------------------------------------------- /models/marts/fact_dag_monitoring_dag_run.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: fact_dag_monitoring_dag_run 5 | description: "Events table of Airflow DAG executions" 6 | columns: 7 | - name: 'dag_run_sk' 8 | description: "Surrogate key. Composed of: dag_run_id, execution_start_date and execution_end_date" 9 | tests: 10 | - unique 11 | - not_null 12 | 13 | - name: dag_fk 14 | description: "DAGs ids." 15 | tests: 16 | - relationships: 17 | to: ref('dim_dag_monitoring_dag') 18 | field: dag_sk 19 | 20 | - name: generated_date 21 | description: "date of execution of the DAG." 22 | tests: 23 | - relationships: 24 | to: ref('dbt_utils_day') 25 | field: date_day 26 | 27 | - name: external_trigger 28 | description: "Points out if the DAG execution was triggered externally (True / False)." 29 | 30 | - name: execution_start_date 31 | description: "Data e hora em que iniciou a execução da DAG." 32 | 33 | - name: execution_end_date 34 | description: "Date and hour when the DAG execution ended." 35 | 36 | - name: run_type 37 | description: "Type of execution of the DAG." 38 | 39 | - name: duration 40 | description: "Execution time in seconds." 41 | 42 | - name: source_system 43 | description: "System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow." 44 | -------------------------------------------------------------------------------- /models/marts/fact_dag_monitoring_task_fail.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: fact_dag_monitoring_task_fail 5 | description: "Events table of Airflow tasks failures" 6 | columns: 7 | - name: task_fail_sk 8 | description: "Surrogate key. 
Composed of: task_fail_id, execution_end_date and execution_start_date" 9 | tests: 10 | - unique 11 | - not_null 12 | 13 | - name: task_fk 14 | description: "Unique identifier of the task execution." 15 | tests: 16 | - relationships: 17 | to: ref('dim_dag_monitoring_task') 18 | field: task_sk 19 | 20 | - name: dag_fk 21 | description: "Id of the DAG." 22 | tests: 23 | - relationships: 24 | to: ref('dim_dag_monitoring_dag') 25 | field: dag_sk 26 | 27 | - name: generated_date 28 | description: "Date of execution of the DAG." 29 | tests: 30 | - relationships: 31 | to: ref('dbt_utils_day') 32 | field: date_day 33 | 34 | - name: execution_start_date 35 | description: " Date and hour when the DAG execution started." 36 | 37 | - name: execution_end_date 38 | description: "Date and hour when the DAG execution ended." 39 | 40 | - name: duration 41 | description: "Duration of the execution in seconds." 42 | 43 | - name: source_system 44 | description: " System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow." 45 | -------------------------------------------------------------------------------- /models/staging/airflow_sources/stg_task_instance_airflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ cast_as_string('task_id') }} as task_id 5 | , {{ cast_as_string('dag_id') }} as dag_id 6 | , {{ cast_as_string('run_id') }} as run_id 7 | , {{ cast_as_date('start_date') }} as execution_date 8 | , start_date as execution_start_date 9 | , end_date as execution_end_date 10 | , duration 11 | , state as state_task_instance 12 | , try_number 13 | , hostname 14 | , pool as task_pool 15 | , priority_weight 16 | , operator 17 | , case 18 | when map_index = -1 then 'no mapping' 19 | end as map_index 20 | from {{ source('raw_airflow_monitoring', 'task_instance') }} 21 | ) 22 | , created_id as ( 23 | /*Table does not have a unique identifier, the id was created as the unique identification of records*/ 24 | select 25 | {{ dbt_utils.generate_surrogate_key(['task_id', 'dag_id', 'run_id']) }} as task_instance_sk 26 | , task_id 27 | , dag_id 28 | , run_id 29 | , execution_date 30 | , execution_start_date 31 | , execution_end_date 32 | , duration 33 | , state_task_instance 34 | , try_number 35 | , hostname 36 | , task_pool 37 | , priority_weight 38 | , operator 39 | , map_index 40 | from renamed 41 | ) 42 | select * 43 | from created_id 44 | -------------------------------------------------------------------------------- /models/staging/adf_sources/stg_task_instance_adf.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ cast_as_string('activityRunId') }} as task_id 5 | , {{ cast_as_string('pipelineName') }} as dag_id 6 | , {{ cast_as_string('pipelineRunId') }} as run_id 7 | , {{ cast_as_date('activityRunStart') }} as execution_date 8 | , activityRunStart as execution_start_date 9 | , activityRunEnd as execution_end_date 10 | , durationInMs / 1000 as duration 11 | , status as state_task_instance 12 | , retryAttempt as try_number 13 | , "not_implemented_for_adf" as hostname 14 | , "not_implemented_for_adf" as task_pool 15 | , "not_implemented_for_adf" as priority_weight 16 | , activityName as operator 17 | , "not_implemented_for_adf" as map_index 18 | from {{ source('raw_adf_monitoring', 'adf_activity_runs') }} 19 | ) 20 | , created_id as ( 21 | /*Im not sure this is necessary for adf*/ 22 | 
select 23 | {{ dbt_utils.generate_surrogate_key(['task_id', 'dag_id', 'run_id']) }} as task_instance_sk 24 | , task_id 25 | , dag_id 26 | , run_id 27 | , execution_date 28 | , execution_start_date 29 | , execution_end_date 30 | , duration 31 | , state_task_instance 32 | , try_number 33 | , hostname 34 | , task_pool 35 | , priority_weight 36 | , operator 37 | , map_index 38 | from renamed 39 | ) 40 | select * 41 | from created_id 42 | -------------------------------------------------------------------------------- /macros/day_of_week.sql: -------------------------------------------------------------------------------- 1 | {% macro day_of_week(column) %} 2 | {{ return(adapter.dispatch('day_of_week')(column)) }} 3 | {%- endmacro %} 4 | 5 | {% macro databricks__day_of_week(column) %} 6 | case 7 | when {{ column }} = 1 then 'Sunday' 8 | when {{ column }} = 2 then 'Monday' 9 | when {{ column }} = 3 then 'Tuesday' 10 | when {{ column }} = 4 then 'Wednesday' 11 | when {{ column }} = 5 then 'Thursday' 12 | when {{ column }} = 6 then 'Friday' 13 | when {{ column }} = 7 then 'Saturday' 14 | end as name_of_day 15 | {% endmacro %} 16 | 17 | {% macro snowflake__day_of_week(column) %} 18 | case 19 | when {{ column }} = 0 then 'Sunday' 20 | when {{ column }} = 1 then 'Monday' 21 | when {{ column }} = 2 then 'Tuesday' 22 | when {{ column }} = 3 then 'Wednesday' 23 | when {{ column }} = 4 then 'Thursday' 24 | when {{ column }} = 5 then 'Friday' 25 | when {{ column }} = 6 then 'Saturday' 26 | end as name_of_day 27 | {% endmacro %} 28 | 29 | {% macro bigquery__day_of_week(column) %} 30 | case 31 | when {{ column }} = 1 then 'Sunday' 32 | when {{ column }} = 2 then 'Monday' 33 | when {{ column }} = 3 then 'Tuesday' 34 | when {{ column }} = 4 then 'Wednesday' 35 | when {{ column }} = 5 then 'Thursday' 36 | when {{ column }} = 6 then 'Friday' 37 | when {{ column }} = 7 then 'Saturday' 38 | end as name_of_day 39 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'dbt_dag_monitoring_integration_tests' 2 | version: '0.2.0' 3 | 4 | require-dbt-version: [">=1.0.0", "<2.0.0"] 5 | 6 | config-version: 2 7 | 8 | target-path: "target" 9 | clean-targets: ["target", "dbt_modules", "dbt_packages"] 10 | macro-paths: ["macros"] 11 | log-path: "logs" 12 | seed-paths: ["seeds"] 13 | 14 | profile: dbt_dag_monitoring_integration_tests 15 | 16 | dispatch: 17 | - macro_namespace: 'dbt_utils' 18 | search_order: ['dbt_utils_integration_tests', 'dbt_utils'] 19 | 20 | sources: 21 | dbt_dag_monitoring: 22 | staging: 23 | adf_sources: 24 | raw_adf_monitoring: 25 | +enabled: false 26 | databricks_workflow_sources: 27 | raw_databricks_workflow_monitoring: 28 | +enabled: true 29 | airflow_sources: 30 | raw_airflow_monitoring: 31 | +enabled: false 32 | 33 | models: 34 | dbt_dag_monitoring: 35 | marts: 36 | +materialized: table 37 | staging: 38 | adf_sources: 39 | +enabled: false 40 | airflow_sources: 41 | +enabled: false 42 | databricks_workflow_sources: 43 | +enabled: true 44 | +materialized: view 45 | 46 | vars: 47 | dbt_dag_monitoring: 48 | enabled_sources: ['databricks_workflow'] #Possible values: 'airflow', 'adf' or 'databricks_workflow' 49 | dag_monitoring_start_date: cast('2023-01-01' as date) 50 | dag_monitoring_airflow_database: cdi_dev 51 | dag_monitoring_airflow_schema: ci_dbt_dag_monitoring 52 | dag_monitoring_databricks_database: cdi_dev 53 | 
dag_monitoring_databricks_schema: ci_dbt_dag_monitoring 54 | dag_monitoring_adf_database: cdi_dev 55 | dag_monitoring_adf_schema: ci_dbt_dag_monitoring -------------------------------------------------------------------------------- /models/marts/bridge_dag_monitoring.sql: -------------------------------------------------------------------------------- 1 | with 2 | fact_dag_run as ( 3 | select 4 | dag_run_sk 5 | , dag_fk 6 | , generated_date 7 | from {{ ref('fact_dag_monitoring_dag_run') }} 8 | ) 9 | , fact_task_fail as ( 10 | select 11 | task_fail_sk 12 | , dag_fk 13 | , task_fk 14 | , generated_date 15 | from {{ ref('fact_dag_monitoring_task_fail') }} 16 | ) 17 | , fact_task_instance as ( 18 | select 19 | task_instance_sk 20 | , dag_fk 21 | , task_fk 22 | , generated_date 23 | from {{ ref('fact_dag_monitoring_task_instance') }} 24 | ) 25 | , bridge as ( 26 | select 27 | coalesce(fact_task_instance.generated_date, fact_dag_run.generated_date, fact_task_fail.generated_date) as generated_date 28 | , coalesce(fact_dag_run.dag_fk, fact_task_fail.dag_fk, fact_task_instance.dag_fk) as dag_fk 29 | , coalesce(fact_task_instance.task_fk, fact_task_fail.task_fk) as task_fk 30 | , fact_dag_run.dag_run_sk as dag_run_fk 31 | , fact_task_fail.task_fail_sk as task_fail_fk 32 | , fact_task_instance.task_instance_sk as task_instance_fk 33 | from fact_task_instance 34 | full outer join fact_task_fail 35 | on fact_task_instance.task_fk = fact_task_fail.task_fk 36 | and fact_task_instance.generated_date = fact_task_fail.generated_date 37 | full outer join fact_dag_run 38 | on coalesce(fact_task_instance.dag_fk, fact_task_fail.dag_fk) = fact_dag_run.dag_fk 39 | and fact_task_instance.generated_date = fact_dag_run.generated_date 40 | ) 41 | select * 42 | from bridge -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'dbt_dag_monitoring' 2 | version: '0.2.0' 3 | 4 | require-dbt-version: [">=1.3.0", "<2.0.0"] 5 | 6 | config-version: 2 7 | 8 | target-path: "target" 9 | clean-targets: ["target", "dbt_modules", "dbt_packages"] 10 | macro-paths: ["macros"] 11 | log-path: "logs" 12 | seed-paths: ["seeds"] 13 | 14 | profile: dbt_dag_monitoring 15 | 16 | # When using it for testing purposes, you can take out all the comments below and set to true only the sources, models and vars you want to test 17 | 18 | # sources: 19 | # dbt_dag_monitoring: 20 | # staging: 21 | # adf_sources: 22 | # raw_adf_monitoring: 23 | # +enabled: false 24 | # databricks_workflow_sources: 25 | # raw_databricks_workflow_monitoring: 26 | # +enabled: true 27 | # airflow_sources: 28 | # raw_airflow_monitoring: 29 | # +enabled: false 30 | 31 | # models: 32 | # dbt_dag_monitoring: 33 | # marts: 34 | # +materialized: table 35 | # staging: 36 | # adf_sources: 37 | # +enabled: false 38 | # airflow_sources: 39 | # +enabled: false 40 | # databricks_workflow_sources: 41 | # +enabled: true 42 | # +materialized: view 43 | 44 | # Only one type of enabled sources can be turned on at a time 45 | 46 | # vars: 47 | # dbt_dag_monitoring: 48 | # enabled_sources: ['databricks_workflow'] #Possible values: 'airflow', 'adf' or 'databricks_workflow' 49 | # dag_monitoring_start_date: cast('2023-01-01' as date) 50 | # dag_monitoring_airflow_database: cdi_dev 51 | # dag_monitoring_airflow_schema: ci_dbt_dag_monitoring 52 | # dag_monitoring_databricks_database: cdi_dev 53 | # dag_monitoring_databricks_schema: ci_dbt_dag_monitoring 54 | # 
dag_monitoring_adf_database: cdi_dev 55 | # dag_monitoring_adf_schema: ci_dbt_dag_monitoring -------------------------------------------------------------------------------- /models/marts/fact_dag_monitoring_task_instance.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: fact_dag_monitoring_task_instance 5 | description: "Events table of Airflow task instances" 6 | columns: 7 | - name: task_instance_sk 8 | description: "Surrogate key. Composed of: task_instance_id, execution_end_date, and execution_start_date" 9 | tests: 10 | - unique 11 | - not_null 12 | 13 | - name: task_fk 14 | description: "Id of the tasks" 15 | tests: 16 | - relationships: 17 | to: ref('dim_dag_monitoring_task') 18 | field: task_sk 19 | 20 | - name: dag_fk 21 | description: "Id of the DAG." 22 | tests: 23 | - relationships: 24 | to: ref('dim_dag_monitoring_dag') 25 | field: dag_sk 26 | 27 | - name: generated_date 28 | description: "Date of execution of the DAG." 29 | tests: 30 | - relationships: 31 | to: ref('dbt_utils_day') 32 | field: date_day 33 | 34 | - name: execution_start_date 35 | description: " Date and hour when the DAG execution started." 36 | 37 | - name: execution_end_date 38 | description: "Date and hour when the DAG execution ended." 39 | 40 | - name: duration 41 | description: "Duration of the execution in seconds." 42 | 43 | - name: state_task_instance 44 | description: "The state of the task execution." 45 | 46 | - name: try_number 47 | description: "The number of attempts to execute." 48 | 49 | - name: priority_weight 50 | description: "Task priority." 51 | 52 | - name: source_system 53 | description: " System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow." 54 | -------------------------------------------------------------------------------- /integration_tests/README.md: -------------------------------------------------------------------------------- 1 | > [!WARNING] 2 | > ADF source and models were NOT tested in Snowflake connection! 3 | 4 | This README is about the integration tests step inside the ci.yml. 5 | 6 | Integration tests work in a similar way to how an user can reference the dbt-dag-monitoring in their project. That is how we start it, running 7 | dbt deps in the packages: local: ../ 8 | In that way, we are pulling dbt-dag-monitoring. 9 | 10 | As we are simulating the use of the project somewhere else, when checking dbt_project.yml inside the integration_tests folder, we can see that we configured the sources, the models, and vars of it. Those settings are crucial to run the project. 11 | 12 | When running the continuous integration in the Github actions, where the integration tests are actually analyzed, we are using the dbt_project.yml inside the integration_tests folder as reference. 13 | 14 | By looking at the profiles.yml folder, we can see that we use 3 connections: Databricks, BigQuery and Snowflake. Whatever modifications are done in the project, it must pass successfully in the three data warehouses to be accepted to merge. 15 | 16 | When testing new features in the project, the user can save time by having credentials at each DW to test the changes locally, before passing them to the pull request, due to the fact that each commit that is analyzed by the CI, takes 7 minutes minimum to run. 
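A rough sketch of what such a local run can look like, assuming the credentials for the chosen warehouse are exported as expected by profiles.yml; the schema name and the exact order of the run-operation calls are illustrative, and ci.yml remains the source of truth:

```bash
# illustrative local run of the integration tests (not the exact CI recipe)
cd integration_tests
dbt deps                                                                # pulls the package via 'local: ../'
dbt run-operation create_schema --args '{schema_name: my_test_schema}'  # hypothetical schema name
# seed data can be loaded the same way, e.g. dbt run-operation seed__task_fail
dbt build                                                               # builds and tests the enabled source set
dbt run-operation drop_schema --args '{schema_name: my_test_schema}'    # clean up, mirroring the CI teardown
```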
17 | 18 | If you cannot find the schema on a given DW, that is because the CI creates the schema and drops it again as soon as the run for that specific DW finishes. 19 | To clarify how the integration tests work within the continuous integration, take a look at the diagram below: 20 | 21 | 22 | 23 | > [!NOTE] 24 | > Databricks works both as a DW and as a source for the models. 25 | 26 | As you can see in the diagram above, the .sh files handle the transition from an origin source to a destination source. 27 | 28 | 29 | Since the source transitions follow the pattern Databricks to ADF to Airflow, a dedicated shell file, "change_dbt_project_from_databricks_to_airflow.sh", was needed to make the final transition straight from Databricks to Airflow, because ADF is not tested on Snowflake. 30 | 31 | -------------------------------------------------------------------------------- /models/marts/fact_dag_monitoring_dag_run.sql: -------------------------------------------------------------------------------- 1 | with 2 | dim_dag as ( 3 | select 4 | dag_id 5 | , dag_sk as dag_fk 6 | from {{ ref('dim_dag_monitoring_dag') }} 7 | ) 8 | , util_days as ( 9 | select cast(date_day as date) as date_day 10 | from {{ ref('dbt_utils_day') }} 11 | ) 12 | , stg_dag_run as ( 13 | {% for src in var('enabled_sources') -%} 14 | select 15 | dag_run_id 16 | , dag_id 17 | , run_id 18 | , run_date 19 | , execution_start_date 20 | , execution_end_date 21 | , duration 22 | , dag_state 23 | , external_trigger 24 | , run_type 25 | , '{{ src }}' as source_system 26 | from {{ ref('stg_dag_run_' + src) }} 27 | {% if not loop.last -%} union {% endif -%} 28 | {% endfor -%} 29 | ) 30 | , joined as ( 31 | select 32 | stg_dag_run.dag_run_id 33 | , dim_dag.dag_fk 34 | , dim_dag.dag_id 35 | , stg_dag_run.run_id 36 | , util_days.date_day 37 | , stg_dag_run.execution_start_date 38 | , stg_dag_run.execution_end_date 39 | , stg_dag_run.dag_state 40 | , stg_dag_run.external_trigger 41 | , stg_dag_run.run_type 42 | , stg_dag_run.duration 43 | , stg_dag_run.source_system 44 | from stg_dag_run 45 | left join dim_dag on stg_dag_run.dag_id = dim_dag.dag_id 46 | left join util_days on {{ cast_as_date('stg_dag_run.run_date') }} = {{ cast_as_date('util_days.date_day') }} 47 | ) 48 | , joined_with_sk as ( 49 | select 50 | {{ dbt_utils.generate_surrogate_key([ 51 | 'dag_run_id' 52 | , 'execution_start_date' 53 | , 'execution_end_date' 54 | , 'run_id']) }} as dag_run_sk 55 | , dag_fk 56 | , date_day as generated_date 57 | , execution_start_date 58 | , execution_end_date 59 | , dag_state 60 | , external_trigger 61 | , run_type 62 | , duration 63 | , source_system 64 | from joined 65 | ) 66 | select * 67 | from joined_with_sk 68 | -------------------------------------------------------------------------------- /models/staging/databricks_workflow_sources/source.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: raw_databricks_workflow_monitoring 5 | description: "Raw data from Databricks for Databricks monitoring analysis." 6 | database: "{{ var('dag_monitoring_databricks_database', '')}}" 7 | schema: "{{ var('dag_monitoring_databricks_schema', 'raw_databricks_workflow_monitoring') }}" 8 | tables: 9 | - name: jobs 10 | description: "Table that contains information about Databricks jobs (DAGs)." 11 | columns: 12 | - name: job_id 13 | description: "Table unique identifier."
14 | tests: 15 | - not_null 16 | - unique 17 | 18 | - name: settings 19 | description: '{{ doc("settings_doc") }}' 20 | 21 | - name: created_time 22 | description: "Timestamp of the job creation" 23 | 24 | - name: creator_user_name 25 | description: "E-mail of the job creator" 26 | 27 | - name: ind_extraction_date 28 | description: "Date of extraction of the table" 29 | 30 | - name: job_runs 31 | description: "Table that contains the execution data of the Databricks pipelines(jobs)" 32 | columns: 33 | - name: run_id 34 | description: "Unique identifier of the table. Job run id" 35 | tests: 36 | - not_null 37 | - unique 38 | 39 | - name: job_id 40 | description: "Job id." 41 | 42 | - name: state 43 | description: '{{ doc("state_doc") }}' 44 | 45 | - name: trigger 46 | description: "It tells how the pipeline execution was triggered." 47 | 48 | - name: start_time 49 | description: "Timestamp when the DAG execution started." 50 | 51 | - name: end_time 52 | description: "Timestamp when the DAG execution ended." 53 | 54 | - name: execution_duration 55 | description: "Duration of DAG in milliseconds." 56 | 57 | - name: run_type 58 | description: "Type of execution" 59 | 60 | - name: tasks 61 | description: '{{ doc("tasks_doc") }}' 62 | 63 | - name: dbt_utils_day 64 | description: "Table that contains data of the dates created by the dbt_utils macro." 65 | -------------------------------------------------------------------------------- /integration_tests/macros/adf_activity_runs.sql: -------------------------------------------------------------------------------- 1 | {% macro adf_activity_runs() %} 2 | {% set create_table %} 3 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_activity_runs ( 4 | activityRunEnd TIMESTAMP, 5 | activityName STRING, 6 | activityRunStart TIMESTAMP, 7 | activityType STRING, 8 | durationInMs INT, 9 | retryAttempt INT, 10 | error_errorCode STRING, 11 | error_message STRING, 12 | error_failureType STRING, 13 | error_target STRING, 14 | activityRunId STRING, 15 | linkedServiceName STRING, 16 | pipelineName STRING, 17 | pipelineRunId STRING, 18 | status STRING, 19 | output_effectiveIntegrationRuntime STRING, 20 | input_source_type STRING 21 | ); 22 | {% endset %} 23 | 24 | {% set insert_table %} 25 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.adf_activity_runs VALUES 26 | ( 27 | CAST('2024-08-20T03:30:10.973554Z' AS TIMESTAMP), 28 | CAST('Set CurrentDate' AS STRING), 29 | CAST('2024-08-20T03:30:10.742531Z' AS TIMESTAMP), 30 | CAST('SetVariable' AS STRING), 31 | CAST(231 AS INT), 32 | NULL, 33 | NULL, 34 | NULL, 35 | NULL, 36 | CAST('Set CurrentDate' AS STRING), 37 | CAST('f653c43a-6508-42f8-8467-0e10152aa3f9' AS STRING), 38 | NULL, 39 | CAST('PL-FEMFILESCSVTODatalakeLandingZone-N' AS STRING), 40 | CAST('9f81a5eb-a7ca-482e-833e-db6082b73db5' AS STRING), 41 | CAST('Succeeded' AS STRING), 42 | NULL, 43 | NULL 44 | ), 45 | ( 46 | CAST('2024-08-20T03:30:11.538784Z' AS TIMESTAMP), 47 | CAST('Set Timestamp' AS STRING), 48 | CAST('2024-08-20T03:30:11.274576Z' AS TIMESTAMP), 49 | CAST('SetVariable' AS STRING), 50 | CAST(264 AS INT), 51 | NULL, 52 | NULL, 53 | NULL, 54 | NULL, 55 | CAST('Set Timestamp' AS STRING), 56 | CAST('b8c48c2f-b0e6-45f0-a502-cee31dffba2e' AS STRING), 57 | NULL, 58 | CAST('PL-FEMFILESCSVTODatalakeLandingZone-N' AS STRING), 59 | CAST('9f81a5eb-a7ca-482e-833e-db6082b73db5' AS STRING), 60 | CAST('Succeeded' AS STRING), 61 | NULL, 62 | NULL 63 | ); 64 | 65 | {% endset %} 66 | 67 | {% do run_query(create_table) %} 68 | {% do 
log("finished creating table adf_activity_runs", info=true) %} 69 | 70 | {% do run_query(insert_table) %} 71 | {% do log("finished insert table adf_activity_runs", info=true) %} 72 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/macros/seed__dag.sql: -------------------------------------------------------------------------------- 1 | {% macro seed__dag() -%} 2 | {{ return(adapter.dispatch('seed__dag')()) }} 3 | {%- endmacro %} 4 | 5 | {% macro default__seed__dag() %} 6 | {% set create_table %} 7 | create or replace table `{{ target.database }}`.{{ target.schema }}.dag ( 8 | dag_id STRING, 9 | is_paused BOOLEAN, 10 | is_subdag BOOLEAN, 11 | is_active BOOLEAN, 12 | last_parsed_time TIMESTAMP, 13 | last_pickled TIMESTAMP, 14 | last_expired TIMESTAMP, 15 | scheduler_lock STRING, 16 | pickle_id INT64, 17 | fileloc STRING, 18 | owners STRING, 19 | description STRING, 20 | default_view STRING, 21 | schedule_interval STRING, 22 | root_dag_id STRING, 23 | next_dagrun TIMESTAMP, 24 | next_dagrun_create_after TIMESTAMP, 25 | max_active_tasks INT64, 26 | has_task_concurrency_limits BOOLEAN, 27 | max_active_runs INT64, 28 | next_dagrun_data_interval_start TIMESTAMP, 29 | next_dagrun_data_interval_end TIMESTAMP, 30 | has_import_errors BOOLEAN, 31 | timetable_description STRING, 32 | processor_subdir STRING 33 | ); 34 | {% endset %} 35 | 36 | {% set insert_table %} 37 | 38 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.dag VALUES 39 | ( 40 | 'test_docker', false, false, false, TIMESTAMP('2022-12-16 09:35:19.433-03'), NULL, NULL, '', NULL, 41 | '/opt/airflow/dags/repo/airflow/dags/test_dag.py', 'Indicium', '', 'grid', '0 6 * * *', '', 42 | TIMESTAMP('2022-12-16 03:00:00.000-03'), TIMESTAMP('2022-12-17 03:00:00.000-03'), 16, false, 16, 43 | TIMESTAMP('2022-12-16 03:00:00.000-03'), TIMESTAMP('2022-12-17 03:00:00.000-03'), false, 'At 06:00', '' 44 | ), 45 | ( 46 | 'dbt', true, false, false, TIMESTAMP('2022-11-25 16:12:51.922-03'), NULL, NULL, '', NULL, 47 | '/opt/airflow/dags/repo/airflow/dags/all_dags.py', 'airflow', '', 'grid', '7/15 9-23 * * *', '', 48 | TIMESTAMP('2022-02-01 06:07:00.000-03'), TIMESTAMP('2022-02-01 06:22:00.000-03'), 16, false, 1, 49 | TIMESTAMP('2022-02-01 06:07:00.000-03'), TIMESTAMP('2022-02-01 06:22:00.000-03'), true, 50 | 'Every 15 minutes, starting at 7 minutes past the hour, between 09:00 and 23:59', '' 51 | ); 52 | 53 | {% endset %} 54 | 55 | {% do run_query(create_table) %} 56 | {% do log("finished creating table dag", info=true) %} 57 | 58 | {% do run_query(insert_table) %} 59 | {% do log("finished insert table dag", info=true) %} 60 | 61 | 62 | {% endmacro %} -------------------------------------------------------------------------------- /models/marts/fact_dag_monitoring_task_fail.sql: -------------------------------------------------------------------------------- 1 | with 2 | dim_dag as ( 3 | select 4 | dag_id 5 | , dag_sk as dag_fk 6 | from {{ ref('dim_dag_monitoring_dag') }} 7 | ) 8 | , dim_task as ( 9 | select 10 | task_sk as task_fk 11 | , task_id 12 | , dag_id 13 | from {{ ref('dim_dag_monitoring_task') }} 14 | ) 15 | , util_days as ( 16 | select cast(date_day as date) as date_day 17 | from {{ ref('dbt_utils_day') }} 18 | ) 19 | , stg_task_fail as ( 20 | {% for src in var('enabled_sources') -%} 21 | select 22 | task_fail_id 23 | , task_id 24 | , dag_id 25 | , run_id 26 | , execution_start_date 27 | , execution_end_date 28 | , duration 29 | , execution_date 30 | , map_index 31 | , '{{ src 
}}' as source_system 32 | from {{ ref('stg_task_fail_' + src) }} 33 | {% if not loop.last -%} union {% endif -%} 34 | {% endfor -%} 35 | ) 36 | , joined as ( 37 | select 38 | stg_task_fail.task_fail_id 39 | , stg_task_fail.task_id 40 | , dim_dag.dag_id 41 | , stg_task_fail.run_id 42 | , dim_dag.dag_fk 43 | , dim_task.task_fk 44 | , util_days.date_day 45 | , stg_task_fail.execution_start_date 46 | , stg_task_fail.execution_end_date 47 | , stg_task_fail.duration 48 | , stg_task_fail.source_system 49 | from stg_task_fail 50 | left join dim_dag on stg_task_fail.dag_id = dim_dag.dag_id 51 | left join dim_task on 52 | stg_task_fail.task_id = dim_task.task_id 53 | and stg_task_fail.dag_id = dim_task.dag_id 54 | left join util_days on {{ cast_as_date('stg_task_fail.execution_date') }} = {{ cast_as_date('util_days.date_day') }} 55 | ) 56 | , surrogate_key as ( 57 | select 58 | {{ dbt_utils.generate_surrogate_key([ 59 | 'task_fail_id' 60 | , 'execution_start_date' 61 | , 'execution_end_date' 62 | , 'run_id'] 63 | ) }} as task_fail_sk 64 | , dag_fk 65 | , task_fk 66 | , date_day as generated_date 67 | , execution_start_date 68 | , execution_end_date 69 | , duration 70 | , source_system 71 | from joined 72 | ) 73 | select * 74 | from surrogate_key 75 | -------------------------------------------------------------------------------- /integration_tests/seeds/airflow/task_instance.csv: -------------------------------------------------------------------------------- 1 | "task_id","dag_id","run_id","start_date","end_date","duration","state","try_number","hostname","unixname","job_id","pool","queue","priority_weight","operator","queued_dttm","pid","max_tries","executor_config","pool_slots","queued_by_job_id","external_executor_id","trigger_id","trigger_timeout","next_method","next_kwargs","map_index","updated_at" 2 | dbt_source_test,dbt_bitrix,scheduled__2023-01-12T06:00:00+00:00,2023-01-13 03:01:07.644 -0300,2023-01-13 03:01:27.852 -0300,20.207217,success,1,dbtbitrixdbtsourcetest-32bc3af501374e48913fad10b54fdd67,root,332,default_pool,default,17,DockerOperator,2023-01-13 03:00:58.037 -0300,21,2,�\u0004}�.,1,201,,,,,"",-1, 3 | dump_table1_to_DL,sample_fist,scheduled__2022-02-03T00:00:00+00:00,2022-11-25 15:58:27.688 -0300,2022-11-25 15:58:28.388 -0300,0.700336,success,1,samplefistdumptable1todl-0d85d3eca2b14a58b822dbb5f5c21bec,root,20,default_pool,default,2,BashOperator,2022-11-25 15:57:34.854 -0300,21,1,�\u0004}�.,1,7,,,,,"",-1, 4 | copy_table4_DL_to_DW,sample_fist,scheduled__2022-02-03T00:00:00+00:00,,,,scheduled,0,"",root,,default_pool,default,1,BashOperator,,,1,�\u0004}�.,1,,,,,,"",-1, 5 | dump_table3_to_DL,sample_fist,scheduled__2022-02-03T00:00:00+00:00,2022-11-25 15:58:27.252 -0300,2022-11-25 15:58:27.923 -0300,0.670813,success,1,samplefistdumptable3todl-dfadd4af7fde472593ee7c824e6ca2ae,root,18,default_pool,default,2,BashOperator,2022-11-25 15:57:34.854 -0300,21,1,�\u0004}�.,1,7,,,,,"",-1, 6 | dump_table5_to_DL,sample_fist,scheduled__2022-02-03T00:00:00+00:00,2022-11-25 15:59:17.480 -0300,2022-11-25 15:59:18.020 -0300,0.540106,success,1,samplefistdumptable5todl-54e2543a9e694a63b55e112e99c2053d,root,22,default_pool,default,1,BashOperator,2022-11-25 15:57:34.854 -0300,21,1,�\u0004}�.,1,7,,,,,"",-1, 7 | copy_table3_DL_to_DW,sample_fist,scheduled__2022-02-03T00:00:00+00:00,,,,scheduled,0,"",root,,default_pool,default,1,BashOperator,,,1,�\u0004}�.,1,,,,,,"",-1, 8 | dump_table4_to_DL,sample_fist,scheduled__2022-02-03T00:00:00+00:00,2022-11-25 15:58:27.540 -0300,2022-11-25 15:58:28.263 
-0300,0.722546,success,1,samplefistdumptable4todl-de6d057adeeb4f4b94b777491f5e3611,root,19,default_pool,default,2,BashOperator,2022-11-25 15:57:34.854 -0300,20,1,�\u0004}�.,1,7,,,,,"",-1, 9 | copy_table1_DL_to_DW,sample_fist,scheduled__2022-02-03T00:00:00+00:00,,,,scheduled,0,"",root,,default_pool,default,1,BashOperator,,,1,�\u0004}�.,1,,,,,,"",-1, 10 | delay,sample_fist,scheduled__2022-02-02T00:00:00+00:00,2022-11-25 15:55:20.412 -0300,2022-11-25 15:55:20.412 -0300,0.0,success,0,"",root,,default_pool,default,10,DummyOperator,,,1,�\u0004}�.,1,,,,,,"",-1, 11 | -------------------------------------------------------------------------------- /models/staging/adf_sources/stg_dag_adf.sql: -------------------------------------------------------------------------------- 1 | with exploded_by_pipeline as ( 2 | select 3 | * 4 | from 5 | {{ source('raw_adf_monitoring', 'adf_triggers') }} 6 | {{ flatten_data('properties.pipelines') }} as pipelines 7 | ), 8 | 9 | triggers_renamed as ( 10 | select 11 | id as trigger_id 12 | , case 13 | when properties.typeProperties.recurrence.frequency = 'Hour' then 'hourly' 14 | when properties.typeProperties.recurrence.frequency = 'Day' then 'daily' 15 | when properties.typeProperties.recurrence.frequency = 'Week' then 'weekly' 16 | when properties.typeProperties.recurrence.frequency = 'Month' then 'monthly' 17 | when properties.typeProperties.recurrence.frequency = 'Minute' then 'minutely' 18 | end as dag_frequency 19 | ,{{ cast_as_string('properties.typeProperties.recurrence.schedule') }} as timetable_description 20 | ,properties.typeProperties.recurrence.frequency as adf_frequency 21 | ,properties.typeProperties.recurrence.startTime as start_time 22 | , case 23 | when properties.runtimeState = 'Started' then 'true' 24 | else 'false' 25 | end as is_active 26 | , case 27 | when properties.runtimeState = 'Started' then 'false' 28 | else 'true' 29 | end as is_paused 30 | ,properties.runtimeState 31 | ,{{adf_pipelines_name('pipelines.pipelineReference.referenceName') }} as pipeline_name 32 | 33 | from exploded_by_pipeline 34 | ), 35 | pipeline_with_row_number as ( 36 | select 37 | *, 38 | row_number() over (partition by id order by etag desc) row_number 39 | from {{ source('raw_adf_monitoring', 'adf_pipelines') }} 40 | ), 41 | pipeline_dedup as ( 42 | select * from 43 | pipeline_with_row_number 44 | where row_number = 1 45 | ), 46 | pipelines_and_triggers as ( 47 | select 48 | pipelines.id as dag_id 49 | ,pipelines.name as dag_name 50 | ,triggers.* 51 | from pipeline_dedup pipelines 52 | left join triggers_renamed triggers 53 | on pipelines.name = triggers.pipeline_name 54 | ) 55 | select 56 | {{ cast_as_string('dag_name') }} as dag_id 57 | , {{ cast_as_string('dag_name') }} as dag_name 58 | , "not_implemented_for_adf" as dag_description 59 | , dag_frequency 60 | , timetable_description 61 | , is_paused 62 | , is_active 63 | , 'not_implemented_for_adf' as fileloc 64 | , 'not_implemented_for_adf' as owners 65 | , null as ind_extraction_date 66 | from 67 | pipelines_and_triggers 68 | -------------------------------------------------------------------------------- /integration_tests/macros/seed__dag_run.sql: -------------------------------------------------------------------------------- 1 | {% macro seed__dag_run() -%} 2 | {{ return(adapter.dispatch('seed__dag_run')()) }} 3 | {%- endmacro %} 4 | 5 | {% macro default__seed__dag_run() %} 6 | {% set create_table %} 7 | create or replace table `{{ target.database }}`.{{ target.schema }}.dag_run ( 8 | id INT64, 9 | dag_id 
STRING, 10 | execution_date TIMESTAMP, 11 | state STRING, 12 | run_id STRING, 13 | external_trigger BOOLEAN, 14 | conf STRING, 15 | end_date TIMESTAMP, 16 | start_date TIMESTAMP, 17 | run_type STRING, 18 | last_scheduling_decision TIMESTAMP, 19 | dag_hash STRING, 20 | creating_job_id INT64, 21 | queued_at TIMESTAMP, 22 | data_interval_start TIMESTAMP, 23 | data_interval_end TIMESTAMP, 24 | log_template_id INT64, 25 | updated_at TIMESTAMP 26 | ); 27 | 28 | {% endset %} 29 | 30 | {% set insert_table %} 31 | 32 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.dag_run VALUES 33 | ( 34 | 87755, 35 | 'airflow_status_monitoring', 36 | TIMESTAMP('2024-07-04 10:56:00.000-03'), 37 | 'success', 38 | 'scheduled__2024-07-04T13:56:00+00:00', 39 | false, 40 | '�\u0004}�.', 41 | TIMESTAMP('2024-07-04 10:57:15.119-03'), 42 | TIMESTAMP('2024-07-04 10:57:00.256-03'), 43 | 'scheduled', 44 | TIMESTAMP('2024-07-04 10:57:15.109-03'), 45 | '2606cfccb8540961ee80c09fe32dcc8d', 46 | 110841, 47 | TIMESTAMP('2024-07-04 10:57:00.210-03'), 48 | TIMESTAMP('2024-07-04 10:56:00.000-03'), 49 | TIMESTAMP('2024-07-04 10:57:00.000-03'), 50 | 1, 51 | TIMESTAMP('2024-07-04 10:57:15.120-03') 52 | ), 53 | ( 54 | 84260, 55 | 'dag_tags_rbac', 56 | TIMESTAMP('2024-07-02 05:30:00.000-03'), 57 | 'success', 58 | 'scheduled__2024-07-02T08:30:00+00:00', 59 | false, 60 | '�\u0004}�.', 61 | TIMESTAMP('2024-07-02 06:01:20.844-03'), 62 | TIMESTAMP('2024-07-02 06:00:00.624-03'), 63 | 'scheduled', 64 | TIMESTAMP('2024-07-02 06:01:20.836-03'), 65 | '3616896069a7d5a3b40f4478372f03da', 66 | 110841, 67 | TIMESTAMP('2024-07-02 06:00:00.550-03'), 68 | TIMESTAMP('2024-07-02 05:30:00.000-03'), 69 | TIMESTAMP('2024-07-02 06:00:00.000-03'), 70 | 1, 71 | TIMESTAMP('2024-07-02 06:01:20.848-03') 72 | ); 73 | 74 | {% endset %} 75 | 76 | {% do run_query(create_table) %} 77 | {% do log("finished creating table dag_run", info=true) %} 78 | 79 | {% do run_query(insert_table) %} 80 | {% do log("finished insert table dag_run", info=true) %} 81 | 82 | 83 | {% endmacro %} -------------------------------------------------------------------------------- /models/docs/universal.md: -------------------------------------------------------------------------------- 1 | [comment]: < Universal > 2 | 3 | {% docs state_doc %} 4 | Json with state property of the pipeline execution in the following format: 5 | { 6 | "life_cycle_state": "TERMINATED", 7 | "result_state": "SUCCESS", 8 | "state_message": "", 9 | "user_cancelled_or_timedout": false 10 | } 11 | {% enddocs %} 12 | 13 | 14 | {% docs tasks_doc %} 15 | List of objects with information about the tasks. 
Example of a task in json: 16 | { 17 | "attempt_number": "0", 18 | "cleanup_duration": "0", 19 | "cluster_instance": { 20 | "cluster_id": "0426-123-kq2r1tew", 21 | "spark_context_id": "123" 22 | }, 23 | "dbt_task": null, 24 | "depends_on": null, 25 | "description": null, 26 | "end_time": "1701855074931", 27 | "execution_duration": "110000", 28 | "existing_cluster_id": "0426-123-kq2r1tew", 29 | "git_source": null, 30 | "libraries": null, 31 | "notebook_task": { 32 | "notebook_path": "/notebook", 33 | "source": "WORKSPACE" 34 | }, 35 | "run_id": "123", 36 | "setup_duration": "1000", 37 | "start_time": "1701854963851", 38 | "state": { 39 | "life_cycle_state": "TERMINATED", 40 | "result_state": "SUCCESS", 41 | "state_message": "", 42 | "user_cancelled_or_timedout": false 43 | }, 44 | "task_key": "ADFafb-123" 45 | } 46 | {% enddocs %} 47 | 48 | {% docs settings_doc %} 49 | Job configuration json like the following: 50 | { 51 | "email_notifications": { 52 | "no_alert_for_skipped_runs": false, 53 | "on_failure": null, 54 | "on_start": null, 55 | "on_success": null 56 | }, 57 | "format": "MULTI_TASK", 58 | "max_concurrent_runs": "1", 59 | "name": "Fact_TransactionProtected_V2", 60 | "notification_settings": null, 61 | "schedule": { 62 | "pause_status": "UNPAUSED", 63 | "quartz_cron_expression": "19 0 9 * * ?", 64 | "timezone_id": "America/Sao_Paulo" 65 | }, 66 | "timeout_seconds": "0" 67 | } 68 | {% enddocs %} 69 | -------------------------------------------------------------------------------- /integration_tests/seeds/airflow/task_fail.csv: -------------------------------------------------------------------------------- 1 | "id","task_id","dag_id","start_date","end_date","duration","map_index","run_id" 2 | 1,dbt_freshness,dbt_bitrix,2022-12-12 14:59:57.480 -0300,2022-12-12 14:59:59.035 -0300,1,-1,scheduled__2022-12-11T06:00:00+00:00 3 | 2,dbt_source_test,dbt_bitrix,2022-12-12 15:01:00.909 -0300,2022-12-12 15:01:02.861 -0300,1,-1,scheduled__2022-12-11T06:00:00+00:00 4 | 3,dbt_freshness,dbt_bitrix,2022-12-15 16:09:59.406 -0300,2022-12-15 16:10:01.164 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 5 | 4,dbt_freshness,dbt_bitrix,2022-12-15 16:11:33.938 -0300,2022-12-15 16:11:35.532 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 6 | 5,dbt_freshness,dbt_bitrix,2022-12-15 16:16:44.720 -0300,2022-12-15 16:16:46.632 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 7 | 6,dbt_freshness,dbt_bitrix,2022-12-15 17:59:26.557 -0300,2022-12-15 17:59:28.156 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 8 | 7,dbt_freshness,dbt_bitrix,2022-12-15 18:04:37.127 -0300,2022-12-15 18:04:38.722 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 9 | 8,dbt_freshness,dbt_bitrix,2022-12-15 18:18:55.583 -0300,2022-12-15 18:18:57.274 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 10 | 9,dbt_freshness,dbt_bitrix,2022-12-15 18:26:31.585 -0300,2022-12-15 18:26:33.287 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 11 | 10,testing_dockeroperator,test_docker,2022-12-15 18:55:53.329 -0300,2022-12-15 18:55:55.427 -0300,2,-1,manual__2022-12-15T21:55:43.582236+00:00 12 | 11,dbt_freshness,dbt_bitrix,2022-12-16 09:49:25.205 -0300,2022-12-16 09:50:02.437 -0300,37,-1,scheduled__2022-12-15T06:00:00+00:00 13 | 12,dbt_freshness,dbt_bitrix,2022-12-16 11:27:41.768 -0300,2022-12-16 11:27:54.878 -0300,13,-1,scheduled__2022-12-15T06:00:00+00:00 14 | 13,dbt_freshness,dbt_bitrix,2022-12-16 14:14:03.698 -0300,2022-12-16 14:14:17.741 -0300,14,-1,scheduled__2022-12-15T06:00:00+00:00 15 | 14,dbt_freshness,dbt_bitrix,2022-12-16 14:24:27.859 
-0300,2022-12-16 14:24:42.344 -0300,14,-1,manual__2022-12-16T17:24:17.778111+00:00 16 | 15,dbt_freshness,dbt_bitrix,2022-12-16 14:55:23.124 -0300,2022-12-16 14:55:36.900 -0300,13,-1,manual__2022-12-16T17:55:14.271759+00:00 17 | 16,dbt_freshness,dbt_bitrix,2022-12-16 15:31:16.824 -0300,2022-12-16 15:31:55.576 -0300,38,-1,manual__2022-12-16T18:31:06.560155+00:00 18 | 17,dbt_freshness,dbt_bitrix,2022-12-17 03:00:17.475 -0300,2022-12-17 03:00:55.782 -0300,38,-1,scheduled__2022-12-16T06:00:00+00:00 19 | 18,dbt_freshness,dbt_bitrix,2022-12-17 03:06:06.742 -0300,2022-12-17 03:06:43.661 -0300,36,-1,scheduled__2022-12-16T06:00:00+00:00 20 | 19,dbt_freshness,dbt_bitrix,2022-12-18 03:00:19.544 -0300,2022-12-18 03:00:57.838 -0300,38,-1,scheduled__2022-12-17T06:00:00+00:00 21 | 20,dbt_freshness,dbt_bitrix,2022-12-18 03:06:07.809 -0300,2022-12-18 03:06:44.903 -0300,37,-1,scheduled__2022-12-17T06:00:00+00:00 -------------------------------------------------------------------------------- /models/marts/dim_dag_monitoring_task.sql: -------------------------------------------------------------------------------- 1 | with 2 | stg_task_instance as ( 3 | {% for src in var('enabled_sources') -%} 4 | select distinct 5 | task_id 6 | , dag_id 7 | , hostname 8 | , operator 9 | , task_pool 10 | , map_index 11 | , '{{ src }}' as source_system 12 | from {{ ref('stg_task_instance_' + src) }} 13 | {% if not loop.last -%} union {% endif -%} 14 | {% endfor -%} 15 | ) 16 | , stg_task_fail as ( 17 | {% for src in var('enabled_sources') -%} 18 | select distinct 19 | task_id 20 | , dag_id 21 | , map_index 22 | , {{ cast_as_string('null') }} as hostname 23 | , {{ cast_as_string('null') }} as operator 24 | , {{ cast_as_string('null') }} as task_pool 25 | , '{{ src }}' as source_system 26 | from {{ ref('stg_task_fail_' + src) }} 27 | {% if not loop.last -%} union {% endif -%} 28 | {% endfor -%} 29 | ) 30 | , union_task_instance_with_fail as ( 31 | select 32 | task_id 33 | , dag_id 34 | , map_index 35 | , hostname 36 | , operator 37 | , task_pool 38 | , source_system 39 | from stg_task_instance 40 | union all 41 | select 42 | task_id 43 | , dag_id 44 | , map_index 45 | , hostname 46 | , operator 47 | , task_pool 48 | , source_system 49 | from stg_task_fail 50 | ) 51 | , dedup_dim_task as ( 52 | select 53 | task_id 54 | , dag_id 55 | , map_index 56 | , hostname 57 | , operator 58 | , task_pool 59 | , source_system 60 | , row_number() over( 61 | partition by 62 | task_id 63 | , dag_id 64 | , source_system 65 | order by 66 | task_id 67 | , dag_id 68 | , source_system 69 | ) as dedup 70 | from union_task_instance_with_fail 71 | ) 72 | , dim_task_with_sk as ( 73 | select 74 | {{ dbt_utils.generate_surrogate_key([ 75 | 'task_id' 76 | , 'dag_id'] 77 | ) }} as task_sk 78 | , task_id 79 | , dag_id 80 | , map_index 81 | , hostname 82 | , operator 83 | , task_pool 84 | , source_system 85 | from dedup_dim_task 86 | where dedup = 1 87 | ) 88 | select * 89 | from dim_task_with_sk 90 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter and Publisher 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - closed 7 | branches: 8 | - main 9 | 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | new_release: 16 | if: github.event.pull_request.merged == true 17 | permissions: 18 | # write permission is required to create a github release 19 | contents: write 20 | # write permission 
is required for autolabeler 21 | # otherwise, read permission is required at least 22 | pull-requests: write 23 | runs-on: ubuntu-latest 24 | 25 | steps: 26 | - name: Checkout code 27 | uses: actions/checkout@v2 28 | with: 29 | fetch-depth: 0 30 | 31 | - name: Get branch name 32 | id: getbranch 33 | run: echo ::set-output name=BRANCH::${GITHUB_HEAD_REF} 34 | 35 | # ${{ github.ref }} was not giving v* as tag name, but refs/tags/v* instead, so I had to abbreviate it 36 | - name: Get latest abbreviated tag 37 | id: gettag 38 | run: echo ::set-output name=TAG::$(git describe --tags $(git rev-list --tags --max-count=1)) # get the latest tag across all branches and put it in the output TAG 39 | 40 | - name: Calculate next version 41 | id: nextversion 42 | run: | 43 | BRANCH_NAME="${{ steps.getbranch.outputs.BRANCH }}" 44 | CURRENT_VERSION="${{ steps.gettag.outputs.TAG }}" 45 | IFS='.' read -ra VERSION_PARTS <<< "$CURRENT_VERSION" 46 | if [[ $BRANCH_NAME =~ ^(major|release|Major|Release)/ ]]; then 47 | VERSION_PARTS[0]=$((VERSION_PARTS[0] + 1)) 48 | VERSION_PARTS[1]=0 49 | VERSION_PARTS[2]=0 50 | elif [[ $BRANCH_NAME =~ ^(feature|minor|Feature|Minor)/ ]]; then 51 | VERSION_PARTS[1]=$((VERSION_PARTS[1] + 1)) 52 | VERSION_PARTS[2]=0 53 | elif [[ $BRANCH_NAME =~ ^(patch|fix|hotfix|bugfix|Patch|Fix|Hotfix|Bugfix)/ ]]; then 54 | VERSION_PARTS[2]=$((VERSION_PARTS[2] + 1)) 55 | fi 56 | NEXT_VERSION="${VERSION_PARTS[0]}.${VERSION_PARTS[1]}.${VERSION_PARTS[2]}" 57 | echo ::set-output name=NEXT_VERSION::"$NEXT_VERSION" 58 | 59 | - name: Create and publish new tag 60 | run: | 61 | git tag ${{ steps.nextversion.outputs.NEXT_VERSION }} 62 | git push origin ${{ steps.nextversion.outputs.NEXT_VERSION }} 63 | 64 | - uses: release-drafter/release-drafter@v5 65 | with: 66 | commitish: main 67 | name: "dbt-dag-monitoring ${{ steps.nextversion.outputs.NEXT_VERSION }}" 68 | tag: ${{ steps.nextversion.outputs.NEXT_VERSION }} 69 | publish: true 70 | env: 71 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 72 | -------------------------------------------------------------------------------- /integration_tests/seeds/adf/adf_activity_runs.csv: -------------------------------------------------------------------------------- 1 | activityRunEnd,activityName,activityRunStart,activityType,durationInMs,retryAttempt,error_errorCode,error_message,error_failureType,error_target,activityRunId,linkedServiceName,pipelineName,pipelineRunId,status,output_effectiveIntegrationRuntime,input_source_type 2 | 2024-08-20T03:30:10.9735549Z,Set CurrentDate,2024-08-20T03:30:10.742531Z,SetVariable,231,,,,,Set CurrentDate,f653c43a-6508-42f8-8467-0e10152aa3f9,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 3 | 2024-08-20T03:30:11.5387841Z,Set Timestamp,2024-08-20T03:30:11.2745768Z,SetVariable,264,,,,,Set Timestamp,b8c48c2f-b0e6-45f0-a502-cee31dffba2e,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 4 | 2024-08-20T03:38:09.0201429Z,Restart LoadingControl,2024-08-20T03:30:11.9547107Z,DatabricksNotebook,477065,,,,,Restart LoadingControl,af8e3927-c2e3-4c54-9b07-b4c0df7d6564,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,AutoResolveIntegrationRuntime (Brazil South), 5 | 2024-08-20T03:39:13.7734401Z,Get All Tables,2024-08-20T03:38:10.4390219Z,Lookup,63334,,,,,Get All Tables,8df489d3-f7d3-4462-9080-6e5557e78638,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,AutoResolveIntegrationRuntime (Brazil 
South),AzureDatabricksDeltaLakeSource 6 | 2024-08-20T03:42:27.2972053Z,For Each Tables,2024-08-20T03:39:15.8346054Z,ForEach,191462,,,,,For Each Tables,35a4c708-cda1-470e-b202-ae76aa743c0d,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 7 | 2024-08-20T03:39:16.8973044Z,Set CurrentDate foreach,2024-08-20T03:39:16.6507636Z,SetVariable,246,,,,,Set CurrentDate foreach,913afaa0-b40b-4c8c-b95a-48011c5e0e1c,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 8 | 2024-08-20T03:39:16.907916Z,Set CurrentDate foreach,2024-08-20T03:39:16.652598Z,SetVariable,255,,,,,Set CurrentDate foreach,9316bfbf-e4e8-4c3a-a214-474524a71eac,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 9 | 2024-08-20T03:39:16.8983084Z,Set CurrentDate foreach,2024-08-20T03:39:16.6742498Z,SetVariable,224,,,,,Set CurrentDate foreach,98aaf33e-86eb-4b32-98c4-7af526d677c5,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 10 | 2024-08-20T03:39:16.9416579Z,Set CurrentDate foreach,2024-08-20T03:39:16.673797Z,SetVariable,267,,,,,Set CurrentDate foreach,1d466d96-8210-4f9a-94b9-d25405dae8a7,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 11 | 2024-08-20T03:40:37.1476554Z,Update StartDate,2024-08-20T03:39:17.2774453Z,DatabricksNotebook,79870,,,,,Update StartDate,6e608cd1-4444-4061-8384-cb36946508a2,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,AutoResolveIntegrationRuntime (Brazil South), 12 | -------------------------------------------------------------------------------- /models/marts/fact_dag_monitoring_task_instance.sql: -------------------------------------------------------------------------------- 1 | with 2 | dim_dag as ( 3 | select 4 | dag_id 5 | , dag_sk as dag_fk 6 | from {{ ref('dim_dag_monitoring_dag') }} 7 | ) 8 | , dim_task as ( 9 | select 10 | task_sk as task_fk 11 | , task_id 12 | , dag_id 13 | from {{ ref('dim_dag_monitoring_task') }} 14 | ) 15 | , util_days as ( 16 | select cast(date_day as date) as date_day 17 | from {{ ref('dbt_utils_day') }} 18 | ) 19 | , stg_task_instance as ( 20 | {% for src in var('enabled_sources') -%} 21 | select 22 | task_instance_sk 23 | , task_id 24 | , dag_id 25 | , run_id 26 | , execution_date 27 | , execution_start_date 28 | , execution_end_date 29 | , duration 30 | , state_task_instance 31 | , try_number 32 | , priority_weight 33 | , '{{ src }}' as source_system 34 | from {{ ref('stg_task_instance_' + src) }} 35 | {% if not loop.last -%} union {% endif -%} 36 | {% endfor -%} 37 | ) 38 | , joined as ( 39 | select 40 | stg_task_instance.task_instance_sk 41 | , stg_task_instance.task_id 42 | , stg_task_instance.dag_id 43 | , stg_task_instance.run_id 44 | , dim_dag.dag_fk 45 | , dim_task.task_fk 46 | , util_days.date_day 47 | , stg_task_instance.execution_start_date 48 | , stg_task_instance.execution_end_date 49 | , stg_task_instance.duration 50 | , stg_task_instance.state_task_instance 51 | , stg_task_instance.try_number 52 | , stg_task_instance.priority_weight 53 | , stg_task_instance.source_system 54 | from stg_task_instance 55 | left join dim_dag on stg_task_instance.dag_id = dim_dag.dag_id 56 | left join dim_task on 57 | stg_task_instance.task_id = dim_task.task_id 58 | and stg_task_instance.dag_id = dim_task.dag_id 59 | left join util_days on {{ cast_as_date('stg_task_instance.execution_date') }} = {{ cast_as_date('util_days.date_day') }} 60 | ) 61 | , 
surrogate_key as ( 62 | select 63 | {{ dbt_utils.generate_surrogate_key([ 64 | 'task_instance_sk' 65 | , 'execution_start_date' 66 | , 'execution_end_date' 67 | , 'run_id']) }} as task_instance_sk 68 | , dag_fk 69 | , task_fk 70 | , date_day as generated_date 71 | , execution_start_date 72 | , execution_end_date 73 | , duration 74 | , state_task_instance 75 | , try_number 76 | , priority_weight 77 | , source_system 78 | from joined 79 | ) 80 | select * 81 | from surrogate_key 82 | -------------------------------------------------------------------------------- /integration_tests/macros/seed__task_instance.sql: -------------------------------------------------------------------------------- 1 | {% macro seed__task_instance() %} 2 | {% set create_table %} 3 | CREATE OR REPLACE TABLE `{{ target.database }}`.{{ target.schema }}.task_instance ( 4 | TASK_ID STRING, 5 | DAG_ID STRING, 6 | RUN_ID STRING, 7 | START_DATE TIMESTAMP, 8 | END_DATE TIMESTAMP, 9 | DURATION FLOAT64, 10 | STATE STRING, 11 | TRY_NUMBER INT64, 12 | HOSTNAME STRING, 13 | UNIXNAME STRING, 14 | JOB_ID NUMERIC, 15 | POOL STRING, 16 | QUEUE STRING, 17 | PRIORITY_WEIGHT INT64, 18 | OPERATOR STRING, 19 | QUEUED_DTTM TIMESTAMP, 20 | PID INT64, 21 | MAX_TRIES INT64, 22 | EXECUTOR_CONFIG STRING, 23 | POOL_SLOTS INT64, 24 | QUEUED_BY_JOB_ID NUMERIC, 25 | EXTERNAL_EXECUTOR_ID NUMERIC, 26 | TRIGGER_ID NUMERIC, 27 | TRIGGER_TIMEOUT INT64, 28 | NEXT_METHOD INT64, 29 | NEXT_KWARGS INT64, 30 | MAP_INDEX INT64, 31 | UPDATED_AT TIMESTAMP 32 | ); 33 | 34 | {% endset %} 35 | 36 | {% set insert_table %} 37 | 38 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.task_instance ( 39 | TASK_ID, 40 | DAG_ID, 41 | RUN_ID, 42 | START_DATE, 43 | END_DATE, 44 | DURATION, 45 | STATE, 46 | TRY_NUMBER, 47 | HOSTNAME, 48 | UNIXNAME, 49 | JOB_ID, 50 | POOL, 51 | QUEUE, 52 | PRIORITY_WEIGHT, 53 | OPERATOR, 54 | QUEUED_DTTM, 55 | PID, 56 | MAX_TRIES, 57 | EXECUTOR_CONFIG, 58 | POOL_SLOTS, 59 | QUEUED_BY_JOB_ID, 60 | EXTERNAL_EXECUTOR_ID, 61 | TRIGGER_ID, 62 | TRIGGER_TIMEOUT, 63 | NEXT_METHOD, 64 | NEXT_KWARGS, 65 | MAP_INDEX, 66 | UPDATED_AT 67 | ) 68 | VALUES 69 | ( 70 | 'dbt_source_test', 71 | 'dbt_bitrix', 72 | 'scheduled__2023-01-12T06:00:00+00:00', 73 | TIMESTAMP('2023-12-27T02:30:16.714366Z'), 74 | TIMESTAMP('2023-12-21T09:01:57.631415Z'), 75 | 20.207217, 76 | 'success', 77 | 1, 78 | 'dbtbitrixdb1374e48913fad10b54fdd67', 79 | 'root', 80 | 332, 81 | 'default_pool', 82 | 'default', 83 | 17, 84 | 'DockerOperator', 85 | '2024-02-02T11:01:54.071588Z', 86 | 21, 87 | 2, 88 | '�\u0004}�.', 89 | 1, 90 | 201, 91 | NULL, 92 | NULL, 93 | NULL, 94 | NULL, 95 | NULL, 96 | -1, 97 | NULL 98 | ), 99 | ( 100 | 'dump_table1_to_DL', 101 | 'sample_fist', 102 | 'scheduled__2022-02-03T00:00:00+00:00', 103 | TIMESTAMP('2023-12-21T09:01:57.631415Z'), 104 | TIMESTAMP('2023-12-23T08:30:25.791135Z'), 105 | 0.700336, 106 | 'success', 107 | 1, 108 | 'samplefistdumpa58b822dbb5f5c21bec', 109 | 'root', 110 | 20, 111 | 'default_pool', 112 | 'default', 113 | 2, 114 | 'BashOperator', 115 | '2024-02-02T11:02:10.162511Z', 116 | 21, 117 | 1, 118 | '�\u0004}�.', 119 | 1, 120 | 7, 121 | NULL, 122 | NULL, 123 | NULL, 124 | NULL, 125 | NULL, 126 | -1, 127 | NULL 128 | ); 129 | 130 | 131 | {% endset %} 132 | 133 | {% do run_query(create_table) %} 134 | {% do log("finished creating table task_instance", info=true) %} 135 | 136 | {% do run_query(insert_table) %} 137 | {% do log("finished insert table task_instance", info=true) %} 138 | 139 | {% endmacro %} 
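The airflow seed CSVs that follow carry their timestamp columns as plain text. Purely to illustrate how such columns could be typed explicitly when seeding, here is a hypothetical dbt seed configuration; the project name is a placeholder and the actual integration_tests/dbt_project.yml may not configure this at all.

```yaml
# Hypothetical sketch; not taken from the actual integration_tests/dbt_project.yml.
seeds:
  dbt_dag_monitoring_integration_tests:  # placeholder project name
    airflow:
      dag_run:
        +column_types:
          execution_date: timestamp
          start_date: timestamp
          end_date: timestamp
      task_instance:
        +column_types:
          start_date: timestamp
          end_date: timestamp
```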
-------------------------------------------------------------------------------- /integration_tests/seeds/airflow/dag.csv: -------------------------------------------------------------------------------- 1 | "dag_id","is_paused","is_subdag","is_active","last_parsed_time","last_pickled","last_expired","scheduler_lock","pickle_id","fileloc","owners","description","default_view","schedule_interval","root_dag_id","next_dagrun","next_dagrun_create_after","max_active_tasks","has_task_concurrency_limits","max_active_runs","next_dagrun_data_interval_start","next_dagrun_data_interval_end","has_import_errors","timetable_description","processor_subdir" 2 | test_docker,false,false,false,2022-12-16 09:35:19.433 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/test_dag.py,Indicium,,grid,"""0 6 * * * """,,2022-12-16 03:00:00.000 -0300,2022-12-17 03:00:00.000 -0300,16,false,16,2022-12-16 03:00:00.000 -0300,2022-12-17 03:00:00.000 -0300,false,At 06:00, 3 | dbt,true,false,false,2022-11-25 16:12:51.922 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,airflow,,grid,"""7/15 9-23 * * *""",,2022-02-01 06:07:00.000 -0300,2022-02-01 06:22:00.000 -0300,16,false,1,2022-02-01 06:07:00.000 -0300,2022-02-01 06:22:00.000 -0300,true,"Every 15 minutes, starting at 7 minutes past the hour, between 09:00 and 23:59", 4 | enterprise_sync_marketing,true,false,false,2024-03-19 17:41:15.253 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,DSaaS,,grid,"""0 18 * * *""",,2024-03-18 15:00:00.000 -0300,2024-03-19 15:00:00.000 -0300,16,false,1,2024-03-18 15:00:00.000 -0300,2024-03-19 15:00:00.000 -0300,true,At 18:00,/opt/airflow/dags/b50e21f1f72af1012e31506b48198ba61244fd4f/airflow/dags 5 | sample_fist,true,false,false,2022-11-25 16:12:51.927 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,airflow,,grid,"""@daily""",,2022-02-02 21:00:00.000 -0300,,16,false,1,2022-02-02 21:00:00.000 -0300,2022-02-03 21:00:00.000 -0300,true,At 00:00, 6 | sheets-projects,false,false,false,2023-08-17 16:48:17.895 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/sheets-projects.py,DSaaS,Dag to run xxx pipeline,grid,"""0 6 * * * """,,2023-08-17 03:00:00.000 -0300,2023-08-18 03:00:00.000 -0300,16,false,16,2023-08-17 03:00:00.000 -0300,2023-08-18 03:00:00.000 -0300,true,At 06:00,/opt/airflow/dags/7d473a2591c8679c8529ce9b4600489ee12e7c62/airflow/dags 7 | dbt_snowflake_enterprise_dsaas,true,false,false,2024-04-16 10:33:35.408 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,airflow,,grid,null,,,,16,false,1,,,false,"Never, external triggers only",/opt/airflow/dags/172ebb0521cadde76475236c3412f732339c590b/airflow/dags 8 | enterprise_sync,true,false,false,2024-04-16 10:33:35.411 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,DSaaS,,grid,"""0 17 * * *""",,2024-04-14 14:00:00.000 -0300,2024-04-15 14:00:00.000 -0300,16,false,1,2024-04-14 14:00:00.000 -0300,2024-04-15 14:00:00.000 -0300,false,At 17:00,/opt/airflow/dags/172ebb0521cadde76475236c3412f732339c590b/airflow/dags 9 | enterprise_sync_public,true,false,false,2024-04-16 10:33:35.417 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,DSaaS,,grid,"""0 16 * * *""",,2024-04-14 13:00:00.000 -0300,2024-04-15 13:00:00.000 -0300,16,false,1,2024-04-14 13:00:00.000 -0300,2024-04-15 13:00:00.000 -0300,false,At 16:00,/opt/airflow/dags/172ebb0521cadde76475236c3412f732339c590b/airflow/dags 10 | bitrix_projects_validate_dag,true,false,false,2023-12-19 19:51:48.161 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/bitrix_projects validate_dag.py,DSaaS,Dag to run hours extractions 
pipeline,grid,"""0 0/12 * * *""",,2023-12-18 21:00:00.000 -0300,2023-12-19 09:00:00.000 -0300,16,false,16,2023-12-18 21:00:00.000 -0300,2023-12-19 09:00:00.000 -0300,false,Every 12 hours,/opt/airflow/dags/fb3710f58ae5f8df430f2bbe40ef211ccebc012b/airflow/dags 11 | -------------------------------------------------------------------------------- /integration_tests/seeds/airflow/dag_run.csv: -------------------------------------------------------------------------------- 1 | "id","dag_id","execution_date","state","run_id","external_trigger","conf","end_date","start_date","run_type","last_scheduling_decision","dag_hash","creating_job_id","queued_at","data_interval_start","data_interval_end","log_template_id","updated_at" 2 | 87755,airflow_status_monitoring,2024-07-04 10:56:00.000 -0300,success,scheduled__2024-07-04T13:56:00+00:00,false,�\u0004}�.,2024-07-04 10:57:15.119 -0300,2024-07-04 10:57:00.256 -0300,scheduled,2024-07-04 10:57:15.109 -0300,"2606cfccb8540961ee80c09fe32dcc8d",110841,2024-07-04 10:57:00.210 -0300,2024-07-04 10:56:00.000 -0300,2024-07-04 10:57:00.000 -0300,1,2024-07-04 10:57:15.120 -0300 3 | 84260,dag_tags_rbac,2024-07-02 05:30:00.000 -0300,success,scheduled__2024-07-02T08:30:00+00:00,false,�\u0004}�.,2024-07-02 06:01:20.844 -0300,2024-07-02 06:00:00.624 -0300,scheduled,2024-07-02 06:01:20.836 -0300,"3616896069a7d5a3b40f4478372f03da",110841,2024-07-02 06:00:00.550 -0300,2024-07-02 05:30:00.000 -0300,2024-07-02 06:00:00.000 -0300,1,2024-07-02 06:01:20.848 -0300 4 | 84507,airflow_status_monitoring,2024-07-02 09:40:00.000 -0300,success,scheduled__2024-07-02T12:40:00+00:00,false,�\u0004}�.,2024-07-02 09:41:17.534 -0300,2024-07-02 09:41:00.866 -0300,scheduled,2024-07-02 09:41:17.528 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 09:41:00.832 -0300,2024-07-02 09:40:00.000 -0300,2024-07-02 09:41:00.000 -0300,1,2024-07-02 09:41:17.536 -0300 5 | 84775,airflow_status_monitoring,2024-07-02 13:48:00.000 -0300,success,scheduled__2024-07-02T16:48:00+00:00,false,�\u0004}�.,2024-07-02 13:49:15.733 -0300,2024-07-02 13:49:00.347 -0300,scheduled,2024-07-02 13:49:15.728 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 13:49:00.289 -0300,2024-07-02 13:48:00.000 -0300,2024-07-02 13:49:00.000 -0300,1,2024-07-02 13:49:15.742 -0300 6 | 84508,airflow_status_monitoring,2024-07-02 09:41:00.000 -0300,success,scheduled__2024-07-02T12:41:00+00:00,false,�\u0004}�.,2024-07-02 09:42:14.676 -0300,2024-07-02 09:42:00.647 -0300,scheduled,2024-07-02 09:42:14.666 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 09:42:00.591 -0300,2024-07-02 09:41:00.000 -0300,2024-07-02 09:42:00.000 -0300,1,2024-07-02 09:42:14.678 -0300 7 | 84509,airflow_status_monitoring,2024-07-02 09:42:00.000 -0300,success,scheduled__2024-07-02T12:42:00+00:00,false,�\u0004}�.,2024-07-02 09:43:16.490 -0300,2024-07-02 09:43:00.343 -0300,scheduled,2024-07-02 09:43:16.485 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 09:43:00.262 -0300,2024-07-02 09:42:00.000 -0300,2024-07-02 09:43:00.000 -0300,1,2024-07-02 09:43:16.491 -0300 8 | 84346,airflow_status_monitoring,2024-07-02 07:14:00.000 -0300,success,scheduled__2024-07-02T10:14:00+00:00,false,�\u0004}�.,2024-07-02 07:15:17.620 -0300,2024-07-02 07:15:00.566 -0300,scheduled,2024-07-02 07:15:17.614 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 07:15:00.528 -0300,2024-07-02 07:14:00.000 -0300,2024-07-02 07:15:00.000 -0300,1,2024-07-02 07:15:17.621 -0300 9 | 84510,airflow_status_monitoring,2024-07-02 09:43:00.000 
-0300,success,scheduled__2024-07-02T12:43:00+00:00,false,�\u0004}�.,2024-07-02 09:44:16.026 -0300,2024-07-02 09:44:00.704 -0300,scheduled,2024-07-02 09:44:16.020 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 09:44:00.665 -0300,2024-07-02 09:43:00.000 -0300,2024-07-02 09:44:00.000 -0300,1,2024-07-02 09:44:16.027 -0300 10 | 84776,bitrix_refresh_access_token,2024-07-02 13:00:00.000 -0300,success,scheduled__2024-07-02T16:00:00+00:00,false,�\u0004}�.,2024-07-02 13:50:19.597 -0300,2024-07-02 13:50:00.487 -0300,scheduled,2024-07-02 13:50:19.592 -0300,b71d3629e5a26934dd20b6e9a3335f84,110841,2024-07-02 13:50:00.361 -0300,2024-07-02 13:00:00.000 -0300,2024-07-02 13:50:00.000 -0300,1,2024-07-02 13:50:19.599 -0300 11 | -------------------------------------------------------------------------------- /macros/model_task_instance_databricks_workflow.sql: -------------------------------------------------------------------------------- 1 | {% macro model_task_instance_databricks_workflow() -%} 2 | {{ return(adapter.dispatch('model_task_instance_databricks_workflow')()) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__model_task_instance_databricks_workflow() -%} 7 | with 8 | flatten_data as ( 9 | select 10 | job_runs.job_id 11 | , job_runs.inserteddate as inserted_date 12 | , exploded_tasks.* 13 | from 14 | {{ source('raw_databricks_workflow_monitoring', 'job_runs') }} as job_runs 15 | {{ flatten_data('tasks') }} as exploded_tasks 16 | ) 17 | , renamed as ( 18 | select 19 | {{ cast_as_string("flatten_data.task_key") }} as task_id 20 | , {{ cast_as_string("flatten_data.job_id") }} as dag_id 21 | , {{ cast_as_string("flatten_data.run_id") }} as run_id 22 | , {{cast_as_timestamp('flatten_data.start_time')}} as execution_date 23 | , {{cast_as_timestamp('flatten_data.start_time')}} as execution_start_date 24 | , {{cast_as_timestamp('flatten_data.end_time')}} as execution_end_date 25 | , (flatten_data.execution_duration / 1000) as duration 26 | , {{replace_dot_for_colon('state','result_state')}} as state_task_instance 27 | , attempt_number as try_number 28 | , {{replace_dot_for_colon('notebook_task','notebook_path')}} as hostname 29 | , 'not_implemented_for_databricks_workflow' as task_pool 30 | , 'not_implemented_for_databricks_workflow' as priority_weight 31 | , case 32 | when {{replace_dot_for_colon('notebook_task','notebook_path')}} is not null then 33 | {{replace_dot_for_colon('notebook_task','notebook_path')}} 34 | else flatten_data.task_key 35 | end as operator 36 | , 'not_implemented_for_databricks_workflow' as map_index 37 | from flatten_data 38 | ) 39 | select 40 | {{ dbt_utils.generate_surrogate_key(['task_id', 'dag_id', 'run_id']) }} as task_instance_sk 41 | , * 42 | from renamed 43 | {%- endmacro %} 44 | 45 | {% macro snowflake__model_task_instance_databricks_workflow() -%} 46 | with 47 | flatten_data as ( 48 | select * 49 | from 50 | {{ source('raw_databricks_workflow_monitoring', 'job_runs') }} as job_runs 51 | {{ flatten_data('"tasks"') }} as exploded_tasks 52 | ) 53 | , renamed as ( 54 | select 55 | {{ cast_as_string("value:task_key") }} as task_id 56 | , {{ cast_as_string("job_id") }} as dag_id 57 | , {{ cast_as_string("run_id") }} as run_id 58 | , {{cast_as_timestamp('start_time')}} as execution_date 59 | , {{cast_as_timestamp('start_time')}} as execution_start_date 60 | , {{cast_as_timestamp('end_time')}} as execution_end_date 61 | , (execution_duration / 1000) as duration 62 | , {{replace_dot_for_colon('state','result_state')}} as state_task_instance 63 | , 
{{replace_dot_for_colon('value','attempt_number')}} as try_number 64 | , {{replace_dot_for_colon('value','notebook_task.notebook_path')}} as hostname 65 | , 'not_implemented_for_databricks_workflow' as task_pool 66 | , 'not_implemented_for_databricks_workflow' as priority_weight 67 | , case 68 | when {{replace_dot_for_colon('value','notebook_task.notebook_path')}} is not null then 69 | {{replace_dot_for_colon('value','notebook_task.notebook_path')}} 70 | else {{replace_dot_for_colon('value','task_key')}} 71 | end as operator 72 | , 'not_implemented_for_databricks_workflow' as map_index 73 | from flatten_data 74 | ) 75 | select 76 | {{ dbt_utils.generate_surrogate_key(['task_id', 'dag_id', 'run_id']) }} as task_instance_sk 77 | , * 78 | from renamed 79 | {%- endmacro %} 80 | -------------------------------------------------------------------------------- /models/staging/adf_sources/source.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: raw_adf_monitoring 5 | description: " Raw data extracted from ADF for ADF monitoring analysis." 6 | database: "{{ var('dag_monitoring_adf_database', '')}}" 7 | schema: "{{ var('dag_monitoring_adf_schema', '') }}" 8 | tables: 9 | - name: adf_pipeline_runs 10 | description: "table that contains data from ADF pipeline runs." 11 | columns: 12 | - name: id 13 | description: "Table id." 14 | tests: 15 | - not_null 16 | - unique 17 | 18 | - name: pipelineName 19 | description: " Name of the pipeline of the dag run." 20 | tests: 21 | - not_null 22 | 23 | - name: runStart 24 | description: "Execution date" 25 | 26 | - name: status 27 | description: "DAG run state." 28 | 29 | - name: invokedBy 30 | description: "Points if the DAG run was triggered externally (True / False)." 31 | 32 | - name: runStart 33 | description: "Date and time when the DAG run started." 34 | 35 | - name: runEnd 36 | description: "Date and time when the DAG run ended." 37 | 38 | - name: durationInMs 39 | description: "DAG duration in Milliseconds" 40 | 41 | - name: adf_activity_runs 42 | description: "table that contains data from ADF activity runs." 43 | columns: 44 | - name: activityRunId 45 | description: "task id." 46 | tests: 47 | - not_null 48 | 49 | - name: pipelineName 50 | description: "Pipeline id to which this activity belongs." 51 | tests: 52 | - not_null 53 | 54 | - name: pipelineRunId 55 | description: "Pipeline execution id to which this activity belongs." 56 | tests: 57 | - not_null 58 | 59 | - name: activityRunStart 60 | description: " Date and time when the execution started." 61 | 62 | - name: activityRunEnd 63 | description: "Date and time when the execution ended." 64 | 65 | - name: durationInMs 66 | description: "Duration of the execution in Milliseconds." 67 | 68 | - name: map_index 69 | description: "Mapping index" 70 | 71 | - name: adf_pipelines 72 | description: "Table that contains information about ADF pipelines." 73 | columns: 74 | - name: id 75 | description: "table id." 76 | tests: 77 | - not_null 78 | - unique 79 | 80 | - name: is_paused 81 | description: "If the dag is paused." 82 | 83 | - name: is_active 84 | description: "If the DAG is active." 85 | 86 | - name: description 87 | description: "DAG description" 88 | 89 | - name: fileloc 90 | description: "File path that needs to be imported to load this DAG." 91 | 92 | - name: owners 93 | description: "DAG owner." 
94 | 95 | - name: timetable_description 96 | description: "Description of the scheduling table" 97 | 98 | - name: ind_extraction_date 99 | description: "Date of extraction of the table" 100 | 101 | - name: adf_triggers 102 | description: "Table that contains information about ADF triggers." 103 | columns: 104 | - name: id 105 | description: "Identification of the table." 106 | tests: 107 | - not_null 108 | - unique 109 | 110 | - name: properties.runtimeState 111 | description: "If the trigger is active or not." 112 | 113 | - name: properties.annotations 114 | description: "Annotations in the trigger." 115 | 116 | - name: properties.pipelines 117 | description: "Pipelines that are executed by this trigger." 118 | 119 | - name: properties.typeProperties.recurrence.frequency 120 | description: "Frequency with which the pipeline is executed e.g Hour, Day, Week, Month" 121 | 122 | - name: properties.typeProperties.recurrence.interval 123 | description: "In how many 'frequency' this trigger is executed e.g 1 Day, 2 Week, being 1 and 2 the interval" 124 | 125 | - name: properties.typeProperties.recurrence.schedule 126 | description: "Scheduling defined by the table" 127 | 128 | - name: properties.typeProperties.recurrence.startTime 129 | description: "First execution" 130 | 131 | - name: properties.typeProperties.recurrence.timeZone 132 | description: "Time zone of the trigger" 133 | 134 | - name: dbt_utils_day 135 | description: "Table that contains data from the dates created from the dbt_utils macro." 136 | -------------------------------------------------------------------------------- /models/staging/airflow_sources/source.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: raw_airflow_monitoring 5 | description: "Raw data extracted from Airflow for Airflow monitoring analysis." 6 | database: "{{ var('dag_monitoring_airflow_database', '')}}" 7 | schema: "{{ var('dag_monitoring_airflow_schema', '') }}" 8 | tables: 9 | - name: dag_run 10 | description: "Table that contains data from Airflow DAG runs." 11 | columns: 12 | - name: id 13 | description: "Id of the table" 14 | tests: 15 | - not_null 16 | - unique 17 | 18 | - name: dag_id 19 | description: "Id of the dag run." 20 | tests: 21 | - not_null 22 | 23 | - name: execution_date 24 | description: "Date of execution." 25 | 26 | - name: state 27 | description: "state of the DAG run." 28 | 29 | - name: external_trigger 30 | description: "Points if the DAG run was triggered externally (True / False)." 31 | 32 | - name: start_date 33 | description: "Date and time when the DAG run started." 34 | 35 | - name: end_date 36 | description: "Date and time when the DAG run ended." 37 | 38 | - name: run_type 39 | description: "Type of DAG run." 40 | 41 | - name: task_instance 42 | description: "Table that contains data from Airflow task instances." 43 | columns: 44 | - name: task_id 45 | description: "Id of the executed task" 46 | tests: 47 | - not_null 48 | 49 | - name: dag_id 50 | description: "Identification of the dag." 51 | tests: 52 | - not_null 53 | 54 | - name: run_id 55 | description: "Identification of the run." 56 | 57 | - name: start_date 58 | description: " Date and time when the execution started." 59 | 60 | - name: end_date 61 | description: "Date and time when the execution ended." 62 | 63 | - name: duration 64 | description: "Duration of the execution in seconds." 65 | 66 | - name: state 67 | description: "The state of the task execution." 
68 | 69 | - name: try_number 70 | description: "Number of execution attempts." 71 | 72 | - name: hostname 73 | description: "Task hostname." 74 | 75 | - name: pool 76 | description: "The airflow pool in which the task should be executed." 77 | 78 | - name: priority_weight 79 | description: "Priority of the task." 80 | 81 | - name: operator 82 | description: "Task model operator." 83 | 84 | - name: queue 85 | description: "Task queue." 86 | 87 | - name: pool_slots 88 | description: "Pool slots quantity." 89 | 90 | - name: map_index 91 | description: "Mapping index." 92 | 93 | - name: task_fail 94 | description: "Table that contains data from Airflow tasks with failures." 95 | columns: 96 | - name: id 97 | description: "Table id." 98 | tests: 99 | - not_null 100 | - unique 101 | 102 | - name: task_id 103 | description: "Task id." 104 | tests: 105 | - not_null 106 | 107 | - name: dag_id 108 | description: "Dag id." 109 | tests: 110 | - not_null 111 | 112 | - name: start_date 113 | description: "Date and time when the execution started." 114 | 115 | - name: end_date 116 | description: "Date and time when the execution ended." 117 | 118 | - name: duration 119 | description: "Duration of the execution in seconds." 120 | 121 | - name: map_index 122 | description: "Mapping index" 123 | 124 | - name: dag 125 | description: "Table that contains information about Airflow DAGs." 126 | columns: 127 | - name: dag_id 128 | description: "Dag id." 129 | tests: 130 | - not_null 131 | - unique 132 | 133 | - name: is_paused 134 | description: "If the dag is paused." 135 | 136 | - name: is_active 137 | description: "If the DAG is active." 138 | 139 | - name: description 140 | description: "DAG description." 141 | 142 | - name: fileloc 143 | description: "File path that needs to be imported to load this DAG." 144 | 145 | - name: owners 146 | description: "DAG owner." 147 | 148 | - name: timetable_description 149 | description: "Description of the scheduling table." 150 | 151 | - name: dbt_utils_day 152 | description: "Table that contains data from dates created from the dbt_utils macro." 
153 | -------------------------------------------------------------------------------- /models/calendar/dim_dag_monitoring_dates.sql: -------------------------------------------------------------------------------- 1 | {% set end_date_query %} 2 | select {{ date_add("year", "100", "current_date()") }} 3 | {% endset %} 4 | 5 | {% if execute %} 6 | {%set end_date = run_query(end_date_query).columns[0].values()[0] %} 7 | {% else %} 8 | {% set end_date = ' ' %} 9 | {% endif %} 10 | 11 | /* generating dates using a dbt-utils macro */ 12 | with 13 | dates_raw as ( 14 | {{ dbt_utils.date_spine( 15 | datepart="day", 16 | start_date="cast('1970-01-01' as date)", 17 | end_date="cast('" ~ end_date ~ "' as date)" 18 | ) 19 | }} 20 | ) 21 | 22 | /* extracting some date information*/ 23 | , days_info as ( 24 | select 25 | cast(date_day as date) as date_day 26 | , extract(DAYOFWEEK from date_day) as week_day 27 | , extract(month from date_day) as month_number 28 | , extract(quarter from date_day) as quarter_number 29 | , {{ day_of_year("date_day") }} as day_of_year 30 | , extract(year from date_day) as year_date 31 | , {{ month_day('date_day') }} as month_day 32 | from dates_raw 33 | ) 34 | 35 | /**/ 36 | , days_named as ( 37 | select 38 | * 39 | , {{ day_of_week('week_day') }} 40 | , case 41 | when month_number = 1 then 'January' 42 | when month_number = 2 then 'February' 43 | when month_number = 3 then 'March' 44 | when month_number = 4 then 'April' 45 | when month_number = 5 then 'May' 46 | when month_number = 6 then 'June' 47 | when month_number = 7 then 'July' 48 | when month_number = 8 then 'August' 49 | when month_number = 9 then 'September' 50 | when month_number = 10 then 'October' 51 | when month_number = 11 then 'November' 52 | else 'December' 53 | end as month_name 54 | , case 55 | when month_number = 1 then 'Jan' 56 | when month_number = 2 then 'Feb' 57 | when month_number = 3 then 'Mar' 58 | when month_number = 4 then 'Apr' 59 | when month_number = 5 then 'May' 60 | when month_number = 6 then 'Jun' 61 | when month_number = 7 then 'Jul' 62 | when month_number = 8 then 'Aug' 63 | when month_number = 9 then 'Sep' 64 | when month_number = 10 then 'Oct' 65 | when month_number = 11 then 'Nov' 66 | else 'Dec' 67 | end as month_short 68 | , case 69 | when quarter_number = 1 then '1º quarter' 70 | when quarter_number = 2 then '2º quarter' 71 | when quarter_number = 3 then '3º quarter' 72 | else '4º quarter' 73 | end as quarter_name 74 | , case 75 | when quarter_number in(1,2) then 1 76 | else 2 77 | end as semester 78 | , case 79 | when quarter_number in(1,2) then '1º Semester' 80 | else '2º Semester' 81 | end as semester_name 82 | from days_info 83 | ) 84 | 85 | , flags_cte as ( 86 | /*flags related to holidays and business days*/ 87 | select 88 | * 89 | , case 90 | when month_day = '01-01' then true 91 | when month_day = '21-04' then true 92 | when month_day = '01-05' then true 93 | when month_day = '07-09' then true 94 | when month_day = '12-10' then true 95 | when month_day = '02-11' then true 96 | when month_day = '15-11' then true 97 | when month_day = '25-12' then true 98 | else false 99 | end as fl_holiday 100 | , case 101 | when week_day in(6, 0) then false 102 | when month_day = '01-01' then false 103 | when month_day = '21-04' then false 104 | when month_day = '01-05' then false 105 | when month_day = '07-09' then false 106 | when month_day = '12-10' then false 107 | when month_day = '02-11' then false 108 | when month_day = '15-11' then false 109 | when month_day = '25-12' then false 110 
| else true 111 | end as fl_business_day 112 | , coalesce(week_day in(6, 0), false) as fl_weekends 113 | from days_named 114 | ) 115 | 116 | /* reorganizing the columns */ 117 | , final_cte as ( 118 | select 119 | date_day 120 | , week_day 121 | , name_of_day 122 | , month_number 123 | , month_name 124 | , month_short 125 | , quarter_number 126 | , quarter_name 127 | , semester 128 | , semester_name 129 | , fl_holiday 130 | , fl_business_day 131 | , fl_weekends 132 | , day_of_year 133 | , year_date 134 | from flags_cte 135 | ) 136 | 137 | select * 138 | from final_cte -------------------------------------------------------------------------------- /integration_tests/macros/adf_pipeline_runs.sql: -------------------------------------------------------------------------------- 1 | {% macro adf_pipeline_runs() -%} 2 | {{ return(adapter.dispatch('adf_pipeline_runs')()) }} 3 | {%- endmacro %} 4 | 5 | {%- macro default__adf_pipeline_runs() -%} 6 | {% set create_table %} 7 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_pipeline_runs( 8 | id STRING, 9 | runId STRING, 10 | debugRunId STRING, 11 | runGroupId STRING, 12 | pipelineName STRING, 13 | parameters STRUCT< 14 | ENVIRONMENT STRING, 15 | RESET_TYPE STRING, 16 | DAYS_BEFORE STRING 17 | >, 18 | invokedBy STRUCT< 19 | id STRING, 20 | name STRING, 21 | invokedByType STRING, 22 | pipelineName STRING, 23 | pipelineRunId STRING 24 | >, 25 | runStart TIMESTAMP, 26 | runEnd TIMESTAMP, 27 | durationInMs BIGINT, 28 | status STRING, 29 | message STRING, 30 | pipelineReturnValue MAP, 31 | lastUpdated TIMESTAMP, 32 | annotations ARRAY, 33 | runDimension MAP, 34 | isLatest BOOLEAN 35 | ); 36 | {% endset %} 37 | 38 | {% set insert_table %} 39 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.adf_pipeline_runs 40 | VALUES 41 | ( 42 | '/SUBSCRIPTIONS/9F075ORIES/TLO-DATASTUDIO-ADF-D/pipe082b73db5', 43 | '9f81a5eb-db6082b73db5', 44 | NULL, 45 | '9f81a5eb-a7c82b73db5', 46 | 'PL-FEMFILESCSLandingZone-N', 47 | NAMED_STRUCT( 48 | 'ENVIRONMENT', 'FILESCSV', 49 | 'RESET_TYPE', 'FULL', 50 | 'DAYS_BEFORE', NULL 51 | ), 52 | NAMED_STRUCT( 53 | 'id', 'cf613b7b-e0c4cfbe8', 54 | 'name', 'cf613b7b-e0ce924dc0cfbe8', 55 | 'invokedByType', 'PipelineActivity', 56 | 'pipelineName', 'PL-OrquestraZeroLoadingControl', 57 | 'pipelineRunId', '598ec8ee-6e049c4b8c558' 58 | ), 59 | '2024-08-20T03:30:06.6061079Z', 60 | '2024-08-20T03:47:03.9865228Z', 61 | 1017380, 62 | 'Succeeded', 63 | NULL, 64 | MAP(), 65 | '2024-08-20T03:47:03.9879388Z', 66 | ARRAY(), 67 | MAP(), 68 | true 69 | ), 70 | ( 71 | '/SUBSCRIPTIONS/9//pipelinerua1-9313-73fa5c0a3f0e', 72 | '64c7a8c7--73fa5c0a3f0e', 73 | NULL, 74 | '64c7a8c7-30e23fa5c0a3f0e', 75 | 'NET_REC_DAILY_PRICE_UPDATE', 76 | NAMED_STRUCT( 77 | 'ENVIRONMENT', NULL, 78 | 'RESET_TYPE', NULL, 79 | 'DAYS_BEFORE', '1' 80 | ), 81 | NAMED_STRUCT( 82 | 'id', '0858477451681969CU22', 83 | 'name', 'NET_REC_DAILY_UPDATE', 84 | 'invokedByType', 'ScheduleTrigger', 85 | 'pipelineName', NULL, 86 | 'pipelineRunId', NULL 87 | ), 88 | '2024-08-20T12:00:31.2728264Z', 89 | '2024-08-20T13:15:52.6545498Z', 90 | 4521381, 91 | 'Succeeded', 92 | NULL, 93 | MAP(), 94 | '2024-08-20T13:15:52.6550273Z', 95 | ARRAY( 96 | 'ted' 97 | ), 98 | MAP(), 99 | true 100 | ); 101 | {% endset %} 102 | 103 | {% do run_query(create_table) %} 104 | {% do log("finished creating table adf_pipeline_runs", info=true) %} 105 | 106 | {% do run_query(insert_table) %} 107 | {% do log("finished insert table adf_pipeline_runs", info=true) %} 108 | {%- endmacro -%} 109 | 110 | 
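{#
  Note: `adf_pipeline_runs()` resolves through `adapter.dispatch`, so dbt picks the
  implementation matching the active adapter. The `default__` variant above uses
  Databricks SQL constructors (NAMED_STRUCT, MAP, ARRAY), while the `bigquery__`
  variant below uses BigQuery typed STRUCT/ARRAY literals. In CI this macro is
  invoked with `dbt run-operation adf_pipeline_runs --target <target>` to create
  fixture data before the source tests run.
#}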
111 | {%- macro bigquery__adf_pipeline_runs() -%} 112 | {% set create_table %} 113 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_pipeline_runs( 114 | id STRING, 115 | runId STRING, 116 | debugRunId STRING, 117 | runGroupId STRING, 118 | pipelineName STRING, 119 | parameters STRUCT< 120 | ENVIRONMENT STRING, 121 | RESET_TYPE STRING, 122 | DAYS_BEFORE STRING 123 | >, 124 | invokedBy STRUCT< 125 | id STRING, 126 | name STRING, 127 | invokedByType STRING, 128 | pipelineName STRING, 129 | pipelineRunId STRING 130 | >, 131 | runStart TIMESTAMP, 132 | runEnd TIMESTAMP, 133 | durationInMs BIGINT, 134 | status STRING, 135 | message STRING, 136 | pipelineReturnValue ARRAY>, 137 | lastUpdated TIMESTAMP, 138 | annotations ARRAY, 139 | runDimension ARRAY>, 140 | isLatest BOOLEAN 141 | ); 142 | 143 | {% endset %} 144 | 145 | {% set insert_table %} 146 | INSERT INTO `{{ target.database }}.{{ target.schema }}.adf_pipeline_runs` 147 | VALUES 148 | ( 149 | '/SUBSCRIPTIONS/9FFACTORIES/TLO-DATASTUDIO-ADF-D/pipe082b73db5', 150 | '9f81a5eb-db6082b73db5', 151 | NULL, 152 | '9f81a5eb-a73e-db6082b73db5', 153 | 'PL-FEMFILESCSingZone-N', 154 | STRUCT( 155 | 'FILESCSV' AS ENVIRONMENT, 156 | 'FULL' AS RESET_TYPE, 157 | NULL AS DAYS_BEFORE 158 | ), 159 | STRUCT( 160 | 'cf613b7b-e04dc0cfbe8' AS id, 161 | 'cf613b7b-e0c4924dc0cfbe8' AS name, 162 | 'PipelineActivity' AS invokedByType, 163 | 'PL-OrquestradorSooLoadingControl' AS pipelineName, 164 | '598ec8ee-604c-47c7-a3c0-e049c4b8c558' AS pipelineRunId 165 | ), 166 | TIMESTAMP('2024-08-20T03:30:06.606107Z'), 167 | TIMESTAMP('2024-08-20T03:47:03.986522Z'), 168 | 1017380, 169 | 'Succeeded', 170 | NULL, 171 | ARRAY>[], 172 | TIMESTAMP('2024-08-20T03:47:03.987938Z'), 173 | ARRAY[], 174 | ARRAY>[], 175 | TRUE 176 | ), 177 | ( 178 | '/SUBSCRIPTIONS/9/PROVIDERS/MO-ADF-D/pipelinerua1-9313-73fa5c0a3f0e', 179 | '64c7a8c7-30313-73fa5c0a3f0e', 180 | NULL, 181 | '64c7a8c7-30313-73fa5c0a3f0e', 182 | 'NET_REC_DAILY_PRICE_UPDATE', 183 | STRUCT( 184 | NULL AS ENVIRONMENT, 185 | NULL AS RESET_TYPE, 186 | '1' AS DAYS_BEFORE 187 | ), 188 | STRUCT( 189 | '08584774516819036014561066769CU22' AS id, 190 | 'NET_REC_DAILY_UPDATE' AS name, 191 | 'ScheduleTrigger' AS invokedByType, 192 | NULL AS pipelineName, 193 | NULL AS pipelineRunId 194 | ), 195 | TIMESTAMP('2024-08-20T12:00:31.272826Z'), 196 | TIMESTAMP('2024-08-20T13:15:52.654549Z'), 197 | 4521381, 198 | 'Succeeded', 199 | NULL, 200 | ARRAY>[], 201 | TIMESTAMP('2024-08-20T13:15:52.655027Z'), 202 | ARRAY['ted'], 203 | ARRAY>[], 204 | TRUE 205 | ); 206 | 207 | 208 | {% endset %} 209 | 210 | {% do run_query(create_table) %} 211 | {% do log("finished creating table adf_pipeline_runs", info=true) %} 212 | 213 | {% do run_query(insert_table) %} 214 | {% do log("finished insert table adf_pipeline_runs", info=true) %} 215 | {%- endmacro -%} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dag Monitoring 2 | This package allows you to easily monitor your DAGs from well known orchestration tools, providing helpful info to improve your data pipeline. 
3 | 
4 | # Table of Contents
5 | 
6 | - [Before creating a branch](#Before-creating-a-branch)
7 | - [Revisions](#revisions)
8 | - [Tools supported](#tools-supported)
9 | - [Quickstart](#:running:-Quickstart)
10 | - [Requirements](#requirements)
11 | - [Profiles](#profiles)
12 | - [Installation](#installation)
13 | - [Configuring models package](#Configuring-models-package)
14 | - [Airflow metadata](#Airflow-metadata)
15 | - [ADF metadata](#ADF-metadata)
16 | - [Databricks Workflow Data](#Databricks-Workflow-Data)
17 | - [Integration tests](#Integration-tests)
18 | 
19 | # Before creating a branch
20 | 
21 | Before creating a branch, decide whether your modification to this repository is a release/major (breaking changes), a feature/minor (new functionality) or a patch (bug fix). With that information, name your branch like this:
22 | 
23 | - `release/` or `major/` or `Release/` or `Major/`
24 | - `feature/` or `minor/` (capitalised variants work as well)
25 | - `patch/` or `fix/` or `hotfix/` (capitalised variants work as well)
26 | 
27 | # Revisions
28 | - 0.3.0 - For Snowflake warehouses
29 | - 0.3.1 - For Redshift warehouses
30 | 
31 | ## Tools supported
32 | 
33 | - Azure Data Factory
34 | - Apache Airflow
35 | - Databricks Workflows
36 | 
37 | If you are cloning this repository, we recommend cloning via SSH.
38 | 
39 | # :running: Quickstart
40 | 
41 | New to dbt packages? Read more about them [here](https://docs.getdbt.com/docs/building-a-dbt-project/package-management/).
42 | 
43 | ## Requirements
44 | dbt version
45 | * ```dbt version >= 1.3.0```
46 | 
47 | dbt_utils package. Read more about it [here](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/).
48 | * ```dbt-labs/dbt_utils version: 1.1.1```
49 | 
50 | This package works with most EL processes and depends on the metadata generated by the respective platform.
51 | 
52 | ## Profiles
53 | The profile below uses Databricks as an example. When testing the repository, rename `example.env` to `.env` and fill in its variables with the appropriate values; the profile reads them as environment variables.
54 | 
55 | ```yaml
56 | dbt_dag_monitoring:
57 |   target: "{{ env_var('DBT_DEFAULT_TARGET', 'dev')}}"
58 |   outputs:
59 |     dev:
60 |       type: databricks
61 |       catalog: "{{ env_var('DEV_CATALOG_NAME')}}"
62 |       schema: "{{ env_var('DEV_SCHEMA_NAME')}}"
63 |       host: "{{ env_var('DEV_HOST') }}"
64 |       http_path: "{{ env_var('DEV_HTTP_PATH') }}"
65 |       token: "{{ env_var('DEV_TOKEN') }}"
66 |       threads: 16
67 |       ansi_mode: false
68 | ```
69 | 
70 | Once that is done, two commands are needed to work locally without difficulties:
71 | 
72 | `chmod +x setup.sh`
73 | 
74 | and
75 | 
76 | `source setup.sh`
77 | 
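The CI workflow also runs the project against BigQuery and Snowflake targets (`dbt debug --target bigquery` / `--target snowflake`). The repository's `profiles.yml` is not reproduced in this README, so the snippet below is only a sketch of what those extra outputs could look like, assembled from the environment variables exported in `.github/workflows/ci.yml` and the standard `dbt-bigquery` / `dbt-snowflake` connection fields. Adjust it to match the actual file.

```yaml
# Illustrative sketch only: additional outputs placed next to `dev`,
# assuming the env var names exported in .github/workflows/ci.yml.
    bigquery:
      type: bigquery
      method: oauth   # CI authenticates via google-github-actions/auth
      project: "{{ env_var('BIGQUERY_PROJECT') }}"
      dataset: "{{ env_var('BIGQUERY_DATASET') }}"
      threads: 16
    snowflake:
      type: snowflake
      account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}"
      user: "{{ env_var('SNOWFLAKE_USER') }}"
      password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
      role: "{{ env_var('SNOWFLAKE_ROLE') }}"
      database: "{{ env_var('SNOWFLAKE_DATABASE') }}"
      warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}"
      schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}"
      threads: 16
```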
78 | ## Installation
79 | 
80 | 1. Include this package in your `packages.yml` file.
81 | ```yaml
82 | packages:
83 |   - git: "https://github.com/techindicium/dbt-dag-monitoring.git"
84 |     revision: # 0.3.0 or 0.3.1
85 | ```
86 | 
87 | 2. Run `dbt deps` to install the package.
88 | 
89 | ## Configuring models package
90 | 
91 | ### Models:
92 | The behaviour of the package on the desired platform depends on the configuration of `dbt_project.yml`. To define which platform's metadata the package transforms, set the `enabled` field to `true` for the desired platform and to `false` for all the others.
93 | 
94 | ### Vars:
95 | Then we define the variables: `enabled_sources` determines which platform dbt should consider the variables for; `dag_monitoring_start_date` defines the date from which monitoring data is considered; and the following lines define which database and schema will be used, according to the platform defined above.
96 | ```yaml
97 | models:
98 |   dbt_dag_monitoring:
99 |     marts:
100 |       +materialized: table
101 |     staging:
102 |       +materialized: view
103 |       airflow_sources:
104 |         +enabled: true
105 |       adf_sources:
106 |         +enabled: false
107 |       databricks_workflow_sources:
108 |         +enabled: false
109 | 
110 | sources:
111 |   dbt_dag_monitoring:
112 |     staging:
113 |       adf_sources:
114 |         raw_adf_monitoring:
115 |           +enabled: false
116 |       databricks_workflow_sources:
117 |         raw_databricks_workflow_monitoring:
118 |           +enabled: false
119 |       airflow_sources:
120 |         raw_airflow_monitoring:
121 |           +enabled: true
122 | ```
123 | 
124 | 
125 | Adding the vars below to `dbt_project.yml` prevents dbt compilation errors.
126 | ```yaml
127 | vars:
128 |   dbt_dag_monitoring:
129 |     enabled_sources: ['airflow'] # Possible values: 'airflow', 'adf' or 'databricks_workflow'
130 |     dag_monitoring_start_date: cast('2023-01-01' as date)
131 |     dag_monitoring_airflow_database: #landing_zone
132 |     dag_monitoring_airflow_schema: #airflow_metadata
133 |     dag_monitoring_databricks_database: #raw_catalog
134 |     dag_monitoring_databricks_schema: #databricks_metadata
135 |     dag_monitoring_adf_database: #raw
136 |     dag_monitoring_adf_schema: #adf_metadata
137 | ```
138 | 
139 | ## Airflow metadata
140 | 
141 | The Airflow sources are based on the Airflow metadata database; any form of extraction from it should suffice.
142 | 
143 | The package is compatible with any type of EL process, as long as the data warehouse contains the following tables:
144 | - dag_run
145 | - task_instance
146 | - task_fail
147 | - dag
148 | 
149 | ## ADF metadata
150 | 
151 | The ADF models rely on sources extracted by our ADF tap:
152 | 
153 | https://bitbucket.org/indiciumtech/platform_meltano_el/src/6b9c9e970518db1e21086ec75a7442d1b6978c93/plugins/custom/tap-azuredatafactory/?at=featuer%2Fadd_adf_extractor
154 | 
155 | ## Databricks Workflow Data
156 | The Databricks Workflow models rely on sources extracted by our Databricks tap:
157 | 
158 | https://bitbucket.org/indiciumtech/platform_meltano_el/src/main/plugins/custom/tap-databricksops/
159 | 
160 | Specifically, the following streams:
161 | 
162 | - jobs
163 | - job_runs
164 | 
165 | ## Integration tests
166 | 
167 | > [!IMPORTANT]
168 | > When working with the integration tests folder, so that the continuous integration workflow runs seamlessly, you can NOT change in your pull request the default values of the vars, models and sources (Databricks) inside `integration_tests/dbt_project.yml`. Following the source pattern is important.
169 | 
170 | More information is available in the README.md inside the `integration_tests` folder.
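The `integration_tests/dbt_project.yml` itself is not shown in this README, so the snippet below is only an illustrative sketch of what the "defaults being Databricks" mentioned above look like, mirroring the model/source/vars pattern from the Configuring models package section and the `cdi_dev` / `ci_dbt_dag_monitoring` values used in `.github/workflows/ci.yml`. The actual file is the source of truth.

```yaml
# Illustrative only — the Databricks defaults the CI scripts switch between.
models:
  dbt_dag_monitoring:
    staging:
      airflow_sources:
        +enabled: false
      adf_sources:
        +enabled: false
      databricks_workflow_sources:
        +enabled: true

vars:
  dbt_dag_monitoring:
    enabled_sources: ['databricks_workflow']
    dag_monitoring_databricks_database: cdi_dev               # matches DEV_CATALOG_NAME in ci.yml
    dag_monitoring_databricks_schema: ci_dbt_dag_monitoring   # matches DEV_SCHEMA_NAME in ci.yml
```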
171 | 172 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - opened 7 | - synchronize 8 | branches: 9 | - main 10 | 11 | env: 12 | DBT_PROFILES_DIR: ${{ github.workspace }}/integration_tests 13 | DBT_PROJECT_DIR: ${{ github.workspace }}/integration_tests 14 | DBT_DEFAULT_TARGET: databricks 15 | DEV_CATALOG_NAME: cdi_dev 16 | DEV_SCHEMA_NAME: ci_dbt_dag_monitoring 17 | DEV_HOST: ${{ secrets.DATABRICKS_HOST }} 18 | DEV_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} 19 | DEV_HTTP_PATH: ${{ secrets.DATABRICKS_HTTP_PATH }} 20 | 21 | BIGQUERY_DATASET: ci_dbt_dag_monitoring 22 | BIGQUERY_PROJECT: indicium-sandbox 23 | DBT_JOB_TIMEOUT: 300 24 | DBT_THREADS: 16 25 | DBT_JOB_RETRIES: 1 26 | 27 | SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_TEST_ACCOUNT}} 28 | SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_TEST_USER }} 29 | SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_TEST_PASSWORD}} 30 | SNOWFLAKE_ROLE: INTERNAL_PRODUCTS_CICD 31 | SNOWFLAKE_DATABASE: SANDBOX 32 | SNOWFLAKE_WAREHOUSE: SANDBOX_WAREHOUSE 33 | SNOWFLAKE_SCHEMA: ci_dbt_dag_monitoring 34 | 35 | 36 | jobs: 37 | dbt-checks: 38 | runs-on: ubuntu-latest 39 | 40 | steps: 41 | - name: Checkout repository 42 | uses: actions/checkout@v2 43 | 44 | - name: Set up Python 45 | uses: actions/setup-python@v2 46 | with: 47 | python-version: '3.8' 48 | 49 | - name: Install dependencies 50 | run: | 51 | python -m pip install --upgrade pip 52 | pip install -r requirements.txt 53 | 54 | - name: Authenticate to GCP 55 | uses: "google-github-actions/auth@v2" 56 | with: 57 | credentials_json: "${{ secrets.BIGQUERY_AUTH }}" 58 | 59 | - name: Run dbt debug for Databricks 60 | run: dbt debug 61 | 62 | - name: Run dbt debug for BigQuery 63 | run: dbt debug --target bigquery 64 | 65 | - name: Run dbt debug for Snowflake 66 | run: dbt debug --target snowflake 67 | 68 | - name: dbt deps 69 | run: dbt deps 70 | 71 | - name: dbt compile 72 | run: dbt compile 73 | 74 | integration-test: 75 | runs-on: ubuntu-latest 76 | steps: 77 | - name: Checkout repository 78 | uses: actions/checkout@v2 79 | 80 | - name: Set up Python 81 | uses: actions/setup-python@v2 82 | with: 83 | python-version: '3.8' 84 | 85 | - name: Install dependencies 86 | run: | 87 | python -m pip install --upgrade pip 88 | pip install -r requirements.txt 89 | 90 | - name: enter integration tests 91 | run: | 92 | cd integration_tests/ 93 | 94 | - name: Authenticate to GCP 95 | uses: "google-github-actions/auth@v2" 96 | with: 97 | credentials_json: "${{ secrets.BIGQUERY_AUTH }}" 98 | 99 | - name: Run dbt integration tests Databricks source in Databricks connection 100 | run: | 101 | dbt deps --target databricks 102 | 103 | dbt run-operation create_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target databricks 104 | 105 | dbt seed --target databricks 106 | 107 | dbt run-operation jobs --target databricks 108 | dbt run-operation job_runs --target databricks 109 | 110 | dbt test -s source:* --target databricks 111 | 112 | dbt build --target databricks 113 | 114 | - name: switch enabled sources for adf source 115 | run: 116 | . 
${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_adf_source.sh 117 | 118 | - name: Run dbt tasks for ADF source in Databricks connection 119 | run: | 120 | dbt deps 121 | 122 | dbt seed --target databricks 123 | 124 | dbt run-operation adf_pipeline_runs --target databricks 125 | dbt run-operation adf_triggers --target databricks 126 | 127 | dbt test -s source:* --target databricks 128 | 129 | dbt build --target databricks 130 | 131 | - name: switch enabled sources for airflow source 132 | run: | 133 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_airflow_source.sh 134 | 135 | - name: Run dbt tasks for Airflow source in Databricks connection 136 | run: | 137 | dbt deps 138 | 139 | dbt seed --target databricks 140 | 141 | dbt test -s source:* --target databricks 142 | 143 | dbt build --target databricks 144 | 145 | dbt run-operation drop_schema --args '{schema_name: ci_dbt_dag_monitoring}' 146 | 147 | - name: change databricks database to bigquery database 148 | run: | 149 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_databricks_source.sh 150 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh databricks cdi_dev indicium-sandbox 151 | 152 | - name: Run dbt integration tests Databricks source in BigQuery connection 153 | run: | 154 | dbt deps --target bigquery 155 | 156 | dbt run-operation create_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target bigquery 157 | 158 | dbt run-operation jobs --target bigquery 159 | dbt run-operation job_runs --target bigquery 160 | 161 | dbt test -s source:* --target bigquery 162 | 163 | dbt build --exclude-resource-type seed --target bigquery 164 | 165 | - name: switch enabled sources for adf source 166 | run: | 167 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh adf cdi_dev indicium-sandbox 168 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_adf_source.sh 169 | 170 | - name: Run dbt integration tests ADF source in BigQuery connection 171 | run: | 172 | dbt deps 173 | 174 | dbt seed -s adf_pipelines --target bigquery 175 | 176 | dbt run-operation adf_activity_runs --target bigquery 177 | dbt run-operation adf_pipeline_runs --target bigquery 178 | dbt run-operation adf_triggers --target bigquery 179 | 180 | dbt test -s source:* --target bigquery 181 | 182 | dbt build --exclude-resource-type seed --target bigquery 183 | 184 | - name: switch enabled sources for airflow source 185 | run: | 186 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_airflow_source.sh 187 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh airflow cdi_dev indicium-sandbox 188 | 189 | - name: Run dbt tasks for Airflow source in BigQuery connection 190 | run: | 191 | dbt deps 192 | 193 | dbt run-operation seed__dag_run --target bigquery 194 | dbt run-operation seed__dag --target bigquery 195 | dbt run-operation seed__task_fail --target bigquery 196 | dbt run-operation seed__task_instance --target bigquery 197 | 198 | dbt test -s source:* --target bigquery 199 | 200 | dbt build --exclude-resource-type seed --target bigquery 201 | 202 | dbt run-operation drop_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target bigquery 203 | 204 | - name: change BigQuery database to Snowflake database 205 | run: | 206 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh databricks indicium-sandbox sandbox 207 | . 
${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_databricks_source.sh 208 | 209 | - name: Run dbt integration tests Databricks source in Snowflake connection 210 | run: | 211 | dbt deps 212 | 213 | dbt run-operation create_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target snowflake 214 | 215 | dbt run-operation jobs --target snowflake 216 | dbt run-operation job_runs --target snowflake 217 | 218 | dbt test -s source:* --target snowflake 219 | 220 | dbt build --exclude-resource-type seed --target snowflake 221 | 222 | - name: switch enabled sources for airflow source 223 | run: | 224 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh airflow indicium-sandbox sandbox 225 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_from_databricks_to_airflow.sh 226 | 227 | - name: Run dbt integration tests Airflow source in Snowflake connection 228 | env: 229 | DBT_PROFILES_DIR: ${{ github.workspace }}/integration_tests 230 | DBT_PROJECT_DIR: ${{ github.workspace }}/integration_tests 231 | run: | 232 | dbt deps 233 | 234 | dbt seed -s seeds/airflow/* --target snowflake 235 | 236 | dbt test -s source:* --target snowflake 237 | 238 | dbt build --exclude-resource-type seed --target snowflake 239 | 240 | dbt run-operation drop_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target snowflake 241 | 242 | -------------------------------------------------------------------------------- /integration_tests/macros/adf_triggers.sql: -------------------------------------------------------------------------------- 1 | {% macro adf_triggers() -%} 2 | {{ return(adapter.dispatch('adf_triggers')()) }} 3 | {%- endmacro %} 4 | 5 | {%- macro default__adf_triggers() -%} 6 | {% set create_table %} 7 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_triggers ( 8 | id STRING, 9 | name STRING, 10 | type STRING, 11 | properties STRUCT< 12 | annotations ARRAY, 13 | pipelines ARRAY< 14 | STRUCT< 15 | pipelineReference STRUCT< 16 | referenceName STRING, 17 | type STRING 18 | >, 19 | parameters STRUCT< 20 | days_before STRING, 21 | environment STRING, 22 | reset_type STRING, 23 | Job_ID STRING, 24 | DatabricksWorkspaceID STRING, 25 | WaitRecheckSeconds INT 26 | > 27 | > 28 | >, 29 | type STRING, 30 | typeProperties STRUCT< 31 | recurrence STRUCT< 32 | frequency STRING, 33 | interval INT, 34 | startTime STRING, 35 | timeZone STRING, 36 | schedule STRUCT< 37 | minutes ARRAY, 38 | hours ARRAY, 39 | weekDays ARRAY, 40 | monthDays ARRAY 41 | > 42 | >, 43 | parentTrigger STRING, 44 | requestedStartTime STRING, 45 | requestedEndTime STRING, 46 | rerunConcurrency INT 47 | >, 48 | runtimeState STRING 49 | > 50 | ); 51 | {% endset %} 52 | 53 | {% set insert_table %} 54 | insert into `{{ target.database }}`.{{ target.schema }}.adf_triggers VALUES 55 | ( 56 | '/subscriptions/9f07555crvices-atastudio-adf-d/triggers/TR-fd-prod-duration_estimation-monthly', 57 | 'TR-fd-prod-dion-monthly', 58 | 'Microsoft.Dataes/triggers', 59 | NAMED_STRUCT( 60 | 'annotations', ARRAY('fraud-detection', 'prod', 'duration-estimation'), 61 | 'pipelines', ARRAY( 62 | NAMED_STRUCT( 63 | 'pipelineReference', NAMED_STRUCT( 64 | 'referenceName', 'fd-prod-duration_estimation', 65 | 'type', 'PipelineReference' 66 | ), 67 | 'parameters', NAMED_STRUCT( 68 | 'days_before', NULL, 69 | 'environment', NULL, 70 | 'reset_type', NULL, 71 | 'Job_ID', NULL, 72 | 'DatabricksWorkspaceID', NULL, 73 | 'WaitRecheckSeconds', NULL 74 | ) 75 | ) 76 | ), 77 | 'type', 'ScheduleTrigger', 78 
| 'typeProperties', NAMED_STRUCT( 79 | 'recurrence', NAMED_STRUCT( 80 | 'frequency', 'Month', 81 | 'interval', 1, 82 | 'startTime', '2020-10-14T04:30:00', 83 | 'timeZone', 'E. South America Standard Time', 84 | 'schedule', NAMED_STRUCT( 85 | 'minutes', ARRAY(30), 86 | 'hours', ARRAY(4), 87 | 'weekDays', NULL, 88 | 'monthDays', ARRAY(14) 89 | ) 90 | ), 91 | 'parentTrigger', NULL, 92 | 'requestedStartTime', NULL, 93 | 'requestedEndTime', NULL, 94 | 'rerunConcurrency', NULL 95 | ), 96 | 'runtimeState', NULL 97 | ) 98 | ), 99 | ( 100 | '/subscriptions/TR-fd-dev-predict-main', 101 | 'TR-fd-dev-predict-main', 102 | 'Microsoft.Dats/triggers', 103 | NAMED_STRUCT( 104 | 'annotations', ARRAY('fraud-detection', 'dev', 'predict'), 105 | 'pipelines', ARRAY( 106 | NAMED_STRUCT( 107 | 'pipelineReference', NAMED_STRUCT( 108 | 'referenceName', 'fd-dev-predict-main', 109 | 'type', 'PipelineReference' 110 | ), 111 | 'parameters', NAMED_STRUCT( 112 | 'days_before', NULL, 113 | 'environment', NULL, 114 | 'reset_type', NULL, 115 | 'Job_ID', NULL, 116 | 'DatabricksWorkspaceID', NULL, 117 | 'WaitRecheckSeconds', NULL 118 | ) 119 | ) 120 | ), 121 | 'type', 'ScheduleTrigger', 122 | 'typeProperties', NAMED_STRUCT( 123 | 'recurrence', NAMED_STRUCT( 124 | 'frequency', 'Week', 125 | 'interval', 1, 126 | 'startTime', '2021-01-26T21:50:00', 127 | 'timeZone', 'E. South America Standard Time', 128 | 'schedule', NAMED_STRUCT( 129 | 'minutes', ARRAY(0), 130 | 'hours', ARRAY(5), 131 | 'weekDays', ARRAY('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'), 132 | 'monthDays', NULL 133 | ) 134 | ), 135 | 'parentTrigger', NULL, 136 | 'requestedStartTime', NULL, 137 | 'requestedEndTime', NULL, 138 | 'rerunConcurrency', NULL 139 | ), 140 | 'runtimeState', 'Stopped' 141 | ) 142 | ); 143 | {% endset %} 144 | 145 | {% do run_query(create_table) %} 146 | {% do log("finished creating table triggers", info=true) %} 147 | 148 | {% do run_query(insert_table) %} 149 | {% do log("finished insert table triggers", info=true) %} 150 | {%- endmacro -%} 151 | 152 | 153 | {%- macro bigquery__adf_triggers() -%} 154 | {% set create_table %} 155 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_triggers ( 156 | id STRING, 157 | name STRING, 158 | type STRING, 159 | properties STRUCT< 160 | annotations ARRAY, 161 | pipelines ARRAY< 162 | STRUCT< 163 | pipelineReference STRUCT< 164 | referenceName STRING, 165 | type STRING 166 | >, 167 | parameters STRUCT< 168 | days_before STRING, 169 | environment STRING, 170 | reset_type STRING, 171 | Job_ID STRING, 172 | DatabricksWorkspaceID STRING, 173 | WaitRecheckSeconds INT 174 | > 175 | > 176 | >, 177 | type STRING, 178 | typeProperties STRUCT< 179 | recurrence STRUCT< 180 | frequency STRING, 181 | `interval` INT, 182 | startTime STRING, 183 | timeZone STRING, 184 | schedule STRUCT< 185 | minutes ARRAY, 186 | hours ARRAY, 187 | weekDays ARRAY, 188 | monthDays ARRAY 189 | > 190 | >, 191 | parentTrigger STRING, 192 | requestedStartTime STRING, 193 | requestedEndTime STRING, 194 | rerunConcurrency INT 195 | >, 196 | runtimeState STRING 197 | > 198 | ); 199 | 200 | {% endset %} 201 | 202 | {% set insert_table %} 203 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.adf_triggers 204 | VALUES 205 | ( 206 | '/subscriptions/9f0755tories/tlo-datastudio-adf-d/triggers/TR-fd-prod-duration_estimation-monthly', 207 | 'TR-fd-prod-ation-monthly', 208 | 'Microsoft.Dattories/triggers', 209 | STRUCT( 210 | ARRAY['fraud-detection', 'prod', 'duration-estimation'], 211 | 
ARRAY, 216 | parameters STRUCT< 217 | days_before STRING, 218 | environment STRING, 219 | reset_type STRING, 220 | Job_ID STRING, 221 | DatabricksWorkspaceID STRING, 222 | WaitRecheckSeconds INT64 223 | > 224 | >>[ 225 | STRUCT( 226 | STRUCT( 227 | 'fd-prod-duration_estimation', 228 | 'PipelineReference' 229 | ), 230 | STRUCT( 231 | NULL, 232 | NULL, 233 | NULL, 234 | NULL, 235 | NULL, 236 | NULL 237 | ) 238 | ) 239 | ], 240 | 'ScheduleTrigger', 241 | STRUCT( 242 | STRUCT( 243 | 'Month', 244 | 1, 245 | '2020-10-14T04:30:00', 246 | 'E. South America Standard Time', 247 | STRUCT( 248 | ARRAY[30], 249 | ARRAY[4], 250 | NULL, 251 | ARRAY[14] 252 | ) 253 | ), 254 | NULL, 255 | NULL, 256 | NULL, 257 | NULL 258 | ), 259 | NULL 260 | ) 261 | ), 262 | ( 263 | '/subscriptions/y/factories/tlo-datastudio-adf-d/triggers/TR-fd-dev-predict-main', 264 | 'TR-fd-dev-predict-main', 265 | 'Microsoft.DataFactory/factories/triggers', 266 | STRUCT( 267 | ARRAY['fraud-detection', 'dev', 'predict'], 268 | ARRAY, 273 | parameters STRUCT< 274 | days_before STRING, 275 | environment STRING, 276 | reset_type STRING, 277 | Job_ID STRING, 278 | DatabricksWorkspaceID STRING, 279 | WaitRecheckSeconds INT64 280 | > 281 | >>[ 282 | STRUCT( 283 | STRUCT( 284 | 'fd-dev-predict-main', 285 | 'PipelineReference' 286 | ), 287 | STRUCT( 288 | NULL, 289 | NULL, 290 | NULL, 291 | NULL, 292 | NULL, 293 | NULL 294 | ) 295 | ) 296 | ], 297 | 'ScheduleTrigger', 298 | STRUCT( 299 | STRUCT( 300 | 'Week', 301 | 1, 302 | '2021-01-26T21:50:00', 303 | 'E. South America Standard Time', 304 | STRUCT( 305 | ARRAY[0], 306 | ARRAY[5], 307 | ARRAY['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'], 308 | NULL 309 | ) 310 | ), 311 | NULL, 312 | NULL, 313 | NULL, 314 | NULL 315 | ), 316 | 'Stopped' 317 | ) 318 | ); 319 | 320 | {% endset %} 321 | 322 | {% do run_query(create_table) %} 323 | {% do log("finished creating table triggers", info=true) %} 324 | 325 | {% do run_query(insert_table) %} 326 | {% do log("finished insert table triggers", info=true) %} 327 | {%- endmacro -%} -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /integration_tests/macros/jobs.sql: -------------------------------------------------------------------------------- 1 | {% macro jobs() -%} 2 | {{ return(adapter.dispatch('jobs')()) }} 3 | {%- endmacro %} 4 | 5 | {% macro databricks__jobs() %} 6 | {% set create_table %} 7 | create or replace table `{{ target.database }}`.{{ target.schema }}.jobs ( 8 | created_time BIGINT, 9 | creator_user_name STRING, 10 | job_id BIGINT, 11 | settings STRUCT< 12 | email_notifications STRUCT< 13 | on_failure ARRAY, 14 | no_alert_for_skipped_runs BOOLEAN 15 | >, 16 | format STRING, 17 | max_concurrent_runs BIGINT, 18 | name STRING, 19 | schedule STRUCT< 20 | pause_status STRING, 21 | quartz_cron_expression STRING, 22 | timezone_id STRING 23 | >, 24 | tags STRUCT< 25 | dev STRING, 26 | env STRING 27 | >, 28 | timeout_seconds bigint, 29 | trigger STRUCT< 30 | file_arrival STRUCT< 31 | url STRING 32 | >, 33 | pause_status STRING 34 | > 35 | >, 36 | insertedDate TIMESTAMP 37 | ); 38 | {% endset %} 39 | 40 | {% set insert_table %} 41 | 42 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.jobs VALUES 43 | ( 44 | CAST(1722606667504 AS BIGINT), 45 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING), 46 | CAST(466340877826952 AS BIGINT), 47 | NAMED_STRUCT( 48 | 'email_notifications', NAMED_STRUCT( 49 | 'on_failure',ARRAY('produtos_horizontais@indicium1.opsgenie.net'), 50 | 'no_alert_for_skipped_runs', NULL 51 | ), 52 | 'format', 'MULTI_TASK', 53 | 'max_concurrent_runs', 1, 54 | 'name', '[prod] core_dag_monitoring_data_transformation_dbt_job', 55 | 'schedule', NAMED_STRUCT( 56 | 'pause_status', NULL, 57 | 'quartz_cron_expression', NULL, 58 | 'timezone_id', NULL 59 | ), 60 | 'tags',NAMED_STRUCT( 61 | 'dev', NULL, 62 | 'env','prod' 63 | ), 64 | 'timeout_seconds', 0, 65 | 'trigger',NAMED_STRUCT( 66 | 'file_arrival',NAMED_STRUCT( 67 | 'url',NULL 68 | ), 69 | 'paused_status',NULL 70 | ) 71 | ), 72 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 73 | ), 74 | ( 75 | CAST(1722544845800 AS BIGINT), 76 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING), 77 | CAST(823250232903490 AS BIGINT), 78 | NAMED_STRUCT( 79 | 'email_notifications',NAMED_STRUCT( 80 | 'on_failure', ARRAY('produtos_horizontais@indicium1.opsgenie.net'), 81 | 'no_alert_for_skipped_runs', NULL 82 | ), 83 | 'format','MULTI_TASK', 84 | 'max_concurrent_runs',1, 85 | 'name','[prod] core_dag_monitoring_extraction_meltano_job', 86 | 'schedule',NAMED_STRUCT( 87 | 'pause_status','UNPAUSED', 88 | 'quartz_cron_expression','0 0 0/3 * * ? *', 89 | 'timezone_id','UTC' 90 | ), 91 | 'tags',NAMED_STRUCT( 92 | 'dev', NULL, 93 | 'env','prod' 94 | ), 95 | 'timeout_seconds',0, 96 | 'trigger',NAMED_STRUCT( 97 | 'file_arrival',NAMED_STRUCT( 98 | 'url',NULL 99 | ), 100 | 'paused_status',NULL 101 | ) 102 | ), 103 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 104 | ), 105 | ( 106 | CAST(1722538441265 AS BIGINT), 107 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING), 108 | CAST(790689006770532 AS BIGINT), 109 | NAMED_STRUCT( 110 | 'email_notifications',NAMED_STRUCT( 111 | 'on_failure',ARRAY('produtos_horizontais@indicium1.opsgenie.net'), 112 | 'no_alert_for_skipped_runs',NULL 113 | ), 114 | 'format','MULTI_TASK', 115 | 'max_concurrent_runs',1, 116 | 'name','[prod] investment_postgres_extraction_spark_job', 117 | 'schedule',NAMED_STRUCT( 118 | 'pause_status','UNPAUSED', 119 | 'quartz_cron_expression','0 0 0/4 * * ? 
*', 120 | 'timezone_id','UTC' 121 | ), 122 | 'tags',NAMED_STRUCT( 123 | 'dev', NULL, 124 | 'env','prod' 125 | ), 126 | 'timeout_seconds',0, 127 | 'trigger',NAMED_STRUCT( 128 | 'file_arrival',NAMED_STRUCT( 129 | 'url',NULL 130 | ), 131 | 'paused_status',NULL 132 | ) 133 | ), 134 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 135 | ); 136 | 137 | {% endset %} 138 | 139 | {% do run_query(create_table) %} 140 | {% do log("finished creating table jobs", info=true) %} 141 | 142 | {% do run_query(insert_table) %} 143 | {% do log("finished insert table jobs ", info=true) %} 144 | 145 | 146 | {% endmacro %} 147 | 148 | {% macro bigquery__jobs() %} 149 | {% set create_table %} 150 | create or replace table `{{ target.database }}`.{{ target.schema }}.jobs ( 151 | created_time BIGINT, 152 | creator_user_name STRING, 153 | job_id BIGINT, 154 | settings STRUCT< 155 | email_notifications STRUCT< 156 | on_failure ARRAY, 157 | no_alert_for_skipped_runs BOOLEAN 158 | >, 159 | format STRING, 160 | max_concurrent_runs BIGINT, 161 | name STRING, 162 | schedule STRUCT< 163 | pause_status STRING, 164 | quartz_cron_expression STRING, 165 | timezone_id STRING 166 | >, 167 | tags STRUCT< 168 | dev STRING, 169 | env STRING 170 | >, 171 | timeout_seconds bigint, 172 | trigger STRUCT< 173 | file_arrival STRUCT< 174 | url STRING 175 | >, 176 | pause_status STRING 177 | > 178 | >, 179 | insertedDate TIMESTAMP 180 | ); 181 | {% endset %} 182 | 183 | {% set insert_table %} 184 | 185 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.jobs VALUES 186 | ( 187 | CAST(1722606667504 AS INT64), 188 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING), 189 | CAST(466340877826952 AS INT64), 190 | STRUCT( 191 | STRUCT( 192 | ARRAY['produtos_horizontais@indicium1.opsgenie.net'] AS on_failure, 193 | NULL AS no_alert_for_skipped_runs 194 | ) AS email_notifications, 195 | 'MULTI_TASK' AS format, 196 | 1 AS max_concurrent_runs, 197 | '[prod] core_dag_monitoring_data_transformation_dbt_job' AS name, 198 | STRUCT( 199 | NULL AS pause_status, 200 | NULL AS quartz_cron_expression, 201 | NULL AS timezone_id 202 | ) AS schedule, 203 | STRUCT( 204 | NULL AS dev, 205 | 'prod' AS env 206 | ) AS tags, 207 | 0 AS timeout_seconds, 208 | STRUCT( 209 | STRUCT( 210 | NULL AS url 211 | ) AS file_arrival, 212 | NULL AS paused_status 213 | ) AS trigger 214 | ), 215 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 216 | ), 217 | ( 218 | CAST(1722544845800 AS INT64), 219 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING), 220 | CAST(823250232903490 AS INT64), 221 | STRUCT( 222 | STRUCT( 223 | ARRAY['produtos_horizontais@indicium1.opsgenie.net'] AS on_failure, 224 | NULL AS no_alert_for_skipped_runs 225 | ) AS email_notifications, 226 | 'MULTI_TASK' AS format, 227 | 1 AS max_concurrent_runs, 228 | '[prod] core_dag_monitoring_extraction_meltano_job' AS name, 229 | STRUCT( 230 | 'UNPAUSED' AS pause_status, 231 | '0 0 0/3 * * ? 
*' AS quartz_cron_expression, 232 | 'UTC' AS timezone_id 233 | ) AS schedule, 234 | STRUCT( 235 | NULL AS dev, 236 | 'prod' AS env 237 | ) AS tags, 238 | 0 AS timeout_seconds, 239 | STRUCT( 240 | STRUCT( 241 | NULL AS url 242 | ) AS file_arrival, 243 | NULL AS paused_status 244 | ) AS trigger 245 | ), 246 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 247 | ), 248 | ( 249 | CAST(1722538441265 AS INT64), 250 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING), 251 | CAST(790689006770532 AS INT64), 252 | STRUCT( 253 | STRUCT( 254 | ARRAY['produtos_horizontais@indicium1.opsgenie.net'] AS on_failure, 255 | NULL AS no_alert_for_skipped_runs 256 | ) AS email_notifications, 257 | 'MULTI_TASK' AS format, 258 | 1 AS max_concurrent_runs, 259 | '[prod] investment_postgres_extraction_spark_job' AS name, 260 | STRUCT( 261 | 'UNPAUSED' AS pause_status, 262 | '0 0 0/4 * * ? *' AS quartz_cron_expression, 263 | 'UTC' AS timezone_id 264 | ) AS schedule, 265 | STRUCT( 266 | NULL AS dev, 267 | 'prod' AS env 268 | ) AS tags, 269 | 0 AS timeout_seconds, 270 | STRUCT( 271 | STRUCT( 272 | NULL AS url 273 | ) AS file_arrival, 274 | NULL AS paused_status 275 | ) AS trigger 276 | ), 277 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 278 | ); 279 | 280 | {% endset %} 281 | 282 | {% do run_query(create_table) %} 283 | {% do log("finished creating table jobs", info=true) %} 284 | 285 | {% do run_query(insert_table) %} 286 | {% do log("finished insert table jobs ", info=true) %} 287 | 288 | 289 | {% endmacro %} 290 | 291 | {% macro snowflake__jobs() %} 292 | {% set create_table %} 293 | CREATE OR REPLACE TABLE {{ target.database }}.{{ target.schema }}.jobs ( 294 | created_time BIGINT, 295 | creator_user_name VARCHAR, 296 | job_id BIGINT, 297 | settings VARIANT, 298 | insertedDate TIMESTAMP 299 | ); 300 | {% endset %} 301 | 302 | {% set insert_table %} 303 | 304 | INSERT INTO {{ target.database }}.{{ target.schema }}.jobs SELECT 305 | CAST(1722606667504 AS BIGINT), 306 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS VARCHAR), 307 | CAST(466340877826952 AS BIGINT), 308 | OBJECT_CONSTRUCT( 309 | 'email_notifications', OBJECT_CONSTRUCT( 310 | 'on_failure', ARRAY_CONSTRUCT('produtosgenie.net'), 311 | 'no_alert_for_skipped_runs', NULL 312 | ), 313 | 'format', 'MULTI_TASK', 314 | 'max_concurrent_runs', 1, 315 | 'name', '[prod] coreion_dbt_job', 316 | 'schedule', OBJECT_CONSTRUCT( 317 | 'pause_status', NULL, 318 | 'quartz_cron_expression', NULL, 319 | 'timezone_id', NULL 320 | ), 321 | 'tags', OBJECT_CONSTRUCT( 322 | 'dev', NULL, 323 | 'env', 'prod' 324 | ), 325 | 'timeout_seconds', 0, 326 | 'trigger', OBJECT_CONSTRUCT( 327 | 'file_arrival', OBJECT_CONSTRUCT( 328 | 'url', NULL 329 | ), 330 | 'paused_status', NULL 331 | ) 332 | ), 333 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 334 | UNION ALL 335 | SELECT 336 | CAST(1722544845800 AS BIGINT), 337 | CAST('13bc3f4b44571518ae' AS VARCHAR), 338 | CAST(823250232903490 AS BIGINT), 339 | OBJECT_CONSTRUCT( 340 | 'email_notifications', OBJECT_CONSTRUCT( 341 | 'on_failure', ARRAY_CONSTRUCT('prod.opsgenie.net'), 342 | 'no_alert_for_skipped_runs', NULL 343 | ), 344 | 'format', 'MULTI_TASK', 345 | 'max_concurrent_runs', 1, 346 | 'name', '[prod] cltano_job', 347 | 'schedule', OBJECT_CONSTRUCT( 348 | 'pause_status', 'UNPAUSED', 349 | 'quartz_cron_expression', '0 0 0/3 * * ? 
*', 350 | 'timezone_id', 'UTC' 351 | ), 352 | 'tags', OBJECT_CONSTRUCT( 353 | 'dev', NULL, 354 | 'env', 'prod' 355 | ), 356 | 'timeout_seconds', 0, 357 | 'trigger', OBJECT_CONSTRUCT( 358 | 'file_arrival', OBJECT_CONSTRUCT( 359 | 'url', NULL 360 | ), 361 | 'paused_status', NULL 362 | ) 363 | ), 364 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 365 | UNION ALL 366 | SELECT 367 | CAST(1722538441265 AS BIGINT), 368 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS VARCHAR), 369 | CAST(790689006770532 AS BIGINT), 370 | OBJECT_CONSTRUCT( 371 | 'email_notifications', OBJECT_CONSTRUCT( 372 | 'on_failure', ARRAY_CONSTRUCT('produtosopsgenie.net'), 373 | 'no_alert_for_skipped_runs', NULL 374 | ), 375 | 'format', 'MULTI_TASK', 376 | 'max_concurrent_runs', 1, 377 | 'name', '[prod] invspark_job', 378 | 'schedule', OBJECT_CONSTRUCT( 379 | 'pause_status', 'UNPAUSED', 380 | 'quartz_cron_expression', '0 0 0/4 * * ? *', 381 | 'timezone_id', 'UTC' 382 | ), 383 | 'tags', OBJECT_CONSTRUCT( 384 | 'dev', NULL, 385 | 'env', 'prod' 386 | ), 387 | 'timeout_seconds', 0, 388 | 'trigger', OBJECT_CONSTRUCT( 389 | 'file_arrival', OBJECT_CONSTRUCT( 390 | 'url', NULL 391 | ), 392 | 'paused_status', NULL 393 | ) 394 | ), 395 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 396 | ; 397 | 398 | 399 | {% endset %} 400 | 401 | {% do run_query(create_table) %} 402 | {% do log("finished creating table jobs", info=true) %} 403 | 404 | {% do run_query(insert_table) %} 405 | {% do log("finished insert table jobs ", info=true) %} 406 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/seeds/adf/adf_pipelines.csv: -------------------------------------------------------------------------------- 1 | id,name,type,properties_description,properties_activities_0_name,properties_activities_0_type,properties_activities_1_name,properties_activities_1_type,properties_activities_1_typeProperties_items_value,properties_activities_1_typeProperties_items_type,properties_activities_1_typeProperties_isSequential,properties_activities_1_typeProperties_activities_0_name,properties_activities_1_typeProperties_activities_0_type,properties_activities_1_typeProperties_activities_1_name,properties_activities_1_typeProperties_activities_1_type,properties_activities_1_typeProperties_activities_2_name,properties_activities_1_typeProperties_activities_2_type,properties_activities_1_typeProperties_activities_2_typeProperties_source_type,properties_activities_1_typeProperties_activities_2_typeProperties_sink_type,properties_activities_1_typeProperties_activities_2_inputs_0_referenceName,properties_activities_1_typeProperties_activities_2_inputs_0_type,properties_activities_1_typeProperties_activities_2_outputs_0_referenceName,properties_activities_1_typeProperties_activities_2_outputs_0_type,properties_activities_1_typeProperties_activities_3_name,properties_activities_1_typeProperties_activities_3_type,properties_activities_1_typeProperties_activities_4_name,properties_activities_1_typeProperties_activities_4_type,properties_activities_1_typeProperties_activities_5_name,properties_activities_1_typeProperties_activities_5_type,properties_activities_2_name,properties_activities_2_type,properties_activities_3_name,properties_activities_3_type,properties_activities_4_name,properties_activities_4_type,properties_activities_5_name,properties_activities_5_type,properties_activities_6_name,properties_activities_6_type,etag,properties_activities_1_typeProperties_activities_6_name,properties_activities_1_type
Properties_activities_6_type,properties_activities_1_typeProperties_activities_7_name,properties_activities_1_typeProperties_activities_7_type,properties_activities_5_typeProperties_items_value,properties_activities_5_typeProperties_items_type,properties_activities_5_typeProperties_isSequential,properties_activities_5_typeProperties_activities_0_name,properties_activities_5_typeProperties_activities_0_type,properties_activities_5_typeProperties_activities_0_typeProperties_source_type,properties_activities_5_typeProperties_activities_0_typeProperties_sink_type,properties_activities_5_typeProperties_activities_0_inputs_0_referenceName,properties_activities_5_typeProperties_activities_0_inputs_0_type,properties_activities_5_typeProperties_activities_0_outputs_0_referenceName,properties_activities_5_typeProperties_activities_0_outputs_0_type,properties_activities_7_name,properties_activities_7_type,properties_activities_8_name,properties_activities_8_type,properties_activities_9_name,properties_activities_9_type,properties_activities_10_name,properties_activities_10_type,properties_activities_11_name,properties_activities_11_type,properties_activities_12_name,properties_activities_12_type,properties_activities_13_name,properties_activities_13_type,properties_activities_0_typeProperties_items_value,properties_activities_0_typeProperties_items_type,properties_activities_0_typeProperties_activities_0_name,properties_activities_0_typeProperties_activities_0_type,properties_activities_0_typeProperties_activities_0_typeProperties_source_type,properties_activities_0_typeProperties_activities_0_typeProperties_sink_type,properties_activities_0_typeProperties_activities_0_inputs_0_referenceName,properties_activities_0_typeProperties_activities_0_inputs_0_type,properties_activities_0_typeProperties_activities_0_outputs_0_referenceName,properties_activities_0_typeProperties_activities_0_outputs_0_type,properties_activities_1_typeProperties_activities_0_typeProperties_source_type,properties_activities_1_typeProperties_activities_0_typeProperties_sink_type,properties_activities_1_typeProperties_activities_0_inputs_0_referenceName,properties_activities_1_typeProperties_activities_0_inputs_0_type,properties_activities_1_typeProperties_activities_0_outputs_0_referenceName,properties_activities_1_typeProperties_activities_0_outputs_0_type,properties_activities_0_typeProperties_activities_1_name,properties_activities_0_typeProperties_activities_1_type,properties_activities_0_typeProperties_activities_2_name,properties_activities_0_typeProperties_activities_2_type,properties_activities_14_name,properties_activities_14_type,properties_activities_15_name,properties_activities_15_type,properties_activities_16_name,properties_activities_16_type,properties_activities_17_name,properties_activities_17_type,properties_activities_18_name,properties_activities_18_type,properties_activities_19_name,properties_activities_19_type,properties_activities_20_name,properties_activities_20_type,properties_activities_21_name,properties_activities_21_type,properties_activities_22_name,properties_activities_22_type,properties_activities_23_name,properties_activities_23_type,properties_activities_24_name,properties_activities_24_type,properties_activities_25_name,properties_activities_25_type,properties_activities_26_name,properties_activities_26_type,properties_activities_27_name,properties_activities_27_type,properties_activities_28_name,properties_activities_28_type,properties_activities_29_name,properties_activities_29_type,properti
es_activities_30_name,properties_activities_30_type,properties_activities_31_name,properties_activities_31_type,properties_activities_32_name,properties_activities_32_type,properties_activities_33_name,properties_activities_33_type,properties_activities_34_name,properties_activities_34_type,properties_activities_35_name,properties_activities_35_type,properties_activities_36_name,properties_activities_36_type,properties_activities_37_name,properties_activities_37_type,properties_activities_38_name,properties_activities_38_type,properties_activities_39_name,properties_activities_39_type,properties_activities_3_typeProperties_items_value,properties_activities_3_typeProperties_items_type,properties_activities_3_typeProperties_isSequential,properties_activities_3_typeProperties_activities_0_name,properties_activities_3_typeProperties_activities_0_type,properties_activities_3_typeProperties_activities_0_typeProperties_source_type,properties_activities_3_typeProperties_activities_1_name,properties_activities_3_typeProperties_activities_1_type,properties_activities_1_typeProperties_activities_1_typeProperties_source_type,properties_activities_1_typeProperties_activities_1_typeProperties_sink_type,properties_activities_1_typeProperties_activities_1_inputs_0_referenceName,properties_activities_1_typeProperties_activities_1_inputs_0_type,properties_activities_1_typeProperties_activities_1_outputs_0_referenceName,properties_activities_1_typeProperties_activities_1_outputs_0_type,properties_activities_0_typeProperties_isSequential,properties_activities_0_typeProperties_activities_2_typeProperties_source_type,properties_activities_0_typeProperties_activities_2_typeProperties_sink_type,properties_activities_0_typeProperties_activities_2_inputs_0_referenceName,properties_activities_0_typeProperties_activities_2_inputs_0_type,properties_activities_0_typeProperties_activities_2_outputs_0_referenceName,properties_activities_0_typeProperties_activities_2_outputs_0_type,properties_activities_0_typeProperties_activities_3_name,properties_activities_0_typeProperties_activities_3_type,properties_activities_0_typeProperties_activities_4_name,properties_activities_0_typeProperties_activities_4_type,properties_activities_0_typeProperties_activities_5_name,properties_activities_0_typeProperties_activities_5_type,properties_activities_0_typeProperties_activities_6_name,properties_activities_0_typeProperties_activities_6_type,properties_activities_2_typeProperties_items_value,properties_activities_2_typeProperties_items_type,properties_activities_2_typeProperties_activities_0_name,properties_activities_2_typeProperties_activities_0_type,properties_activities_2_typeProperties_activities_0_typeProperties_source_type,properties_activities_2_typeProperties_activities_0_typeProperties_sink_type,properties_activities_2_typeProperties_activities_0_inputs_0_referenceName,properties_activities_2_typeProperties_activities_0_inputs_0_type,properties_activities_2_typeProperties_activities_0_outputs_0_referenceName,properties_activities_2_typeProperties_activities_0_outputs_0_type,properties_activities_4_typeProperties_items_value,properties_activities_4_typeProperties_items_type,properties_activities_4_typeProperties_isSequential,properties_activities_4_typeProperties_activities_0_name,properties_activities_4_typeProperties_activities_0_type,properties_activities_4_typeProperties_activities_1_name,properties_activities_4_typeProperties_activities_1_type,properties_activities_4_typeProperties_activities_2_name,properties_activities_
4_typeProperties_activities_2_type,properties_activities_4_typeProperties_activities_3_name,properties_activities_4_typeProperties_activities_3_type,properties_activities_1_typeProperties_activities_8_name,properties_activities_1_typeProperties_activities_8_type,properties_activities_1_typeProperties_activities_6_typeProperties_source_type,properties_activities_2_typeProperties_isSequential,properties_activities_1_typeProperties_activities_9_name,properties_activities_1_typeProperties_activities_9_type,properties_activities_1_typeProperties_activities_10_name,properties_activities_1_typeProperties_activities_10_type,properties_activities_1_typeProperties_activities_11_name,properties_activities_1_typeProperties_activities_11_type,properties_activities_1_typeProperties_activities_12_name,properties_activities_1_typeProperties_activities_12_type,properties_activities_1_typeProperties_activities_13_name,properties_activities_1_typeProperties_activities_13_type,properties_activities_1_typeProperties_activities_14_name,properties_activities_1_typeProperties_activities_14_type,properties_activities_1_typeProperties_activities_15_name,properties_activities_1_typeProperties_activities_15_type,properties_activities_0_typeProperties_activities_1_typeProperties_source_type,properties_activities_0_typeProperties_activities_1_typeProperties_sink_type,properties_activities_0_typeProperties_activities_1_inputs_0_referenceName,properties_activities_0_typeProperties_activities_1_inputs_0_type,properties_activities_0_typeProperties_activities_1_outputs_0_referenceName,properties_activities_0_typeProperties_activities_1_outputs_0_type,properties_activities_0_typeProperties_activities_7_name,properties_activities_0_typeProperties_activities_7_type,properties_activities_0_typeProperties_activities_8_name,properties_activities_0_typeProperties_activities_8_type,properties_activities_0_typeProperties_activities_9_name,properties_activities_0_typeProperties_activities_9_type,properties_activities_0_typeProperties_activities_10_name,properties_activities_0_typeProperties_activities_10_type,properties_activities_0_typeProperties_activities_11_name,properties_activities_0_typeProperties_activities_11_type,properties_activities_0_typeProperties_activities_12_name,properties_activities_0_typeProperties_activities_12_type,properties_activities_2_typeProperties_activities_1_name,properties_activities_2_typeProperties_activities_1_type,properties_activities_2_typeProperties_activities_1_typeProperties_source_type,properties_activities_2_typeProperties_activities_2_name,properties_activities_2_typeProperties_activities_2_type,properties_activities_2_typeProperties_activities_3_name,properties_activities_2_typeProperties_activities_3_type,properties_activities_3_typeProperties_activities_2_name,properties_activities_3_typeProperties_activities_2_type,properties_activities_3_typeProperties_activities_3_name,properties_activities_3_typeProperties_activities_3_type 2 | /subscriptions/9f0755ices-dev-rg/providers/Microsoft.DataFactory/factories/tlo-datastudio-adf-d/pipelines/PL-ESPPIFLEXTODatalakeLandingZone-N,PL-ESPPIFLEXTODatalakeLandingZone-N,Microsoft.DataFactory/factories/pipelines,This pipeline copies data from all IFLEX environment tables listed in the LoadingControl. 
,Get All Tables,Lookup,For Each Tables,ForEach,@activity('Get All Tables').output.value,Expression,false,Update StartDate,DatabricksNotebook,Set CurrentDate foreach,SetVariable,Copy Data,Copy,SqlServerSource,ParquetSink,DS__ESPP__Generic,DatasetReference,DS__FEM__Equinix_DatalakeParquet,DatasetReference,Update EndDate Success,DatabricksNotebook,Set CurrentDate foreach end,SetVariable,Update EndDate Success Error,DatabricksNotebook,Set CurrentDate,SetVariable,Set Timestamp,SetVariable,Restart LoadingControl,DatabricksNotebook,Load Landing to Bronze,DatabricksNotebook,FInalizacaoPipeline,Wait,c501a33a-0000-0b00-0000-65fde01e0000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 3 | /subscriptions/9f07555c-2taplatformfm-services-dev-rg/providers/Microsoft.DataFactory/factories/tlo-datastudio-adf-d/pipelines/PL-RepomSQLPROD3TODatalakeLandingZone-N,PL-RepomSQLPROD3TODatalakeLandingZone-N,Microsoft.DataFactory/factories/pipelines,This pipeline copies data from all SQLPROD3 environment tables listed in the LoadingControl. ,Get All Tables,Lookup,For Each Tables,ForEach,@activity('Get All Tables').output.value,Expression,false,Update StartDate,DatabricksNotebook,Set CurrentDate foreach,SetVariable,Copy Data,Copy,SqlServerSource,ParquetSink,DS__Repom__Generic,DatasetReference,DS__FEM__UolDiveo_DatalakeParquet,DatasetReference,Update EndDate Success,DatabricksNotebook,Set CurrentDate foreach end,SetVariable,Update EndDate Success Error,DatabricksNotebook,Set CurrentDate,SetVariable,Set Timestamp,SetVariable,Restart LoadingControl,DatabricksNotebook,Load Landing to Bronze,DatabricksNotebook,FInalizacaoPipeline,Wait,c501a23a-0000-0b00-0000-65fde01e0000,FilterType,IfCondition,Update EndDate Success Error CopyData,DatabricksNotebook,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 4 | --------------------------------------------------------------------------------