├── .github ├── CODEOWNERS ├── release-drafter.yml ├── dependabot.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── pull_request_template.md └── workflows │ ├── release.yml │ └── ci.yml ├── integration_tests ├── packages.yml ├── requirements.txt ├── docs │ └── integration_tests_diagram.png ├── for_CI │ ├── change_of_database.sh │ ├── change_dbt_project_airflow_source.sh │ ├── change_dbt_project_adf_source.sh │ ├── change_dbt_project_from_databricks_to_airflow.sh │ └── change_dbt_project_databricks_source.sh ├── macros │ ├── create_schema.sql │ ├── drop_schema.sql │ ├── seed__task_fail.sql │ ├── adf_activity_runs.sql │ ├── seed__dag.sql │ ├── seed__dag_run.sql │ ├── seed__task_instance.sql │ ├── adf_pipeline_runs.sql │ ├── adf_triggers.sql │ └── jobs.sql ├── profiles.yml ├── dbt_project.yml ├── README.md └── seeds │ ├── airflow │ ├── task_instance.csv │ ├── task_fail.csv │ ├── dag.csv │ └── dag_run.csv │ └── adf │ ├── adf_activity_runs.csv │ └── adf_pipelines.csv ├── .gitignore ├── packages.yml ├── models ├── staging │ ├── databricks_workflow_sources │ │ ├── stg_task_instance_databricks_workflow.sql │ │ ├── stg_task_fail_databricks_workflow.sql │ │ ├── stg_dag_run_databricks_workflow.sql │ │ ├── stg_dag_databricks_workflow.sql │ │ └── source.yml │ ├── dbt_utils_day.sql │ ├── airflow_sources │ │ ├── stg_dag_run_airflow.sql │ │ ├── stg_task_fail_airflow.sql │ │ ├── stg_dag_airflow.sql │ │ ├── stg_task_instance_airflow.sql │ │ └── source.yml │ └── adf_sources │ │ ├── stg_dag_run_adf.sql │ │ ├── stg_task_fail_adf.sql │ │ ├── stg_task_instance_adf.sql │ │ ├── stg_dag_adf.sql │ │ └── source.yml ├── marts │ ├── dim_dag_monitoring_dag.sql │ ├── bridge_dag_monitoring.yml │ ├── dim_dag_monitoring_task.yml │ ├── dim_dag_monitoring_dag.yml │ ├── fact_dag_monitoring_dag_run.yml │ ├── fact_dag_monitoring_task_fail.yml │ ├── bridge_dag_monitoring.sql │ ├── fact_dag_monitoring_task_instance.yml │ ├── fact_dag_monitoring_dag_run.sql │ ├── fact_dag_monitoring_task_fail.sql │ ├── dim_dag_monitoring_task.sql │ └── fact_dag_monitoring_task_instance.sql ├── docs │ └── universal.md └── calendar │ └── dim_dag_monitoring_dates.sql ├── package-lock.yml ├── requirements.txt ├── macros ├── cast_as_date.sql ├── day_of_year.sql ├── cast_as_timestamp.sql ├── adf_pipelines_name.sql ├── flatten_data.sql ├── date_diff.sql ├── replace_dot_for_colon_notation.sql ├── date_add.sql ├── date_format.sql ├── cast_as_string.sql ├── day_of_week.sql └── model_task_instance_databricks_workflow.sql ├── example.env ├── setup.sh ├── catalog-dag-monitoring.yaml ├── profiles.yml ├── dbt_project.yml ├── README.md └── LICENSE /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @techindicium/central-de-dados -------------------------------------------------------------------------------- /integration_tests/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - local: ../ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | dbt_packages 3 | target 4 | *logs 5 | .env 6 | env 7 | .user.yml -------------------------------------------------------------------------------- /packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 1.1.1 
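The integration tests pull this package through a local path (`packages: - local: ../` in integration_tests/packages.yml), while the root packages.yml above only pins dbt_utils. As a minimal sketch, a downstream project would instead reference the package from its own packages.yml, for example via a git pin (the revision shown is hypothetical; use the tag or branch you actually need, and dbt_utils is resolved transitively):

```yaml
# illustrative consumer packages.yml, not a file in this repository
packages:
  - git: "https://github.com/techindicium/dbt-dag-monitoring.git"
    revision: 0.2.0   # hypothetical pin; match the release you want
```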
-------------------------------------------------------------------------------- /integration_tests/requirements.txt: -------------------------------------------------------------------------------- 1 | dbt-snowflake==1.8.3 2 | dbt-databricks==v1.8.5 3 | databricks-sdk==0.17.0 4 | dbt-core==1.8.5 -------------------------------------------------------------------------------- /models/staging/databricks_workflow_sources/stg_task_instance_databricks_workflow.sql: -------------------------------------------------------------------------------- 1 | {{ model_task_instance_databricks_workflow() }} -------------------------------------------------------------------------------- /package-lock.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: 1.1.1 4 | sha1_hash: b0e601a7edf623823e7381fcbae7d8a2d0999fe4 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dbt-core==1.8.5 2 | dbt-databricks==v1.8.5 3 | databricks-sdk==0.17.0 4 | dbt-snowflake==1.8.3 5 | google-cloud==0.34.0 6 | dbt-bigquery==1.8.2 -------------------------------------------------------------------------------- /integration_tests/docs/integration_tests_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techindicium/dbt-dag-monitoring/HEAD/integration_tests/docs/integration_tests_diagram.png -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | template: | 2 | ## What's Changed 3 | $CHANGES 4 | 5 | **Full Changelog**: https://github.com/$OWNER/$REPOSITORY/compare/$PREVIOUS_TAG...v$RESOLVED_VERSION 6 | -------------------------------------------------------------------------------- /integration_tests/for_CI/change_of_database.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source=$1 4 | 5 | sed -i "s/dag_monitoring_${source}_database: $2/dag_monitoring_${source}_database: $3/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 6 | -------------------------------------------------------------------------------- /integration_tests/macros/create_schema.sql: -------------------------------------------------------------------------------- 1 | {% macro create_schema(schema_name) %} 2 | {% set sql %} 3 | CREATE SCHEMA IF NOT EXISTS {{ schema_name }} 4 | {% endset %} 5 | {{ run_query(sql) }} 6 | {% endmacro %} -------------------------------------------------------------------------------- /macros/cast_as_date.sql: -------------------------------------------------------------------------------- 1 | {% macro cast_as_date(column) -%} 2 | {{ return(adapter.dispatch('cast_as_date')(column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__cast_as_date(column) -%} 7 | cast({{ column }} as date) 8 | {%- endmacro %} 9 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | rebase-strategy: "disabled" 8 | ignore: 9 | - dependency-name: "*" 10 | update-types: 11 | - version-update:semver-patch 12 
| -------------------------------------------------------------------------------- /example.env: -------------------------------------------------------------------------------- 1 | # this is an example of how to fill the information in each variable 2 | 3 | 4 | # default configurations, you don't have to change it unless you have a specific need 5 | export DBT_DEFAULT_TARGET="dev"; 6 | export DEV_CATALOG_NAME=""; 7 | export DEV_SCHEMA_NAME=""; 8 | export DEV_HOST=""; 9 | export DEV_HTTP_PATH=""; 10 | export DEV_TOKEN=""; -------------------------------------------------------------------------------- /macros/day_of_year.sql: -------------------------------------------------------------------------------- 1 | {% macro day_of_year(column) -%} 2 | {{ return(adapter.dispatch('day_of_year')(column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__day_of_year(column) -%} 7 | extract(dayofyear from {{ column }}) 8 | {%- endmacro %} 9 | 10 | 11 | {% macro databricks__day_of_year(column) -%} 12 | extract(doy from {{ column }}) 13 | {%- endmacro %} -------------------------------------------------------------------------------- /integration_tests/macros/drop_schema.sql: -------------------------------------------------------------------------------- 1 | {% macro drop_schema(schema_name) %} 2 | 3 | {% set drop_schema_query %} 4 | DROP SCHEMA IF EXISTS {{ schema_name }} CASCADE; 5 | {% endset %} 6 | 7 | {% if execute %} 8 | {{ run_query(drop_schema_query) }} 9 | {% endif %} 10 | 11 | {% do log("Dropped schema " ~ schema_name, info = true) %} 12 | 13 | {% endmacro %} -------------------------------------------------------------------------------- /macros/cast_as_timestamp.sql: -------------------------------------------------------------------------------- 1 | {% macro cast_as_timestamp(column, n=1000) -%} 2 | {{ return(adapter.dispatch('cast_as_timestamp')(column, n=1000)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__cast_as_timestamp(column, n=1000) -%} 7 | to_timestamp({{ column }} / {{ n }} ) 8 | {%- endmacro %} 9 | 10 | {% macro bigquery__cast_as_timestamp(column, n=1000) -%} 11 | TIMESTAMP_SECONDS(cast({{ column }} / {{ n }} as int)) 12 | {%- endmacro %} -------------------------------------------------------------------------------- /macros/adf_pipelines_name.sql: -------------------------------------------------------------------------------- 1 | {% macro adf_pipelines_name(column) -%} 2 | {{ return(adapter.dispatch('adf_pipelines_name')(column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__adf_pipelines_name(column) -%} 7 | {{ column }} 8 | {%- endmacro %} 9 | 10 | {% macro bigquery__adf_pipelines_name(column) -%} 11 | {% if column == 'pipelines.pipelineReference.referenceName' -%} 12 | {{ 'pipelineReference.referenceName' }} 13 | {% endif %} 14 | {%- endmacro %} -------------------------------------------------------------------------------- /macros/flatten_data.sql: -------------------------------------------------------------------------------- 1 | {% macro flatten_data(column) -%} 2 | {{ return(adapter.dispatch('flatten_data')(column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro databricks__flatten_data(column) -%} 7 | lateral view explode ({{ column }}) 8 | {%- endmacro %} 9 | 10 | {% macro snowflake__flatten_data(column) -%} 11 | , lateral flatten(input => {{ column }}) 12 | {%- endmacro %} 13 | 14 | {% macro bigquery__flatten_data(column) -%} 15 | , unnest({{ column }}) 16 | {%- endmacro %} 17 | 
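The adapter-dispatch pattern above is what lets the same staging models run on Databricks, Snowflake and BigQuery. A minimal sketch of a call site, assuming the `job_runs` source and its `tasks` array column from this package's Databricks source definition; the per-adapter expansions in the comments are taken verbatim from the implementations above, while the query itself is illustrative and a column alias may still be required by your warehouse:

```sql
-- illustrative only: how a model might invoke the dispatched macro
select *
from {{ source('raw_databricks_workflow_monitoring', 'job_runs') }}
{{ flatten_data('tasks') }}

-- flatten_data('tasks') renders as, depending on the active adapter:
--   databricks:  lateral view explode (tasks)
--   snowflake:   , lateral flatten(input => tasks)
--   bigquery:    , unnest(tasks)
```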
-------------------------------------------------------------------------------- /macros/date_diff.sql: -------------------------------------------------------------------------------- 1 | {% macro date_diff(datepart, start_date, end_date) -%} 2 | {{ return(adapter.dispatch('date_diff')(datepart, start_date, end_date)) }} 3 | {%- endmacro %} 4 | 5 | {% macro default__date_diff(datepart, start_date, end_date) -%} 6 | datediff({{ datepart }}, {{ start_date }}, {{ end_date }}) 7 | {%- endmacro %} 8 | 9 | {% macro bigquery__date_diff(datepart, start_date, end_date) -%} 10 | date_diff({{ end_date }}, {{ start_date }}, {{ datepart }}) 11 | {%- endmacro %} -------------------------------------------------------------------------------- /macros/replace_dot_for_colon_notation.sql: -------------------------------------------------------------------------------- 1 | {% macro replace_dot_for_colon(struct_column, column_item) -%} 2 | {{ return(adapter.dispatch('replace_dot_for_colon')(struct_column, column_item)) }} 3 | {%- endmacro %} 4 | 5 | {% macro default__replace_dot_for_colon(struct_column, column_item) -%} 6 | {{ struct_column }}.{{ column_item }} 7 | {%- endmacro %} 8 | 9 | {% macro snowflake__replace_dot_for_colon(struct_column, column_item) -%} 10 | {{ struct_column }}:{{ column_item }} 11 | {%- endmacro %} -------------------------------------------------------------------------------- /macros/date_add.sql: -------------------------------------------------------------------------------- 1 | {% macro date_add(datepart, interval, column, default='INTERVAL') -%} 2 | {{ return(adapter.dispatch('date_add')(datepart, interval, column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__date_add(datepart, interval, column, default='INTERVAL') -%} 7 | dateadd({{ datepart }}, {{ interval }}, {{ column }} ) 8 | {%- endmacro %} 9 | 10 | {% macro bigquery__date_add(datepart, interval, column, default='INTERVAL') -%} 11 | date_add({{ column }}, {{ default }} {{ interval }} {{ datepart }} ) 12 | {%- endmacro %} -------------------------------------------------------------------------------- /models/staging/dbt_utils_day.sql: -------------------------------------------------------------------------------- 1 | {% set my_query %} 2 | select cast({{current_timestamp()}} as date) 3 | {% endset %} 4 | 5 | {% if execute %} 6 | {% set today = run_query(my_query).columns[0].values()[0] %} 7 | {% set tomorrow = dateadd('day', 1, "'" ~ today ~ "'") %} 8 | {% set start_date = var('dbt_dag_monitoring')['dag_monitoring_start_date'] %} 9 | {% else %} 10 | {% set tomorrow = ' ' %} 11 | {% set start_date = ' ' %} 12 | {% endif %} 13 | 14 | {{ dbt_utils.date_spine( 15 | datepart="day", 16 | start_date=start_date, 17 | end_date=tomorrow 18 | ) 19 | }} -------------------------------------------------------------------------------- /macros/date_format.sql: -------------------------------------------------------------------------------- 1 | {% macro month_day(column, format='') -%} 2 | {{ return(adapter.dispatch('month_day')(column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro databricks__month_day(column, format='dd-MM') -%} 7 | date_format({{ column }}, '{{ format }}') 8 | {%- endmacro %} 9 | 10 | {% macro snowflake__month_day(column, format='dd-MM') -%} 11 | to_char(cast({{ column }} as date), '{{ format }}') 12 | {%- endmacro %} 13 | 14 | 15 | {% macro bigquery__month_day(column, format='%d-%m') -%} 16 | cast(parse_date('{{ format }}', cast({{ column }} as string)) as string) 17 | {%- endmacro %} 
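The date helpers follow the same dispatch approach. A short usage sketch, where the `date_diff` call mirrors what stg_dag_run_airflow does and the remaining columns are illustrative only (note that `month_day` has no `default__` implementation, so it resolves only on Databricks, Snowflake and BigQuery):

```sql
-- illustrative only: combining the date macros in a staging-style select
select
    {{ date_diff('second', 'start_date', 'end_date') }} as duration         -- run time in seconds
  , {{ date_add('day', 1, 'start_date') }}              as next_day         -- start_date shifted by one day
  , {{ month_day('start_date') }}                       as month_day_label  -- 'dd-MM'-style label
from {{ source('raw_airflow_monitoring', 'dag_run') }}
```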
-------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #if something goes wrong, stop the script 4 | set -e 5 | 6 | # exports all variables in .env file. Any variable defined in the script will be available in the environment. 7 | set -a 8 | 9 | # Install virtualenv if not installed 10 | pip install virtualenv 11 | 12 | # Create a new virtual environment 13 | virtualenv env 14 | 15 | # It gives permission to activate the virtual environment 16 | chmod +x env/bin/activate 17 | 18 | # Activate the virtual environment 19 | source env/bin/activate # On Windows, use `env\Scripts\activate` 20 | 21 | # # Load the environment variables 22 | source .env 23 | 24 | # Install requirements 25 | pip install -r ./requirements.txt 26 | 27 | dbt deps -------------------------------------------------------------------------------- /models/staging/airflow_sources/stg_dag_run_airflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ cast_as_string('id') }} as dag_run_id 5 | , {{ cast_as_string('dag_id') }} as dag_id 6 | , {{ cast_as_date('start_date') }} as run_date 7 | , state as dag_state 8 | , external_trigger 9 | , start_date as execution_start_date 10 | , end_date as execution_end_date 11 | , {{ date_diff('second', 'start_date', 'end_date') }} as duration 12 | , run_type 13 | , {{ cast_as_string('run_id') }} as run_id 14 | from {{ source('raw_airflow_monitoring', 'dag_run') }} 15 | ) 16 | select * 17 | from renamed 18 | -------------------------------------------------------------------------------- /models/staging/databricks_workflow_sources/stg_task_fail_databricks_workflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select 4 | {{ cast_as_string('task_id') }} as task_fail_id 5 | , {{ cast_as_string('task_id') }} as task_id 6 | , {{ cast_as_string('dag_id') }} as dag_id 7 | , run_id 8 | , execution_date 9 | , execution_start_date 10 | , execution_end_date 11 | , duration 12 | , 'not_implemented_for_databricks_workflow' as map_index 13 | from {{ ref('stg_task_instance_databricks_workflow') }} 14 | where state_task_instance in ('MAXIMUM_CONCURRENT_RUNS_REACHED', 'CANCELED', 'FAILED', 'UPSTREAM_FAILED') 15 | ) 16 | select * 17 | from renamed 18 | -------------------------------------------------------------------------------- /models/staging/adf_sources/stg_dag_run_adf.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ cast_as_string('id') }} as dag_run_id 5 | , {{ cast_as_string('pipelineName') }} as dag_id 6 | , {{ cast_as_date('runStart') }} as run_date 7 | , status as dag_state 8 | , {{ cast_as_string('invokedBy') }} as external_trigger 9 | , runStart as execution_start_date 10 | , runEnd as execution_end_date 11 | , durationInMs / 1000 as duration 12 | , "not_implemented_by_adf" as run_type 13 | , {{ cast_as_string('runId') }} as run_id 14 | from {{ source('raw_adf_monitoring', 'adf_pipeline_runs') }} 15 | ) 16 | select * 17 | from renamed 18 | -------------------------------------------------------------------------------- /models/staging/airflow_sources/stg_task_fail_airflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ 
cast_as_string('id') }} as task_fail_id 5 | , {{ cast_as_string('task_id') }} as task_id 6 | , {{ cast_as_string('dag_id') }} as dag_id 7 | , {{ cast_as_string('run_id') }} as run_id 8 | , {{ cast_as_date('start_date') }} as execution_date 9 | , start_date as execution_start_date 10 | , end_date as execution_end_date 11 | , duration 12 | , case 13 | when map_index = -1 then 'no mapping' 14 | end as map_index 15 | from {{ source('raw_airflow_monitoring', 'task_fail') }} 16 | ) 17 | select * 18 | from renamed 19 | -------------------------------------------------------------------------------- /models/marts/dim_dag_monitoring_dag.sql: -------------------------------------------------------------------------------- 1 | with 2 | stg_dag as ( 3 | {% for src in var('enabled_sources') -%} 4 | select 5 | dag_id 6 | , dag_name 7 | , dag_description 8 | , dag_frequency 9 | , timetable_description 10 | , is_paused 11 | , is_active 12 | , fileloc 13 | , owners 14 | , '{{ src }}' as source_system 15 | from 16 | {{ ref('stg_dag_' + src) }} 17 | {% if not loop.last -%} union {% endif -%} 18 | {% endfor -%} 19 | ) 20 | , stg_dag_with_sk as ( 21 | select 22 | {{ dbt_utils.generate_surrogate_key(['dag_id']) }} as dag_sk 23 | , * 24 | from stg_dag 25 | ) 26 | select * 27 | from stg_dag_with_sk -------------------------------------------------------------------------------- /models/staging/adf_sources/stg_task_fail_adf.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ cast_as_string('activityRunId') }} as task_fail_id 5 | , {{ cast_as_string('activityRunId') }} as task_id 6 | , {{ cast_as_string('pipelineName') }} as dag_id 7 | , pipelineRunId as run_id 8 | , {{ cast_as_date('activityRunStart') }} as execution_date 9 | , activityRunStart as execution_start_date 10 | , activityRunEnd as execution_end_date 11 | , durationInMs / 1000 as duration 12 | , "not_implemented_for_adf" as map_index 13 | from {{ source('raw_adf_monitoring', 'adf_activity_runs') }} 14 | where status in ('TimedOut', 'Cancelled', 'Failed') 15 | ) 16 | select * 17 | from renamed 18 | 19 | -------------------------------------------------------------------------------- /models/staging/databricks_workflow_sources/stg_dag_run_databricks_workflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select 4 | {{ cast_as_string('run_id') }} as dag_run_id 5 | , {{ cast_as_string('job_id') }} as dag_id 6 | , {{cast_as_timestamp('start_time')}} as run_date 7 | , {{replace_dot_for_colon('state','result_state')}} as dag_state 8 | , "trigger" as external_trigger 9 | , {{cast_as_timestamp('start_time')}} as execution_start_date 10 | , {{cast_as_timestamp('end_time')}} as execution_end_date 11 | , execution_duration / 1000 as duration 12 | , run_type 13 | , {{ cast_as_string('run_id') }} as run_id 14 | from {{ source('raw_databricks_workflow_monitoring', 'job_runs') }} 15 | ) 16 | select * 17 | from renamed 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. 
Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Operational System (please complete the following information):** 27 | - OS: 28 | - Version: 29 | 30 | **Do you use WSL? Which version? (please complete the following information):** 31 | 32 | **Additional context** 33 | Add any other context about the problem here. -------------------------------------------------------------------------------- /macros/cast_as_string.sql: -------------------------------------------------------------------------------- 1 | {% macro cast_as_string(column) -%} 2 | {{ return(adapter.dispatch('cast_as_string')(column)) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro databricks__cast_as_string(column) -%} 7 | cast({{ column }} as string) 8 | {%- endmacro %} 9 | 10 | {% macro bigquery__cast_as_string(column) -%} 11 | {% if column == 'invokedBy' or column == 'properties.typeProperties.recurrence.schedule' -%} 12 | {{ column }} 13 | {% else -%} 14 | cast({{ column }} as string) 15 | {% endif -%} 16 | {%- endmacro %} 17 | 18 | {% macro snowflake__cast_as_string(column) -%} 19 | {% if column == 'null' -%} 20 | {{ column }} 21 | {% else -%} 22 | cast({{ column }} as string) 23 | {% endif -%} 24 | {%- endmacro %} 25 | 26 | 27 | {% macro redshift__cast_as_string(column) -%} 28 | cast({{ column }} as varchar) 29 | {%- endmacro %} 30 | -------------------------------------------------------------------------------- /integration_tests/for_CI/change_dbt_project_airflow_source.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Enable adf sources in dbt_project.yml 4 | sed -i '/raw_adf_monitoring:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 5 | 6 | # Disable databricks sources in dbt_project.yml 7 | sed -i 's/\(raw_airflow_monitoring:\s*\n\s*+enabled:\s*\)false/\1true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 8 | 9 | # Enable adf models in dbt_project.yml 10 | sed -i '/adf_sources:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 11 | 12 | # Disable databricks_workflow models in dbt_project.yml 13 | sed -i '/airflow_sources:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 14 | 15 | # Update the enabled_sources in dbt_project.yml 16 | sed -i "s/enabled_sources: \[.*\]/enabled_sources: \['airflow'\]/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Propose a feature request, new capability or improvement. 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 
18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | 22 | ## Constraints and Assumptions 23 | Call out any constraint and/or assumption relevant for the development and use of this feature. 24 | 25 | ## Tests 26 | Describe here any new test requirement for this feature. 27 | 28 | ## References 29 | -------------------------------------------------------------------------------- /integration_tests/for_CI/change_dbt_project_adf_source.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Enable adf sources in dbt_project.yml 4 | sed -i '/raw_adf_monitoring:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 5 | 6 | # Disable databricks sources in dbt_project.yml 7 | sed -i 's/\(raw_databricks_workflow_monitoring:\s*\n\s*+enabled:\s*\)true/\1false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 8 | 9 | # Enable adf models in dbt_project.yml 10 | sed -i '/adf_sources:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 11 | 12 | # Disable databricks_workflow models in dbt_project.yml 13 | sed -i '/databricks_workflow_sources:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 14 | 15 | # Update the enabled_sources in dbt_project.yml 16 | sed -i "s/enabled_sources: \[.*\]/enabled_sources: \['adf'\]/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 17 | -------------------------------------------------------------------------------- /models/marts/bridge_dag_monitoring.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: bridge_dag_monitoring 5 | description: "Bridge table used to make relationships between marts." 6 | columns: 7 | - name: 'generated_date' 8 | description: "Date of execution of the DAG." 9 | 10 | - name: dag_fk 11 | description: "Foreign key for each dag." 12 | 13 | - name: task_fk 14 | description: "Foreign key for each task." 15 | 16 | - name: dag_run_fk 17 | description: "Foreign key for fact_dag_monitoring_dag_run. Composed of: dag_run_id, execution_start_date and execution_end_date" 18 | 19 | - name: task_fail_fk 20 | description: "Foreign key for fact_dag_monitoring_task_fail. Composed of: task_fail_id, execution_end_date and execution_start_date" 21 | 22 | - name: task_instance_fk 23 | description: "Foreign key for fact_dag_monitoring_task_instance. 
Composed of: task_instance_id, execution_end_date, and execution_start_date" -------------------------------------------------------------------------------- /integration_tests/for_CI/change_dbt_project_from_databricks_to_airflow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Disable databricks sources in dbt_project.yml 4 | sed -i 's/\(raw_databricks_workflow_monitoring:\s*\n\s*+enabled:\s*\)true/\1false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 5 | 6 | # Enable airflow sources in dbt_project.yml 7 | sed -i '/raw_airflow_monitoring:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 8 | 9 | # Enable airflow models in dbt_project.yml 10 | sed -i '/airflow_sources:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 11 | 12 | # Disable databricks_workflow models in dbt_project.yml 13 | sed -i '/databricks_workflow_sources:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 14 | 15 | # Update the enabled_sources in dbt_project.yml 16 | sed -i "s/enabled_sources: \[.*\]/enabled_sources: \['airflow'\]/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 17 | -------------------------------------------------------------------------------- /integration_tests/for_CI/change_dbt_project_databricks_source.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Enable databricks sources in dbt_project.yml 4 | sed -i '/raw_databricks_workflow_monitoring:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 5 | 6 | # Disable airflow sources in dbt_project.yml 7 | sed -i 's/\(raw_airflow_monitoring:\s*\n\s*+enabled:\s*\)true/\1false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 8 | 9 | # Disable airflow_workflow models in dbt_project.yml 10 | sed -i '/airflow_sources:/,/enabled:/s/enabled: true/enabled: false/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 11 | 12 | # Enable databricks_workflow models in dbt_project.yml 13 | sed -i '/databricks_workflow_sources:/,/enabled:/s/enabled: false/enabled: true/' "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" 14 | 15 | # Update the enabled_sources in dbt_project.yml 16 | sed -i "s/enabled_sources: \[.*\]/enabled_sources: \['databricks_workflow'\]/" "$GITHUB_WORKSPACE/integration_tests/dbt_project.yml" -------------------------------------------------------------------------------- /catalog-dag-monitoring.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: backstage.io/v1alpha1 2 | kind: Component 3 | metadata: 4 | name: dbt_dag_monitoring 5 | description: | 6 | DAG Monitoring is a product designed to monitor orquestration metadata, formed by a tap, a dbt package and a Power BI dashboard. 
7 | tags: 8 | - product 9 | - airflow 10 | - monitoring 11 | - powerbi 12 | links: 13 | - title: Wiki 14 | url: https://wiki.indicium.tech/en/central_dados/squad_produtos_horizontais/squad_produtos_horizontais/dag-monitoring 15 | - title: Repository 16 | url: https://github.com/techindicium/dbt-dag-monitoring 17 | annotations: 18 | indicium.tech/product-url: https://app.powerbi.com/groups/1c5de32c-67f7-493c-ad6d-1d1c574b98bb/reports/132e0228-08ba-4f24-b6c4-a4974414e4b8/ReportSection?experience=power-bi 19 | spec: 20 | title: DAG Monitoring 21 | team: Produtos Horizontais 22 | class: Técnico 23 | vertical: Monitoring 24 | businessUnit: TI 25 | interface: Dashboard 26 | language: Portuguese 27 | owner: Indicium 28 | type: product 29 | lifecycle: experimental 30 | system: public-websites -------------------------------------------------------------------------------- /models/staging/airflow_sources/stg_dag_airflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ cast_as_string('dag_id') }} as dag_id 5 | , {{ cast_as_string('dag_id') }} as dag_name 6 | , description as dag_description 7 | , case 8 | when timetable_description like '% hour, between %' then 'hourly' 9 | when timetable_description like 'Between %' then 'hourly' 10 | when timetable_description like '% on day % month' then 'monthly' 11 | when timetable_description like '% in %' then 'monthly' 12 | when timetable_description like '%:% on %' then 'weekly' 13 | when timetable_description like '%:%' then 'daily' 14 | else timetable_description 15 | end as dag_frequency 16 | , timetable_description 17 | , is_paused 18 | , is_active 19 | , fileloc 20 | , owners 21 | from {{ source('raw_airflow_monitoring', 'dag') }} 22 | ) 23 | select * 24 | from renamed 25 | -------------------------------------------------------------------------------- /models/marts/dim_dag_monitoring_task.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: dim_dag_monitoring_task 5 | description: "dimensions table for Airflow tasks" 6 | 7 | columns: 8 | - name: task_sk 9 | description: "Surrogate key. Composed of: task_id and dag_id" 10 | tests: 11 | - unique 12 | - not_null 13 | 14 | - name: task_id 15 | description: "task id." 16 | tests: 17 | - not_null 18 | 19 | - name: dag_id 20 | description: "DAG id." 21 | tests: 22 | - not_null 23 | 24 | - name: map_index 25 | description: "Index for mapping." 26 | 27 | - name: hostname 28 | description: "Task hostname." 29 | 30 | - name: operator 31 | description: " Task operator model." 32 | 33 | - name: task_pool 34 | description: " Airflow's pool in which the task should be executed." 35 | 36 | - name: source_system 37 | description: " System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow." 
38 | -------------------------------------------------------------------------------- /integration_tests/macros/seed__task_fail.sql: -------------------------------------------------------------------------------- 1 | {% macro seed__task_fail() %} 2 | {% set create_table %} 3 | CREATE OR REPLACE TABLE `{{ target.database }}`.{{ target.schema }}.task_fail ( 4 | id INT64, 5 | task_id STRING, 6 | dag_id STRING, 7 | start_date TIMESTAMP, 8 | end_date TIMESTAMP, 9 | duration INT64, 10 | map_index INT64, 11 | run_id STRING 12 | ); 13 | {% endset %} 14 | 15 | {% set insert_table %} 16 | 17 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.task_fail ( 18 | id, task_id, dag_id, start_date, end_date, duration, map_index, run_id 19 | ) VALUES 20 | (1, 'dbt_freshness', 'dbt_bitrix', TIMESTAMP('2023-12-27T02:30:16.714366Z'), TIMESTAMP('2023-12-21T09:01:57.631415Z'), 1, -1, 'scheduled__2022-12-11T06:00:00+00:00'), 21 | (2, 'dbt_source_test', 'dbt_bitrix', TIMESTAMP('2023-12-21T09:01:57.073097Z'), TIMESTAMP('2023-12-23T08:30:25.791135Z'), 1, -1, 'scheduled__2022-12-11T06:00:00+00:00'); 22 | 23 | {% endset %} 24 | 25 | {% do run_query(create_table) %} 26 | {% do log("finished creating table task_fail", info=true) %} 27 | 28 | {% do run_query(insert_table) %} 29 | {% do log("finished insert table task_fail", info=true) %} 30 | 31 | {% endmacro %} -------------------------------------------------------------------------------- /models/staging/databricks_workflow_sources/stg_dag_databricks_workflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select 4 | {{ cast_as_string('job_id') }} as dag_id 5 | , {{replace_dot_for_colon('settings','name')}} as dag_name 6 | , 'not_implemented_for_databricks_workflow' as dag_description 7 | , 'not_implemented_for_databricks_workflow' as dag_frequency 8 | , {{replace_dot_for_colon('settings','schedule.quartz_cron_expression')}} as timetable_description 9 | , case 10 | when {{replace_dot_for_colon('settings','schedule.pause_status')}} = 'PAUSED' then true 11 | else false 12 | end as is_paused 13 | , case 14 | when {{replace_dot_for_colon('settings','schedule.pause_status')}} != 'PAUSED' then true 15 | else false 16 | end as is_active 17 | , 'not_implemented_for_databricks_workflow' as fileloc 18 | , creator_user_name as owners 19 | , null as ind_extraction_date 20 | , {{replace_dot_for_colon('settings','schedule.pause_status')}} as pause_status 21 | from {{ source('raw_databricks_workflow_monitoring', 'jobs') }} 22 | ) 23 | select * 24 | from renamed 25 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 4 | 5 |
6 | PR Checklist 7 | 8 | ### PR Structure 9 | 10 | - [ ] This PR has reasonably narrow scope (if not, break it down into smaller PRs). 11 | - [ ] This PR avoids mixing refactoring changes with feature changes (split into two PRs 12 | otherwise). 13 | 14 | ### Thoroughness 15 | 16 | - [ ] This PR adds tests for the most critical parts of the new functionality or fixes. 17 | - [ ] I've updated the docs and README with the added features, breaking changes, new instructions on how to use the repository. 18 | 19 | ### Release planning 20 | 21 | - [ ] I've decided if this PR requires a new major/minor/patch version accordingly to 22 | [semver](https://semver.org/), and I've changed the name of the BRANCH to release/* , feature/* or patch/* . 23 |
24 | 25 | ### What 26 | 27 | [TODO: Short statement about what is changing.] 28 | 29 | ### Why 30 | 31 | [TODO: Why this change is being made. Include any context required to understand the why.] 32 | 33 | ### Known limitations 34 | 35 | [TODO or N/A] -------------------------------------------------------------------------------- /integration_tests/profiles.yml: -------------------------------------------------------------------------------- 1 | dbt_dag_monitoring_integration_tests: 2 | target: '{{ env_var(''DBT_DEFAULT_TARGET'', ''databricks'')}}' 3 | outputs: 4 | databricks: 5 | ansi_mode: false 6 | catalog: '{{ env_var(''DEV_CATALOG_NAME'')}}' 7 | host: '{{ env_var(''DEV_HOST'') }}' 8 | http_path: '{{ env_var(''DEV_HTTP_PATH'') }}' 9 | schema: '{{ env_var(''DEV_SCHEMA_NAME'')}}' 10 | threads: 16 11 | token: '{{ env_var(''DEV_TOKEN'') }}' 12 | type: databricks 13 | 14 | bigquery: 15 | dataset: "{{ env_var('BIGQUERY_DATASET') }}" 16 | project: "{{ env_var('BIGQUERY_PROJECT') }}" 17 | job_execution_timeout_seconds: "{{ env_var('DBT_JOB_TIMEOUT') | int }}" 18 | threads: "{{ env_var('DBT_THREADS') | int }}" 19 | job_retries: "{{ env_var('DBT_JOB_RETRIES') | int }}" 20 | method: oauth 21 | location: us 22 | priority: interactive 23 | type: bigquery 24 | 25 | snowflake: 26 | type: "snowflake" 27 | account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}" 28 | user: "{{ env_var('SNOWFLAKE_USER') }}" 29 | password: "{{ env_var('SNOWFLAKE_PASSWORD') }}" 30 | role: "{{ env_var('SNOWFLAKE_ROLE') }}" 31 | database: "{{ env_var('SNOWFLAKE_DATABASE') }}" 32 | warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}" 33 | schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}" 34 | threads: 10 35 | -------------------------------------------------------------------------------- /models/marts/dim_dag_monitoring_dag.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: dim_dag_monitoring_dag 5 | description: "Dimension table for Airflow DAGs" 6 | columns: 7 | - name: 'dag_sk' 8 | description: "Surrogate key. Composed of: dag_id" 9 | tests: 10 | - unique 11 | - not_null 12 | 13 | - name: dag_id 14 | description: "Source table ID." 15 | tests: 16 | - unique 17 | - not_null 18 | 19 | - name: dag_name 20 | description: "Descriptive name for DAG." 21 | 22 | - name: dag_description 23 | description: "Description about the DAG." 24 | 25 | - name: dag_frequency 26 | description: "DAG frequency for execution." 27 | 28 | - name: timetable_description 29 | description: "DAGs execution scheduling." 30 | 31 | - name: is_paused 32 | description: "Is the DAG paused." 33 | 34 | - name: is_active 35 | description: "Is the DAG active." 36 | 37 | - name: fileloc 38 | description: "path to file that needs to be imported to load this DAG. `source_code` in source: 11" 39 | 40 | - name: owners 41 | description: "DAG owner." 
42 | 43 | - name: source_system 44 | description: "System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow" 45 | -------------------------------------------------------------------------------- /profiles.yml: -------------------------------------------------------------------------------- 1 | dbt_dag_monitoring: 2 | target: "{{ env_var('DBT_DEFAULT_TARGET', 'databricks')}}" 3 | outputs: 4 | databricks: 5 | type: databricks 6 | catalog: "{{ env_var('DEV_CATALOG_NAME')}}" 7 | schema: "{{ env_var('DEV_SCHEMA_NAME')}}" 8 | host: "{{ env_var('DEV_HOST') }}" 9 | http_path: "{{ env_var('DEV_HTTP_PATH') }}" 10 | token: "{{ env_var('DEV_TOKEN') }}" 11 | threads: 16 12 | ansi_mode: false 13 | 14 | bigquery: 15 | dataset: "{{ env_var('BIGQUERY_DATASET') }}" 16 | project: "{{ env_var('BIGQUERY_PROJECT') }}" 17 | job_execution_timeout_seconds: "{{ env_var('DBT_JOB_TIMEOUT') | int }}" 18 | threads: "{{ env_var('DBT_THREADS') | int }}" 19 | job_retries: "{{ env_var('DBT_JOB_RETRIES') | int }}" 20 | method: oauth 21 | location: us 22 | priority: interactive 23 | type: bigquery 24 | 25 | snowflake: 26 | type: "snowflake" 27 | account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}" 28 | user: "{{ env_var('SNOWFLAKE_USER') }}" 29 | password: "{{ env_var('SNOWFLAKE_PASSWORD') }}" 30 | role: "{{ env_var('SNOWFLAKE_ROLE') }}" 31 | database: "{{ env_var('SNOWFLAKE_DATABASE') }}" 32 | warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}" 33 | schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}" 34 | threads: 10 -------------------------------------------------------------------------------- /models/marts/fact_dag_monitoring_dag_run.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: fact_dag_monitoring_dag_run 5 | description: "Events table of Airflow DAG executions" 6 | columns: 7 | - name: 'dag_run_sk' 8 | description: "Surrogate key. Composed of: dag_run_id, execution_start_date and execution_end_date" 9 | tests: 10 | - unique 11 | - not_null 12 | 13 | - name: dag_fk 14 | description: "DAGs ids." 15 | tests: 16 | - relationships: 17 | to: ref('dim_dag_monitoring_dag') 18 | field: dag_sk 19 | 20 | - name: generated_date 21 | description: "date of execution of the DAG." 22 | tests: 23 | - relationships: 24 | to: ref('dbt_utils_day') 25 | field: date_day 26 | 27 | - name: external_trigger 28 | description: "Points out if the DAG execution was triggered externally (True / False)." 29 | 30 | - name: execution_start_date 31 | description: "Data e hora em que iniciou a execução da DAG." 32 | 33 | - name: execution_end_date 34 | description: "Date and hour when the DAG execution ended." 35 | 36 | - name: run_type 37 | description: "Type of execution of the DAG." 38 | 39 | - name: duration 40 | description: "Execution time in seconds." 41 | 42 | - name: source_system 43 | description: "System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow." 44 | -------------------------------------------------------------------------------- /models/marts/fact_dag_monitoring_task_fail.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: fact_dag_monitoring_task_fail 5 | description: "Events table of Airflow tasks failures" 6 | columns: 7 | - name: task_fail_sk 8 | description: "Surrogate key. 
Composed of: task_fail_id, execution_end_date and execution_start_date" 9 | tests: 10 | - unique 11 | - not_null 12 | 13 | - name: task_fk 14 | description: "Unique identifier of the task execution." 15 | tests: 16 | - relationships: 17 | to: ref('dim_dag_monitoring_task') 18 | field: task_sk 19 | 20 | - name: dag_fk 21 | description: "Id of the DAG." 22 | tests: 23 | - relationships: 24 | to: ref('dim_dag_monitoring_dag') 25 | field: dag_sk 26 | 27 | - name: generated_date 28 | description: "Date of execution of the DAG." 29 | tests: 30 | - relationships: 31 | to: ref('dbt_utils_day') 32 | field: date_day 33 | 34 | - name: execution_start_date 35 | description: " Date and hour when the DAG execution started." 36 | 37 | - name: execution_end_date 38 | description: "Date and hour when the DAG execution ended." 39 | 40 | - name: duration 41 | description: "Duration of the execution in seconds." 42 | 43 | - name: source_system 44 | description: " System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow." 45 | -------------------------------------------------------------------------------- /models/staging/airflow_sources/stg_task_instance_airflow.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ cast_as_string('task_id') }} as task_id 5 | , {{ cast_as_string('dag_id') }} as dag_id 6 | , {{ cast_as_string('run_id') }} as run_id 7 | , {{ cast_as_date('start_date') }} as execution_date 8 | , start_date as execution_start_date 9 | , end_date as execution_end_date 10 | , duration 11 | , state as state_task_instance 12 | , try_number 13 | , hostname 14 | , pool as task_pool 15 | , priority_weight 16 | , operator 17 | , case 18 | when map_index = -1 then 'no mapping' 19 | end as map_index 20 | from {{ source('raw_airflow_monitoring', 'task_instance') }} 21 | ) 22 | , created_id as ( 23 | /*Table does not have a unique identifier, the id was created as the unique identification of records*/ 24 | select 25 | {{ dbt_utils.generate_surrogate_key(['task_id', 'dag_id', 'run_id']) }} as task_instance_sk 26 | , task_id 27 | , dag_id 28 | , run_id 29 | , execution_date 30 | , execution_start_date 31 | , execution_end_date 32 | , duration 33 | , state_task_instance 34 | , try_number 35 | , hostname 36 | , task_pool 37 | , priority_weight 38 | , operator 39 | , map_index 40 | from renamed 41 | ) 42 | select * 43 | from created_id 44 | -------------------------------------------------------------------------------- /models/staging/adf_sources/stg_task_instance_adf.sql: -------------------------------------------------------------------------------- 1 | with 2 | renamed as ( 3 | select distinct 4 | {{ cast_as_string('activityRunId') }} as task_id 5 | , {{ cast_as_string('pipelineName') }} as dag_id 6 | , {{ cast_as_string('pipelineRunId') }} as run_id 7 | , {{ cast_as_date('activityRunStart') }} as execution_date 8 | , activityRunStart as execution_start_date 9 | , activityRunEnd as execution_end_date 10 | , durationInMs / 1000 as duration 11 | , status as state_task_instance 12 | , retryAttempt as try_number 13 | , "not_implemented_for_adf" as hostname 14 | , "not_implemented_for_adf" as task_pool 15 | , "not_implemented_for_adf" as priority_weight 16 | , activityName as operator 17 | , "not_implemented_for_adf" as map_index 18 | from {{ source('raw_adf_monitoring', 'adf_activity_runs') }} 19 | ) 20 | , created_id as ( 21 | /*Im not sure this is necessary for adf*/ 22 | 
select 23 | {{ dbt_utils.generate_surrogate_key(['task_id', 'dag_id', 'run_id']) }} as task_instance_sk 24 | , task_id 25 | , dag_id 26 | , run_id 27 | , execution_date 28 | , execution_start_date 29 | , execution_end_date 30 | , duration 31 | , state_task_instance 32 | , try_number 33 | , hostname 34 | , task_pool 35 | , priority_weight 36 | , operator 37 | , map_index 38 | from renamed 39 | ) 40 | select * 41 | from created_id 42 | -------------------------------------------------------------------------------- /macros/day_of_week.sql: -------------------------------------------------------------------------------- 1 | {% macro day_of_week(column) %} 2 | {{ return(adapter.dispatch('day_of_week')(column)) }} 3 | {%- endmacro %} 4 | 5 | {% macro databricks__day_of_week(column) %} 6 | case 7 | when {{ column }} = 1 then 'Sunday' 8 | when {{ column }} = 2 then 'Monday' 9 | when {{ column }} = 3 then 'Tuesday' 10 | when {{ column }} = 4 then 'Wednesday' 11 | when {{ column }} = 5 then 'Thursday' 12 | when {{ column }} = 6 then 'Friday' 13 | when {{ column }} = 7 then 'Saturday' 14 | end as name_of_day 15 | {% endmacro %} 16 | 17 | {% macro snowflake__day_of_week(column) %} 18 | case 19 | when {{ column }} = 0 then 'Sunday' 20 | when {{ column }} = 1 then 'Monday' 21 | when {{ column }} = 2 then 'Tuesday' 22 | when {{ column }} = 3 then 'Wednesday' 23 | when {{ column }} = 4 then 'Thursday' 24 | when {{ column }} = 5 then 'Friday' 25 | when {{ column }} = 6 then 'Saturday' 26 | end as name_of_day 27 | {% endmacro %} 28 | 29 | {% macro bigquery__day_of_week(column) %} 30 | case 31 | when {{ column }} = 1 then 'Sunday' 32 | when {{ column }} = 2 then 'Monday' 33 | when {{ column }} = 3 then 'Tuesday' 34 | when {{ column }} = 4 then 'Wednesday' 35 | when {{ column }} = 5 then 'Thursday' 36 | when {{ column }} = 6 then 'Friday' 37 | when {{ column }} = 7 then 'Saturday' 38 | end as name_of_day 39 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'dbt_dag_monitoring_integration_tests' 2 | version: '0.2.0' 3 | 4 | require-dbt-version: [">=1.0.0", "<2.0.0"] 5 | 6 | config-version: 2 7 | 8 | target-path: "target" 9 | clean-targets: ["target", "dbt_modules", "dbt_packages"] 10 | macro-paths: ["macros"] 11 | log-path: "logs" 12 | seed-paths: ["seeds"] 13 | 14 | profile: dbt_dag_monitoring_integration_tests 15 | 16 | dispatch: 17 | - macro_namespace: 'dbt_utils' 18 | search_order: ['dbt_utils_integration_tests', 'dbt_utils'] 19 | 20 | sources: 21 | dbt_dag_monitoring: 22 | staging: 23 | adf_sources: 24 | raw_adf_monitoring: 25 | +enabled: false 26 | databricks_workflow_sources: 27 | raw_databricks_workflow_monitoring: 28 | +enabled: true 29 | airflow_sources: 30 | raw_airflow_monitoring: 31 | +enabled: false 32 | 33 | models: 34 | dbt_dag_monitoring: 35 | marts: 36 | +materialized: table 37 | staging: 38 | adf_sources: 39 | +enabled: false 40 | airflow_sources: 41 | +enabled: false 42 | databricks_workflow_sources: 43 | +enabled: true 44 | +materialized: view 45 | 46 | vars: 47 | dbt_dag_monitoring: 48 | enabled_sources: ['databricks_workflow'] #Possible values: 'airflow', 'adf' or 'databricks_workflow' 49 | dag_monitoring_start_date: cast('2023-01-01' as date) 50 | dag_monitoring_airflow_database: cdi_dev 51 | dag_monitoring_airflow_schema: ci_dbt_dag_monitoring 52 | dag_monitoring_databricks_database: cdi_dev 53 | 
dag_monitoring_databricks_schema: ci_dbt_dag_monitoring 54 | dag_monitoring_adf_database: cdi_dev 55 | dag_monitoring_adf_schema: ci_dbt_dag_monitoring -------------------------------------------------------------------------------- /models/marts/bridge_dag_monitoring.sql: -------------------------------------------------------------------------------- 1 | with 2 | fact_dag_run as ( 3 | select 4 | dag_run_sk 5 | , dag_fk 6 | , generated_date 7 | from {{ ref('fact_dag_monitoring_dag_run') }} 8 | ) 9 | , fact_task_fail as ( 10 | select 11 | task_fail_sk 12 | , dag_fk 13 | , task_fk 14 | , generated_date 15 | from {{ ref('fact_dag_monitoring_task_fail') }} 16 | ) 17 | , fact_task_instance as ( 18 | select 19 | task_instance_sk 20 | , dag_fk 21 | , task_fk 22 | , generated_date 23 | from {{ ref('fact_dag_monitoring_task_instance') }} 24 | ) 25 | , bridge as ( 26 | select 27 | coalesce(fact_task_instance.generated_date, fact_dag_run.generated_date, fact_task_fail.generated_date) as generated_date 28 | , coalesce(fact_dag_run.dag_fk, fact_task_fail.dag_fk, fact_task_instance.dag_fk) as dag_fk 29 | , coalesce(fact_task_instance.task_fk, fact_task_fail.task_fk) as task_fk 30 | , fact_dag_run.dag_run_sk as dag_run_fk 31 | , fact_task_fail.task_fail_sk as task_fail_fk 32 | , fact_task_instance.task_instance_sk as task_instance_fk 33 | from fact_task_instance 34 | full outer join fact_task_fail 35 | on fact_task_instance.task_fk = fact_task_fail.task_fk 36 | and fact_task_instance.generated_date = fact_task_fail.generated_date 37 | full outer join fact_dag_run 38 | on coalesce(fact_task_instance.dag_fk, fact_task_fail.dag_fk) = fact_dag_run.dag_fk 39 | and fact_task_instance.generated_date = fact_dag_run.generated_date 40 | ) 41 | select * 42 | from bridge -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'dbt_dag_monitoring' 2 | version: '0.2.0' 3 | 4 | require-dbt-version: [">=1.3.0", "<2.0.0"] 5 | 6 | config-version: 2 7 | 8 | target-path: "target" 9 | clean-targets: ["target", "dbt_modules", "dbt_packages"] 10 | macro-paths: ["macros"] 11 | log-path: "logs" 12 | seed-paths: ["seeds"] 13 | 14 | profile: dbt_dag_monitoring 15 | 16 | # When using it for testing purposes, you can take out all the comments below and set to true only the sources, models and vars you want to test 17 | 18 | # sources: 19 | # dbt_dag_monitoring: 20 | # staging: 21 | # adf_sources: 22 | # raw_adf_monitoring: 23 | # +enabled: false 24 | # databricks_workflow_sources: 25 | # raw_databricks_workflow_monitoring: 26 | # +enabled: true 27 | # airflow_sources: 28 | # raw_airflow_monitoring: 29 | # +enabled: false 30 | 31 | # models: 32 | # dbt_dag_monitoring: 33 | # marts: 34 | # +materialized: table 35 | # staging: 36 | # adf_sources: 37 | # +enabled: false 38 | # airflow_sources: 39 | # +enabled: false 40 | # databricks_workflow_sources: 41 | # +enabled: true 42 | # +materialized: view 43 | 44 | # Only one type of enabled sources can be turned on at a time 45 | 46 | # vars: 47 | # dbt_dag_monitoring: 48 | # enabled_sources: ['databricks_workflow'] #Possible values: 'airflow', 'adf' or 'databricks_workflow' 49 | # dag_monitoring_start_date: cast('2023-01-01' as date) 50 | # dag_monitoring_airflow_database: cdi_dev 51 | # dag_monitoring_airflow_schema: ci_dbt_dag_monitoring 52 | # dag_monitoring_databricks_database: cdi_dev 53 | # dag_monitoring_databricks_schema: ci_dbt_dag_monitoring 54 | # 
dag_monitoring_adf_database: cdi_dev 55 | # dag_monitoring_adf_schema: ci_dbt_dag_monitoring -------------------------------------------------------------------------------- /models/marts/fact_dag_monitoring_task_instance.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: fact_dag_monitoring_task_instance 5 | description: "Events table of Airflow task instances" 6 | columns: 7 | - name: task_instance_sk 8 | description: "Surrogate key. Composed of: task_instance_id, execution_end_date, and execution_start_date" 9 | tests: 10 | - unique 11 | - not_null 12 | 13 | - name: task_fk 14 | description: "Id of the tasks" 15 | tests: 16 | - relationships: 17 | to: ref('dim_dag_monitoring_task') 18 | field: task_sk 19 | 20 | - name: dag_fk 21 | description: "Id of the DAG." 22 | tests: 23 | - relationships: 24 | to: ref('dim_dag_monitoring_dag') 25 | field: dag_sk 26 | 27 | - name: generated_date 28 | description: "Date of execution of the DAG." 29 | tests: 30 | - relationships: 31 | to: ref('dbt_utils_day') 32 | field: date_day 33 | 34 | - name: execution_start_date 35 | description: " Date and hour when the DAG execution started." 36 | 37 | - name: execution_end_date 38 | description: "Date and hour when the DAG execution ended." 39 | 40 | - name: duration 41 | description: "Duration of the execution in seconds." 42 | 43 | - name: state_task_instance 44 | description: "The state of the task execution." 45 | 46 | - name: try_number 47 | description: "The number of attempts to execute." 48 | 49 | - name: priority_weight 50 | description: "Task priority." 51 | 52 | - name: source_system 53 | description: " System where the data was extracted from, currently the possible values are airflow, adf and databricks_workflow." 54 | -------------------------------------------------------------------------------- /integration_tests/README.md: -------------------------------------------------------------------------------- 1 | > [!WARNING] 2 | > ADF source and models were NOT tested in Snowflake connection! 3 | 4 | This README is about the integration tests step inside the ci.yml. 5 | 6 | Integration tests work in a similar way to how an user can reference the dbt-dag-monitoring in their project. That is how we start it, running 7 | dbt deps in the packages: local: ../ 8 | In that way, we are pulling dbt-dag-monitoring. 9 | 10 | As we are simulating the use of the project somewhere else, when checking dbt_project.yml inside the integration_tests folder, we can see that we configured the sources, the models, and vars of it. Those settings are crucial to run the project. 11 | 12 | When running the continuous integration in the Github actions, where the integration tests are actually analyzed, we are using the dbt_project.yml inside the integration_tests folder as reference. 13 | 14 | By looking at the profiles.yml folder, we can see that we use 3 connections: Databricks, BigQuery and Snowflake. Whatever modifications are done in the project, it must pass successfully in the three data warehouses to be accepted to merge. 15 | 16 | When testing new features in the project, the user can save time by having credentials at each DW to test the changes locally, before passing them to the pull request, due to the fact that each commit that is analyzed by the CI, takes 7 minutes minimum to run. 
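A rough sketch of what such a local run can look like, assuming the credentials for the chosen warehouse are exported as expected by profiles.yml; the schema name and the exact order of the run-operation calls are illustrative, and ci.yml remains the source of truth:

```bash
# illustrative local run of the integration tests (not the exact CI recipe)
cd integration_tests
dbt deps                                                                # pulls the package via 'local: ../'
dbt run-operation create_schema --args '{schema_name: my_test_schema}'  # hypothetical schema name
# seed data can be loaded the same way, e.g. dbt run-operation seed__task_fail
dbt build                                                               # builds and tests the enabled source set
dbt run-operation drop_schema --args '{schema_name: my_test_schema}'    # clean up, mirroring the CI teardown
```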
17 | 18 | If you cannot find the schema on a given DW, that is because the CI creates the schema and drops it again as soon as the run for that specific DW finishes. 19 | To clarify how the integration tests work within the continuous integration, take a look at the diagram below: 20 | 21 | 22 | 23 | > [!NOTE] 24 | > Databricks works both as a DW and as a source for the models. 25 | 26 | As you can see in the diagram above, the .sh files handle the transition from an origin source to a destination source. 27 | 28 | 29 | Since the source transitions follow the pattern Databricks to ADF to Airflow, a dedicated shell file, "change_dbt_project_from_databricks_to_airflow.sh", was needed to make the final transition straight from Databricks to Airflow, because ADF is not tested on Snowflake. 30 | 31 | -------------------------------------------------------------------------------- /models/marts/fact_dag_monitoring_dag_run.sql: -------------------------------------------------------------------------------- 1 | with 2 | dim_dag as ( 3 | select 4 | dag_id 5 | , dag_sk as dag_fk 6 | from {{ ref('dim_dag_monitoring_dag') }} 7 | ) 8 | , util_days as ( 9 | select cast(date_day as date) as date_day 10 | from {{ ref('dbt_utils_day') }} 11 | ) 12 | , stg_dag_run as ( 13 | {% for src in var('enabled_sources') -%} 14 | select 15 | dag_run_id 16 | , dag_id 17 | , run_id 18 | , run_date 19 | , execution_start_date 20 | , execution_end_date 21 | , duration 22 | , dag_state 23 | , external_trigger 24 | , run_type 25 | , '{{ src }}' as source_system 26 | from {{ ref('stg_dag_run_' + src) }} 27 | {% if not loop.last -%} union {% endif -%} 28 | {% endfor -%} 29 | ) 30 | , joined as ( 31 | select 32 | stg_dag_run.dag_run_id 33 | , dim_dag.dag_fk 34 | , dim_dag.dag_id 35 | , stg_dag_run.run_id 36 | , util_days.date_day 37 | , stg_dag_run.execution_start_date 38 | , stg_dag_run.execution_end_date 39 | , stg_dag_run.dag_state 40 | , stg_dag_run.external_trigger 41 | , stg_dag_run.run_type 42 | , stg_dag_run.duration 43 | , stg_dag_run.source_system 44 | from stg_dag_run 45 | left join dim_dag on stg_dag_run.dag_id = dim_dag.dag_id 46 | left join util_days on {{ cast_as_date('stg_dag_run.run_date') }} = {{ cast_as_date('util_days.date_day') }} 47 | ) 48 | , joined_with_sk as ( 49 | select 50 | {{ dbt_utils.generate_surrogate_key([ 51 | 'dag_run_id' 52 | , 'execution_start_date' 53 | , 'execution_end_date' 54 | , 'run_id']) }} as dag_run_sk 55 | , dag_fk 56 | , date_day as generated_date 57 | , execution_start_date 58 | , execution_end_date 59 | , dag_state 60 | , external_trigger 61 | , run_type 62 | , duration 63 | , source_system 64 | from joined 65 | ) 66 | select * 67 | from joined_with_sk 68 | -------------------------------------------------------------------------------- /models/staging/databricks_workflow_sources/source.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: raw_databricks_workflow_monitoring 5 | description: "Raw data from Databricks for Databricks monitoring analysis." 6 | database: "{{ var('dag_monitoring_databricks_database', '')}}" 7 | schema: "{{ var('dag_monitoring_databricks_schema', 'raw_databricks_workflow_monitoring') }}" 8 | tables: 9 | - name: jobs 10 | description: "Table that contains information about Databricks jobs (DAGs)." 11 | columns: 12 | - name: job_id 13 | description: "Table unique identifier."
14 | tests: 15 | - not_null 16 | - unique 17 | 18 | - name: settings 19 | description: '{{ doc("settings_doc") }}' 20 | 21 | - name: created_time 22 | description: "Timestamp of the job creation" 23 | 24 | - name: creator_user_name 25 | description: "E-mail of the job creator" 26 | 27 | - name: ind_extraction_date 28 | description: "Date of extraction of the table" 29 | 30 | - name: job_runs 31 | description: "Table that contains the execution data of the Databricks pipelines(jobs)" 32 | columns: 33 | - name: run_id 34 | description: "Unique identifier of the table. Job run id" 35 | tests: 36 | - not_null 37 | - unique 38 | 39 | - name: job_id 40 | description: "Job id." 41 | 42 | - name: state 43 | description: '{{ doc("state_doc") }}' 44 | 45 | - name: trigger 46 | description: "It tells how the pipeline execution was triggered." 47 | 48 | - name: start_time 49 | description: "Timestamp when the DAG execution started." 50 | 51 | - name: end_time 52 | description: "Timestamp when the DAG execution ended." 53 | 54 | - name: execution_duration 55 | description: "Duration of DAG in milliseconds." 56 | 57 | - name: run_type 58 | description: "Type of execution" 59 | 60 | - name: tasks 61 | description: '{{ doc("tasks_doc") }}' 62 | 63 | - name: dbt_utils_day 64 | description: "Table that contains data of the dates created by the dbt_utils macro." 65 | -------------------------------------------------------------------------------- /integration_tests/macros/adf_activity_runs.sql: -------------------------------------------------------------------------------- 1 | {% macro adf_activity_runs() %} 2 | {% set create_table %} 3 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_activity_runs ( 4 | activityRunEnd TIMESTAMP, 5 | activityName STRING, 6 | activityRunStart TIMESTAMP, 7 | activityType STRING, 8 | durationInMs INT, 9 | retryAttempt INT, 10 | error_errorCode STRING, 11 | error_message STRING, 12 | error_failureType STRING, 13 | error_target STRING, 14 | activityRunId STRING, 15 | linkedServiceName STRING, 16 | pipelineName STRING, 17 | pipelineRunId STRING, 18 | status STRING, 19 | output_effectiveIntegrationRuntime STRING, 20 | input_source_type STRING 21 | ); 22 | {% endset %} 23 | 24 | {% set insert_table %} 25 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.adf_activity_runs VALUES 26 | ( 27 | CAST('2024-08-20T03:30:10.973554Z' AS TIMESTAMP), 28 | CAST('Set CurrentDate' AS STRING), 29 | CAST('2024-08-20T03:30:10.742531Z' AS TIMESTAMP), 30 | CAST('SetVariable' AS STRING), 31 | CAST(231 AS INT), 32 | NULL, 33 | NULL, 34 | NULL, 35 | NULL, 36 | CAST('Set CurrentDate' AS STRING), 37 | CAST('f653c43a-6508-42f8-8467-0e10152aa3f9' AS STRING), 38 | NULL, 39 | CAST('PL-FEMFILESCSVTODatalakeLandingZone-N' AS STRING), 40 | CAST('9f81a5eb-a7ca-482e-833e-db6082b73db5' AS STRING), 41 | CAST('Succeeded' AS STRING), 42 | NULL, 43 | NULL 44 | ), 45 | ( 46 | CAST('2024-08-20T03:30:11.538784Z' AS TIMESTAMP), 47 | CAST('Set Timestamp' AS STRING), 48 | CAST('2024-08-20T03:30:11.274576Z' AS TIMESTAMP), 49 | CAST('SetVariable' AS STRING), 50 | CAST(264 AS INT), 51 | NULL, 52 | NULL, 53 | NULL, 54 | NULL, 55 | CAST('Set Timestamp' AS STRING), 56 | CAST('b8c48c2f-b0e6-45f0-a502-cee31dffba2e' AS STRING), 57 | NULL, 58 | CAST('PL-FEMFILESCSVTODatalakeLandingZone-N' AS STRING), 59 | CAST('9f81a5eb-a7ca-482e-833e-db6082b73db5' AS STRING), 60 | CAST('Succeeded' AS STRING), 61 | NULL, 62 | NULL 63 | ); 64 | 65 | {% endset %} 66 | 67 | {% do run_query(create_table) %} 68 | {% do 
log("finished creating table adf_activity_runs", info=true) %} 69 | 70 | {% do run_query(insert_table) %} 71 | {% do log("finished insert table adf_activity_runs", info=true) %} 72 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/macros/seed__dag.sql: -------------------------------------------------------------------------------- 1 | {% macro seed__dag() -%} 2 | {{ return(adapter.dispatch('seed__dag')()) }} 3 | {%- endmacro %} 4 | 5 | {% macro default__seed__dag() %} 6 | {% set create_table %} 7 | create or replace table `{{ target.database }}`.{{ target.schema }}.dag ( 8 | dag_id STRING, 9 | is_paused BOOLEAN, 10 | is_subdag BOOLEAN, 11 | is_active BOOLEAN, 12 | last_parsed_time TIMESTAMP, 13 | last_pickled TIMESTAMP, 14 | last_expired TIMESTAMP, 15 | scheduler_lock STRING, 16 | pickle_id INT64, 17 | fileloc STRING, 18 | owners STRING, 19 | description STRING, 20 | default_view STRING, 21 | schedule_interval STRING, 22 | root_dag_id STRING, 23 | next_dagrun TIMESTAMP, 24 | next_dagrun_create_after TIMESTAMP, 25 | max_active_tasks INT64, 26 | has_task_concurrency_limits BOOLEAN, 27 | max_active_runs INT64, 28 | next_dagrun_data_interval_start TIMESTAMP, 29 | next_dagrun_data_interval_end TIMESTAMP, 30 | has_import_errors BOOLEAN, 31 | timetable_description STRING, 32 | processor_subdir STRING 33 | ); 34 | {% endset %} 35 | 36 | {% set insert_table %} 37 | 38 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.dag VALUES 39 | ( 40 | 'test_docker', false, false, false, TIMESTAMP('2022-12-16 09:35:19.433-03'), NULL, NULL, '', NULL, 41 | '/opt/airflow/dags/repo/airflow/dags/test_dag.py', 'Indicium', '', 'grid', '0 6 * * *', '', 42 | TIMESTAMP('2022-12-16 03:00:00.000-03'), TIMESTAMP('2022-12-17 03:00:00.000-03'), 16, false, 16, 43 | TIMESTAMP('2022-12-16 03:00:00.000-03'), TIMESTAMP('2022-12-17 03:00:00.000-03'), false, 'At 06:00', '' 44 | ), 45 | ( 46 | 'dbt', true, false, false, TIMESTAMP('2022-11-25 16:12:51.922-03'), NULL, NULL, '', NULL, 47 | '/opt/airflow/dags/repo/airflow/dags/all_dags.py', 'airflow', '', 'grid', '7/15 9-23 * * *', '', 48 | TIMESTAMP('2022-02-01 06:07:00.000-03'), TIMESTAMP('2022-02-01 06:22:00.000-03'), 16, false, 1, 49 | TIMESTAMP('2022-02-01 06:07:00.000-03'), TIMESTAMP('2022-02-01 06:22:00.000-03'), true, 50 | 'Every 15 minutes, starting at 7 minutes past the hour, between 09:00 and 23:59', '' 51 | ); 52 | 53 | {% endset %} 54 | 55 | {% do run_query(create_table) %} 56 | {% do log("finished creating table dag", info=true) %} 57 | 58 | {% do run_query(insert_table) %} 59 | {% do log("finished insert table dag", info=true) %} 60 | 61 | 62 | {% endmacro %} -------------------------------------------------------------------------------- /models/marts/fact_dag_monitoring_task_fail.sql: -------------------------------------------------------------------------------- 1 | with 2 | dim_dag as ( 3 | select 4 | dag_id 5 | , dag_sk as dag_fk 6 | from {{ ref('dim_dag_monitoring_dag') }} 7 | ) 8 | , dim_task as ( 9 | select 10 | task_sk as task_fk 11 | , task_id 12 | , dag_id 13 | from {{ ref('dim_dag_monitoring_task') }} 14 | ) 15 | , util_days as ( 16 | select cast(date_day as date) as date_day 17 | from {{ ref('dbt_utils_day') }} 18 | ) 19 | , stg_task_fail as ( 20 | {% for src in var('enabled_sources') -%} 21 | select 22 | task_fail_id 23 | , task_id 24 | , dag_id 25 | , run_id 26 | , execution_start_date 27 | , execution_end_date 28 | , duration 29 | , execution_date 30 | , map_index 31 | , '{{ src 
}}' as source_system 32 | from {{ ref('stg_task_fail_' + src) }} 33 | {% if not loop.last -%} union {% endif -%} 34 | {% endfor -%} 35 | ) 36 | , joined as ( 37 | select 38 | stg_task_fail.task_fail_id 39 | , stg_task_fail.task_id 40 | , dim_dag.dag_id 41 | , stg_task_fail.run_id 42 | , dim_dag.dag_fk 43 | , dim_task.task_fk 44 | , util_days.date_day 45 | , stg_task_fail.execution_start_date 46 | , stg_task_fail.execution_end_date 47 | , stg_task_fail.duration 48 | , stg_task_fail.source_system 49 | from stg_task_fail 50 | left join dim_dag on stg_task_fail.dag_id = dim_dag.dag_id 51 | left join dim_task on 52 | stg_task_fail.task_id = dim_task.task_id 53 | and stg_task_fail.dag_id = dim_task.dag_id 54 | left join util_days on {{ cast_as_date('stg_task_fail.execution_date') }} = {{ cast_as_date('util_days.date_day') }} 55 | ) 56 | , surrogate_key as ( 57 | select 58 | {{ dbt_utils.generate_surrogate_key([ 59 | 'task_fail_id' 60 | , 'execution_start_date' 61 | , 'execution_end_date' 62 | , 'run_id'] 63 | ) }} as task_fail_sk 64 | , dag_fk 65 | , task_fk 66 | , date_day as generated_date 67 | , execution_start_date 68 | , execution_end_date 69 | , duration 70 | , source_system 71 | from joined 72 | ) 73 | select * 74 | from surrogate_key 75 | -------------------------------------------------------------------------------- /integration_tests/seeds/airflow/task_instance.csv: -------------------------------------------------------------------------------- 1 | "task_id","dag_id","run_id","start_date","end_date","duration","state","try_number","hostname","unixname","job_id","pool","queue","priority_weight","operator","queued_dttm","pid","max_tries","executor_config","pool_slots","queued_by_job_id","external_executor_id","trigger_id","trigger_timeout","next_method","next_kwargs","map_index","updated_at" 2 | dbt_source_test,dbt_bitrix,scheduled__2023-01-12T06:00:00+00:00,2023-01-13 03:01:07.644 -0300,2023-01-13 03:01:27.852 -0300,20.207217,success,1,dbtbitrixdbtsourcetest-32bc3af501374e48913fad10b54fdd67,root,332,default_pool,default,17,DockerOperator,2023-01-13 03:00:58.037 -0300,21,2,�\u0004}�.,1,201,,,,,"",-1, 3 | dump_table1_to_DL,sample_fist,scheduled__2022-02-03T00:00:00+00:00,2022-11-25 15:58:27.688 -0300,2022-11-25 15:58:28.388 -0300,0.700336,success,1,samplefistdumptable1todl-0d85d3eca2b14a58b822dbb5f5c21bec,root,20,default_pool,default,2,BashOperator,2022-11-25 15:57:34.854 -0300,21,1,�\u0004}�.,1,7,,,,,"",-1, 4 | copy_table4_DL_to_DW,sample_fist,scheduled__2022-02-03T00:00:00+00:00,,,,scheduled,0,"",root,,default_pool,default,1,BashOperator,,,1,�\u0004}�.,1,,,,,,"",-1, 5 | dump_table3_to_DL,sample_fist,scheduled__2022-02-03T00:00:00+00:00,2022-11-25 15:58:27.252 -0300,2022-11-25 15:58:27.923 -0300,0.670813,success,1,samplefistdumptable3todl-dfadd4af7fde472593ee7c824e6ca2ae,root,18,default_pool,default,2,BashOperator,2022-11-25 15:57:34.854 -0300,21,1,�\u0004}�.,1,7,,,,,"",-1, 6 | dump_table5_to_DL,sample_fist,scheduled__2022-02-03T00:00:00+00:00,2022-11-25 15:59:17.480 -0300,2022-11-25 15:59:18.020 -0300,0.540106,success,1,samplefistdumptable5todl-54e2543a9e694a63b55e112e99c2053d,root,22,default_pool,default,1,BashOperator,2022-11-25 15:57:34.854 -0300,21,1,�\u0004}�.,1,7,,,,,"",-1, 7 | copy_table3_DL_to_DW,sample_fist,scheduled__2022-02-03T00:00:00+00:00,,,,scheduled,0,"",root,,default_pool,default,1,BashOperator,,,1,�\u0004}�.,1,,,,,,"",-1, 8 | dump_table4_to_DL,sample_fist,scheduled__2022-02-03T00:00:00+00:00,2022-11-25 15:58:27.540 -0300,2022-11-25 15:58:28.263 
-0300,0.722546,success,1,samplefistdumptable4todl-de6d057adeeb4f4b94b777491f5e3611,root,19,default_pool,default,2,BashOperator,2022-11-25 15:57:34.854 -0300,20,1,�\u0004}�.,1,7,,,,,"",-1, 9 | copy_table1_DL_to_DW,sample_fist,scheduled__2022-02-03T00:00:00+00:00,,,,scheduled,0,"",root,,default_pool,default,1,BashOperator,,,1,�\u0004}�.,1,,,,,,"",-1, 10 | delay,sample_fist,scheduled__2022-02-02T00:00:00+00:00,2022-11-25 15:55:20.412 -0300,2022-11-25 15:55:20.412 -0300,0.0,success,0,"",root,,default_pool,default,10,DummyOperator,,,1,�\u0004}�.,1,,,,,,"",-1, 11 | -------------------------------------------------------------------------------- /models/staging/adf_sources/stg_dag_adf.sql: -------------------------------------------------------------------------------- 1 | with exploded_by_pipeline as ( 2 | select 3 | * 4 | from 5 | {{ source('raw_adf_monitoring', 'adf_triggers') }} 6 | {{ flatten_data('properties.pipelines') }} as pipelines 7 | ), 8 | 9 | triggers_renamed as ( 10 | select 11 | id as trigger_id 12 | , case 13 | when properties.typeProperties.recurrence.frequency = 'Hour' then 'hourly' 14 | when properties.typeProperties.recurrence.frequency = 'Day' then 'daily' 15 | when properties.typeProperties.recurrence.frequency = 'Week' then 'weekly' 16 | when properties.typeProperties.recurrence.frequency = 'Month' then 'monthly' 17 | when properties.typeProperties.recurrence.frequency = 'Minute' then 'minutely' 18 | end as dag_frequency 19 | ,{{ cast_as_string('properties.typeProperties.recurrence.schedule') }} as timetable_description 20 | ,properties.typeProperties.recurrence.frequency as adf_frequency 21 | ,properties.typeProperties.recurrence.startTime as start_time 22 | , case 23 | when properties.runtimeState = 'Started' then 'true' 24 | else 'false' 25 | end as is_active 26 | , case 27 | when properties.runtimeState = 'Started' then 'false' 28 | else 'true' 29 | end as is_paused 30 | ,properties.runtimeState 31 | ,{{adf_pipelines_name('pipelines.pipelineReference.referenceName') }} as pipeline_name 32 | 33 | from exploded_by_pipeline 34 | ), 35 | pipeline_with_row_number as ( 36 | select 37 | *, 38 | row_number() over (partition by id order by etag desc) row_number 39 | from {{ source('raw_adf_monitoring', 'adf_pipelines') }} 40 | ), 41 | pipeline_dedup as ( 42 | select * from 43 | pipeline_with_row_number 44 | where row_number = 1 45 | ), 46 | pipelines_and_triggers as ( 47 | select 48 | pipelines.id as dag_id 49 | ,pipelines.name as dag_name 50 | ,triggers.* 51 | from pipeline_dedup pipelines 52 | left join triggers_renamed triggers 53 | on pipelines.name = triggers.pipeline_name 54 | ) 55 | select 56 | {{ cast_as_string('dag_name') }} as dag_id 57 | , {{ cast_as_string('dag_name') }} as dag_name 58 | , "not_implemented_for_adf" as dag_description 59 | , dag_frequency 60 | , timetable_description 61 | , is_paused 62 | , is_active 63 | , 'not_implemented_for_adf' as fileloc 64 | , 'not_implemented_for_adf' as owners 65 | , null as ind_extraction_date 66 | from 67 | pipelines_and_triggers 68 | -------------------------------------------------------------------------------- /integration_tests/macros/seed__dag_run.sql: -------------------------------------------------------------------------------- 1 | {% macro seed__dag_run() -%} 2 | {{ return(adapter.dispatch('seed__dag_run')()) }} 3 | {%- endmacro %} 4 | 5 | {% macro default__seed__dag_run() %} 6 | {% set create_table %} 7 | create or replace table `{{ target.database }}`.{{ target.schema }}.dag_run ( 8 | id INT64, 9 | dag_id 
STRING, 10 | execution_date TIMESTAMP, 11 | state STRING, 12 | run_id STRING, 13 | external_trigger BOOLEAN, 14 | conf STRING, 15 | end_date TIMESTAMP, 16 | start_date TIMESTAMP, 17 | run_type STRING, 18 | last_scheduling_decision TIMESTAMP, 19 | dag_hash STRING, 20 | creating_job_id INT64, 21 | queued_at TIMESTAMP, 22 | data_interval_start TIMESTAMP, 23 | data_interval_end TIMESTAMP, 24 | log_template_id INT64, 25 | updated_at TIMESTAMP 26 | ); 27 | 28 | {% endset %} 29 | 30 | {% set insert_table %} 31 | 32 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.dag_run VALUES 33 | ( 34 | 87755, 35 | 'airflow_status_monitoring', 36 | TIMESTAMP('2024-07-04 10:56:00.000-03'), 37 | 'success', 38 | 'scheduled__2024-07-04T13:56:00+00:00', 39 | false, 40 | '�\u0004}�.', 41 | TIMESTAMP('2024-07-04 10:57:15.119-03'), 42 | TIMESTAMP('2024-07-04 10:57:00.256-03'), 43 | 'scheduled', 44 | TIMESTAMP('2024-07-04 10:57:15.109-03'), 45 | '2606cfccb8540961ee80c09fe32dcc8d', 46 | 110841, 47 | TIMESTAMP('2024-07-04 10:57:00.210-03'), 48 | TIMESTAMP('2024-07-04 10:56:00.000-03'), 49 | TIMESTAMP('2024-07-04 10:57:00.000-03'), 50 | 1, 51 | TIMESTAMP('2024-07-04 10:57:15.120-03') 52 | ), 53 | ( 54 | 84260, 55 | 'dag_tags_rbac', 56 | TIMESTAMP('2024-07-02 05:30:00.000-03'), 57 | 'success', 58 | 'scheduled__2024-07-02T08:30:00+00:00', 59 | false, 60 | '�\u0004}�.', 61 | TIMESTAMP('2024-07-02 06:01:20.844-03'), 62 | TIMESTAMP('2024-07-02 06:00:00.624-03'), 63 | 'scheduled', 64 | TIMESTAMP('2024-07-02 06:01:20.836-03'), 65 | '3616896069a7d5a3b40f4478372f03da', 66 | 110841, 67 | TIMESTAMP('2024-07-02 06:00:00.550-03'), 68 | TIMESTAMP('2024-07-02 05:30:00.000-03'), 69 | TIMESTAMP('2024-07-02 06:00:00.000-03'), 70 | 1, 71 | TIMESTAMP('2024-07-02 06:01:20.848-03') 72 | ); 73 | 74 | {% endset %} 75 | 76 | {% do run_query(create_table) %} 77 | {% do log("finished creating table dag_run", info=true) %} 78 | 79 | {% do run_query(insert_table) %} 80 | {% do log("finished insert table dag_run", info=true) %} 81 | 82 | 83 | {% endmacro %} -------------------------------------------------------------------------------- /models/docs/universal.md: -------------------------------------------------------------------------------- 1 | [comment]: < Universal > 2 | 3 | {% docs state_doc %} 4 | Json with state property of the pipeline execution in the following format: 5 | { 6 | "life_cycle_state": "TERMINATED", 7 | "result_state": "SUCCESS", 8 | "state_message": "", 9 | "user_cancelled_or_timedout": false 10 | } 11 | {% enddocs %} 12 | 13 | 14 | {% docs tasks_doc %} 15 | List of objects with information about the tasks. 
Example of a task in json: 16 | { 17 | "attempt_number": "0", 18 | "cleanup_duration": "0", 19 | "cluster_instance": { 20 | "cluster_id": "0426-123-kq2r1tew", 21 | "spark_context_id": "123" 22 | }, 23 | "dbt_task": null, 24 | "depends_on": null, 25 | "description": null, 26 | "end_time": "1701855074931", 27 | "execution_duration": "110000", 28 | "existing_cluster_id": "0426-123-kq2r1tew", 29 | "git_source": null, 30 | "libraries": null, 31 | "notebook_task": { 32 | "notebook_path": "/notebook", 33 | "source": "WORKSPACE" 34 | }, 35 | "run_id": "123", 36 | "setup_duration": "1000", 37 | "start_time": "1701854963851", 38 | "state": { 39 | "life_cycle_state": "TERMINATED", 40 | "result_state": "SUCCESS", 41 | "state_message": "", 42 | "user_cancelled_or_timedout": false 43 | }, 44 | "task_key": "ADFafb-123" 45 | } 46 | {% enddocs %} 47 | 48 | {% docs settings_doc %} 49 | Job configuration json like the following: 50 | { 51 | "email_notifications": { 52 | "no_alert_for_skipped_runs": false, 53 | "on_failure": null, 54 | "on_start": null, 55 | "on_success": null 56 | }, 57 | "format": "MULTI_TASK", 58 | "max_concurrent_runs": "1", 59 | "name": "Fact_TransactionProtected_V2", 60 | "notification_settings": null, 61 | "schedule": { 62 | "pause_status": "UNPAUSED", 63 | "quartz_cron_expression": "19 0 9 * * ?", 64 | "timezone_id": "America/Sao_Paulo" 65 | }, 66 | "timeout_seconds": "0" 67 | } 68 | {% enddocs %} 69 | -------------------------------------------------------------------------------- /integration_tests/seeds/airflow/task_fail.csv: -------------------------------------------------------------------------------- 1 | "id","task_id","dag_id","start_date","end_date","duration","map_index","run_id" 2 | 1,dbt_freshness,dbt_bitrix,2022-12-12 14:59:57.480 -0300,2022-12-12 14:59:59.035 -0300,1,-1,scheduled__2022-12-11T06:00:00+00:00 3 | 2,dbt_source_test,dbt_bitrix,2022-12-12 15:01:00.909 -0300,2022-12-12 15:01:02.861 -0300,1,-1,scheduled__2022-12-11T06:00:00+00:00 4 | 3,dbt_freshness,dbt_bitrix,2022-12-15 16:09:59.406 -0300,2022-12-15 16:10:01.164 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 5 | 4,dbt_freshness,dbt_bitrix,2022-12-15 16:11:33.938 -0300,2022-12-15 16:11:35.532 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 6 | 5,dbt_freshness,dbt_bitrix,2022-12-15 16:16:44.720 -0300,2022-12-15 16:16:46.632 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 7 | 6,dbt_freshness,dbt_bitrix,2022-12-15 17:59:26.557 -0300,2022-12-15 17:59:28.156 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 8 | 7,dbt_freshness,dbt_bitrix,2022-12-15 18:04:37.127 -0300,2022-12-15 18:04:38.722 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 9 | 8,dbt_freshness,dbt_bitrix,2022-12-15 18:18:55.583 -0300,2022-12-15 18:18:57.274 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 10 | 9,dbt_freshness,dbt_bitrix,2022-12-15 18:26:31.585 -0300,2022-12-15 18:26:33.287 -0300,1,-1,scheduled__2022-12-14T06:00:00+00:00 11 | 10,testing_dockeroperator,test_docker,2022-12-15 18:55:53.329 -0300,2022-12-15 18:55:55.427 -0300,2,-1,manual__2022-12-15T21:55:43.582236+00:00 12 | 11,dbt_freshness,dbt_bitrix,2022-12-16 09:49:25.205 -0300,2022-12-16 09:50:02.437 -0300,37,-1,scheduled__2022-12-15T06:00:00+00:00 13 | 12,dbt_freshness,dbt_bitrix,2022-12-16 11:27:41.768 -0300,2022-12-16 11:27:54.878 -0300,13,-1,scheduled__2022-12-15T06:00:00+00:00 14 | 13,dbt_freshness,dbt_bitrix,2022-12-16 14:14:03.698 -0300,2022-12-16 14:14:17.741 -0300,14,-1,scheduled__2022-12-15T06:00:00+00:00 15 | 14,dbt_freshness,dbt_bitrix,2022-12-16 14:24:27.859 
-0300,2022-12-16 14:24:42.344 -0300,14,-1,manual__2022-12-16T17:24:17.778111+00:00 16 | 15,dbt_freshness,dbt_bitrix,2022-12-16 14:55:23.124 -0300,2022-12-16 14:55:36.900 -0300,13,-1,manual__2022-12-16T17:55:14.271759+00:00 17 | 16,dbt_freshness,dbt_bitrix,2022-12-16 15:31:16.824 -0300,2022-12-16 15:31:55.576 -0300,38,-1,manual__2022-12-16T18:31:06.560155+00:00 18 | 17,dbt_freshness,dbt_bitrix,2022-12-17 03:00:17.475 -0300,2022-12-17 03:00:55.782 -0300,38,-1,scheduled__2022-12-16T06:00:00+00:00 19 | 18,dbt_freshness,dbt_bitrix,2022-12-17 03:06:06.742 -0300,2022-12-17 03:06:43.661 -0300,36,-1,scheduled__2022-12-16T06:00:00+00:00 20 | 19,dbt_freshness,dbt_bitrix,2022-12-18 03:00:19.544 -0300,2022-12-18 03:00:57.838 -0300,38,-1,scheduled__2022-12-17T06:00:00+00:00 21 | 20,dbt_freshness,dbt_bitrix,2022-12-18 03:06:07.809 -0300,2022-12-18 03:06:44.903 -0300,37,-1,scheduled__2022-12-17T06:00:00+00:00 -------------------------------------------------------------------------------- /models/marts/dim_dag_monitoring_task.sql: -------------------------------------------------------------------------------- 1 | with 2 | stg_task_instance as ( 3 | {% for src in var('enabled_sources') -%} 4 | select distinct 5 | task_id 6 | , dag_id 7 | , hostname 8 | , operator 9 | , task_pool 10 | , map_index 11 | , '{{ src }}' as source_system 12 | from {{ ref('stg_task_instance_' + src) }} 13 | {% if not loop.last -%} union {% endif -%} 14 | {% endfor -%} 15 | ) 16 | , stg_task_fail as ( 17 | {% for src in var('enabled_sources') -%} 18 | select distinct 19 | task_id 20 | , dag_id 21 | , map_index 22 | , {{ cast_as_string('null') }} as hostname 23 | , {{ cast_as_string('null') }} as operator 24 | , {{ cast_as_string('null') }} as task_pool 25 | , '{{ src }}' as source_system 26 | from {{ ref('stg_task_fail_' + src) }} 27 | {% if not loop.last -%} union {% endif -%} 28 | {% endfor -%} 29 | ) 30 | , union_task_instance_with_fail as ( 31 | select 32 | task_id 33 | , dag_id 34 | , map_index 35 | , hostname 36 | , operator 37 | , task_pool 38 | , source_system 39 | from stg_task_instance 40 | union all 41 | select 42 | task_id 43 | , dag_id 44 | , map_index 45 | , hostname 46 | , operator 47 | , task_pool 48 | , source_system 49 | from stg_task_fail 50 | ) 51 | , dedup_dim_task as ( 52 | select 53 | task_id 54 | , dag_id 55 | , map_index 56 | , hostname 57 | , operator 58 | , task_pool 59 | , source_system 60 | , row_number() over( 61 | partition by 62 | task_id 63 | , dag_id 64 | , source_system 65 | order by 66 | task_id 67 | , dag_id 68 | , source_system 69 | ) as dedup 70 | from union_task_instance_with_fail 71 | ) 72 | , dim_task_with_sk as ( 73 | select 74 | {{ dbt_utils.generate_surrogate_key([ 75 | 'task_id' 76 | , 'dag_id'] 77 | ) }} as task_sk 78 | , task_id 79 | , dag_id 80 | , map_index 81 | , hostname 82 | , operator 83 | , task_pool 84 | , source_system 85 | from dedup_dim_task 86 | where dedup = 1 87 | ) 88 | select * 89 | from dim_task_with_sk 90 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter and Publisher 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - closed 7 | branches: 8 | - main 9 | 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | new_release: 16 | if: github.event.pull_request.merged == true 17 | permissions: 18 | # write permission is required to create a github release 19 | contents: write 20 | # write permission 
is required for autolabeler 21 | # otherwise, read permission is required at least 22 | pull-requests: write 23 | runs-on: ubuntu-latest 24 | 25 | steps: 26 | - name: Checkout code 27 | uses: actions/checkout@v2 28 | with: 29 | fetch-depth: 0 30 | 31 | - name: Get branch name 32 | id: getbranch 33 | run: echo ::set-output name=BRANCH::${GITHUB_HEAD_REF} 34 | 35 | # ${{ github.ref }} was not giving v* as tag name, but refs/tags/v* instead, so I had to abbreviate it 36 | - name: Get latest abbreviated tag 37 | id: gettag 38 | run: echo ::set-output name=TAG::$(git describe --tags $(git rev-list --tags --max-count=1)) # get the latest tag across all branches and put it in the output TAG 39 | 40 | - name: Calculate next version 41 | id: nextversion 42 | run: | 43 | BRANCH_NAME="${{ steps.getbranch.outputs.BRANCH }}" 44 | CURRENT_VERSION="${{ steps.gettag.outputs.TAG }}" 45 | IFS='.' read -ra VERSION_PARTS <<< "$CURRENT_VERSION" 46 | if [[ $BRANCH_NAME =~ ^(major|release|Major|Release)/ ]]; then 47 | VERSION_PARTS[0]=$((VERSION_PARTS[0] + 1)) 48 | VERSION_PARTS[1]=0 49 | VERSION_PARTS[2]=0 50 | elif [[ $BRANCH_NAME =~ ^(feature|minor|Feature|Minor)/ ]]; then 51 | VERSION_PARTS[1]=$((VERSION_PARTS[1] + 1)) 52 | VERSION_PARTS[2]=0 53 | elif [[ $BRANCH_NAME =~ ^(patch|fix|hotfix|bugfix|Patch|Fix|Hotfix|Bugfix)/ ]]; then 54 | VERSION_PARTS[2]=$((VERSION_PARTS[2] + 1)) 55 | fi 56 | NEXT_VERSION="${VERSION_PARTS[0]}.${VERSION_PARTS[1]}.${VERSION_PARTS[2]}" 57 | echo ::set-output name=NEXT_VERSION::"$NEXT_VERSION" 58 | 59 | - name: Create and publish new tag 60 | run: | 61 | git tag ${{ steps.nextversion.outputs.NEXT_VERSION }} 62 | git push origin ${{ steps.nextversion.outputs.NEXT_VERSION }} 63 | 64 | - uses: release-drafter/release-drafter@v5 65 | with: 66 | commitish: main 67 | name: "dbt-dag-monitoring ${{ steps.nextversion.outputs.NEXT_VERSION }}" 68 | tag: ${{ steps.nextversion.outputs.NEXT_VERSION }} 69 | publish: true 70 | env: 71 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 72 | -------------------------------------------------------------------------------- /integration_tests/seeds/adf/adf_activity_runs.csv: -------------------------------------------------------------------------------- 1 | activityRunEnd,activityName,activityRunStart,activityType,durationInMs,retryAttempt,error_errorCode,error_message,error_failureType,error_target,activityRunId,linkedServiceName,pipelineName,pipelineRunId,status,output_effectiveIntegrationRuntime,input_source_type 2 | 2024-08-20T03:30:10.9735549Z,Set CurrentDate,2024-08-20T03:30:10.742531Z,SetVariable,231,,,,,Set CurrentDate,f653c43a-6508-42f8-8467-0e10152aa3f9,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 3 | 2024-08-20T03:30:11.5387841Z,Set Timestamp,2024-08-20T03:30:11.2745768Z,SetVariable,264,,,,,Set Timestamp,b8c48c2f-b0e6-45f0-a502-cee31dffba2e,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 4 | 2024-08-20T03:38:09.0201429Z,Restart LoadingControl,2024-08-20T03:30:11.9547107Z,DatabricksNotebook,477065,,,,,Restart LoadingControl,af8e3927-c2e3-4c54-9b07-b4c0df7d6564,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,AutoResolveIntegrationRuntime (Brazil South), 5 | 2024-08-20T03:39:13.7734401Z,Get All Tables,2024-08-20T03:38:10.4390219Z,Lookup,63334,,,,,Get All Tables,8df489d3-f7d3-4462-9080-6e5557e78638,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,AutoResolveIntegrationRuntime (Brazil 
South),AzureDatabricksDeltaLakeSource 6 | 2024-08-20T03:42:27.2972053Z,For Each Tables,2024-08-20T03:39:15.8346054Z,ForEach,191462,,,,,For Each Tables,35a4c708-cda1-470e-b202-ae76aa743c0d,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 7 | 2024-08-20T03:39:16.8973044Z,Set CurrentDate foreach,2024-08-20T03:39:16.6507636Z,SetVariable,246,,,,,Set CurrentDate foreach,913afaa0-b40b-4c8c-b95a-48011c5e0e1c,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 8 | 2024-08-20T03:39:16.907916Z,Set CurrentDate foreach,2024-08-20T03:39:16.652598Z,SetVariable,255,,,,,Set CurrentDate foreach,9316bfbf-e4e8-4c3a-a214-474524a71eac,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 9 | 2024-08-20T03:39:16.8983084Z,Set CurrentDate foreach,2024-08-20T03:39:16.6742498Z,SetVariable,224,,,,,Set CurrentDate foreach,98aaf33e-86eb-4b32-98c4-7af526d677c5,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 10 | 2024-08-20T03:39:16.9416579Z,Set CurrentDate foreach,2024-08-20T03:39:16.673797Z,SetVariable,267,,,,,Set CurrentDate foreach,1d466d96-8210-4f9a-94b9-d25405dae8a7,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,, 11 | 2024-08-20T03:40:37.1476554Z,Update StartDate,2024-08-20T03:39:17.2774453Z,DatabricksNotebook,79870,,,,,Update StartDate,6e608cd1-4444-4061-8384-cb36946508a2,,PL-FEMFILESCSVTODatalakeLandingZone-N,9f81a5eb-a7ca-482e-833e-db6082b73db5,Succeeded,AutoResolveIntegrationRuntime (Brazil South), 12 | -------------------------------------------------------------------------------- /models/marts/fact_dag_monitoring_task_instance.sql: -------------------------------------------------------------------------------- 1 | with 2 | dim_dag as ( 3 | select 4 | dag_id 5 | , dag_sk as dag_fk 6 | from {{ ref('dim_dag_monitoring_dag') }} 7 | ) 8 | , dim_task as ( 9 | select 10 | task_sk as task_fk 11 | , task_id 12 | , dag_id 13 | from {{ ref('dim_dag_monitoring_task') }} 14 | ) 15 | , util_days as ( 16 | select cast(date_day as date) as date_day 17 | from {{ ref('dbt_utils_day') }} 18 | ) 19 | , stg_task_instance as ( 20 | {% for src in var('enabled_sources') -%} 21 | select 22 | task_instance_sk 23 | , task_id 24 | , dag_id 25 | , run_id 26 | , execution_date 27 | , execution_start_date 28 | , execution_end_date 29 | , duration 30 | , state_task_instance 31 | , try_number 32 | , priority_weight 33 | , '{{ src }}' as source_system 34 | from {{ ref('stg_task_instance_' + src) }} 35 | {% if not loop.last -%} union {% endif -%} 36 | {% endfor -%} 37 | ) 38 | , joined as ( 39 | select 40 | stg_task_instance.task_instance_sk 41 | , stg_task_instance.task_id 42 | , stg_task_instance.dag_id 43 | , stg_task_instance.run_id 44 | , dim_dag.dag_fk 45 | , dim_task.task_fk 46 | , util_days.date_day 47 | , stg_task_instance.execution_start_date 48 | , stg_task_instance.execution_end_date 49 | , stg_task_instance.duration 50 | , stg_task_instance.state_task_instance 51 | , stg_task_instance.try_number 52 | , stg_task_instance.priority_weight 53 | , stg_task_instance.source_system 54 | from stg_task_instance 55 | left join dim_dag on stg_task_instance.dag_id = dim_dag.dag_id 56 | left join dim_task on 57 | stg_task_instance.task_id = dim_task.task_id 58 | and stg_task_instance.dag_id = dim_task.dag_id 59 | left join util_days on {{ cast_as_date('stg_task_instance.execution_date') }} = {{ cast_as_date('util_days.date_day') }} 60 | ) 61 | , 
surrogate_key as ( 62 | select 63 | {{ dbt_utils.generate_surrogate_key([ 64 | 'task_instance_sk' 65 | , 'execution_start_date' 66 | , 'execution_end_date' 67 | , 'run_id']) }} as task_instance_sk 68 | , dag_fk 69 | , task_fk 70 | , date_day as generated_date 71 | , execution_start_date 72 | , execution_end_date 73 | , duration 74 | , state_task_instance 75 | , try_number 76 | , priority_weight 77 | , source_system 78 | from joined 79 | ) 80 | select * 81 | from surrogate_key 82 | -------------------------------------------------------------------------------- /integration_tests/macros/seed__task_instance.sql: -------------------------------------------------------------------------------- 1 | {% macro seed__task_instance() %} 2 | {% set create_table %} 3 | CREATE OR REPLACE TABLE `{{ target.database }}`.{{ target.schema }}.task_instance ( 4 | TASK_ID STRING, 5 | DAG_ID STRING, 6 | RUN_ID STRING, 7 | START_DATE TIMESTAMP, 8 | END_DATE TIMESTAMP, 9 | DURATION FLOAT64, 10 | STATE STRING, 11 | TRY_NUMBER INT64, 12 | HOSTNAME STRING, 13 | UNIXNAME STRING, 14 | JOB_ID NUMERIC, 15 | POOL STRING, 16 | QUEUE STRING, 17 | PRIORITY_WEIGHT INT64, 18 | OPERATOR STRING, 19 | QUEUED_DTTM TIMESTAMP, 20 | PID INT64, 21 | MAX_TRIES INT64, 22 | EXECUTOR_CONFIG STRING, 23 | POOL_SLOTS INT64, 24 | QUEUED_BY_JOB_ID NUMERIC, 25 | EXTERNAL_EXECUTOR_ID NUMERIC, 26 | TRIGGER_ID NUMERIC, 27 | TRIGGER_TIMEOUT INT64, 28 | NEXT_METHOD INT64, 29 | NEXT_KWARGS INT64, 30 | MAP_INDEX INT64, 31 | UPDATED_AT TIMESTAMP 32 | ); 33 | 34 | {% endset %} 35 | 36 | {% set insert_table %} 37 | 38 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.task_instance ( 39 | TASK_ID, 40 | DAG_ID, 41 | RUN_ID, 42 | START_DATE, 43 | END_DATE, 44 | DURATION, 45 | STATE, 46 | TRY_NUMBER, 47 | HOSTNAME, 48 | UNIXNAME, 49 | JOB_ID, 50 | POOL, 51 | QUEUE, 52 | PRIORITY_WEIGHT, 53 | OPERATOR, 54 | QUEUED_DTTM, 55 | PID, 56 | MAX_TRIES, 57 | EXECUTOR_CONFIG, 58 | POOL_SLOTS, 59 | QUEUED_BY_JOB_ID, 60 | EXTERNAL_EXECUTOR_ID, 61 | TRIGGER_ID, 62 | TRIGGER_TIMEOUT, 63 | NEXT_METHOD, 64 | NEXT_KWARGS, 65 | MAP_INDEX, 66 | UPDATED_AT 67 | ) 68 | VALUES 69 | ( 70 | 'dbt_source_test', 71 | 'dbt_bitrix', 72 | 'scheduled__2023-01-12T06:00:00+00:00', 73 | TIMESTAMP('2023-12-27T02:30:16.714366Z'), 74 | TIMESTAMP('2023-12-21T09:01:57.631415Z'), 75 | 20.207217, 76 | 'success', 77 | 1, 78 | 'dbtbitrixdb1374e48913fad10b54fdd67', 79 | 'root', 80 | 332, 81 | 'default_pool', 82 | 'default', 83 | 17, 84 | 'DockerOperator', 85 | '2024-02-02T11:01:54.071588Z', 86 | 21, 87 | 2, 88 | '�\u0004}�.', 89 | 1, 90 | 201, 91 | NULL, 92 | NULL, 93 | NULL, 94 | NULL, 95 | NULL, 96 | -1, 97 | NULL 98 | ), 99 | ( 100 | 'dump_table1_to_DL', 101 | 'sample_fist', 102 | 'scheduled__2022-02-03T00:00:00+00:00', 103 | TIMESTAMP('2023-12-21T09:01:57.631415Z'), 104 | TIMESTAMP('2023-12-23T08:30:25.791135Z'), 105 | 0.700336, 106 | 'success', 107 | 1, 108 | 'samplefistdumpa58b822dbb5f5c21bec', 109 | 'root', 110 | 20, 111 | 'default_pool', 112 | 'default', 113 | 2, 114 | 'BashOperator', 115 | '2024-02-02T11:02:10.162511Z', 116 | 21, 117 | 1, 118 | '�\u0004}�.', 119 | 1, 120 | 7, 121 | NULL, 122 | NULL, 123 | NULL, 124 | NULL, 125 | NULL, 126 | -1, 127 | NULL 128 | ); 129 | 130 | 131 | {% endset %} 132 | 133 | {% do run_query(create_table) %} 134 | {% do log("finished creating table task_instance", info=true) %} 135 | 136 | {% do run_query(insert_table) %} 137 | {% do log("finished insert table task_instance", info=true) %} 138 | 139 | {% endmacro %} 
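The airflow seed CSVs that follow carry their timestamp columns as plain text. Purely to illustrate how such columns could be typed explicitly when seeding, here is a hypothetical dbt seed configuration; the project name is a placeholder and the actual integration_tests/dbt_project.yml may not configure this at all.

```yaml
# Hypothetical sketch; not taken from the actual integration_tests/dbt_project.yml.
seeds:
  dbt_dag_monitoring_integration_tests:  # placeholder project name
    airflow:
      dag_run:
        +column_types:
          execution_date: timestamp
          start_date: timestamp
          end_date: timestamp
      task_instance:
        +column_types:
          start_date: timestamp
          end_date: timestamp
```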
-------------------------------------------------------------------------------- /integration_tests/seeds/airflow/dag.csv: -------------------------------------------------------------------------------- 1 | "dag_id","is_paused","is_subdag","is_active","last_parsed_time","last_pickled","last_expired","scheduler_lock","pickle_id","fileloc","owners","description","default_view","schedule_interval","root_dag_id","next_dagrun","next_dagrun_create_after","max_active_tasks","has_task_concurrency_limits","max_active_runs","next_dagrun_data_interval_start","next_dagrun_data_interval_end","has_import_errors","timetable_description","processor_subdir" 2 | test_docker,false,false,false,2022-12-16 09:35:19.433 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/test_dag.py,Indicium,,grid,"""0 6 * * * """,,2022-12-16 03:00:00.000 -0300,2022-12-17 03:00:00.000 -0300,16,false,16,2022-12-16 03:00:00.000 -0300,2022-12-17 03:00:00.000 -0300,false,At 06:00, 3 | dbt,true,false,false,2022-11-25 16:12:51.922 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,airflow,,grid,"""7/15 9-23 * * *""",,2022-02-01 06:07:00.000 -0300,2022-02-01 06:22:00.000 -0300,16,false,1,2022-02-01 06:07:00.000 -0300,2022-02-01 06:22:00.000 -0300,true,"Every 15 minutes, starting at 7 minutes past the hour, between 09:00 and 23:59", 4 | enterprise_sync_marketing,true,false,false,2024-03-19 17:41:15.253 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,DSaaS,,grid,"""0 18 * * *""",,2024-03-18 15:00:00.000 -0300,2024-03-19 15:00:00.000 -0300,16,false,1,2024-03-18 15:00:00.000 -0300,2024-03-19 15:00:00.000 -0300,true,At 18:00,/opt/airflow/dags/b50e21f1f72af1012e31506b48198ba61244fd4f/airflow/dags 5 | sample_fist,true,false,false,2022-11-25 16:12:51.927 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,airflow,,grid,"""@daily""",,2022-02-02 21:00:00.000 -0300,,16,false,1,2022-02-02 21:00:00.000 -0300,2022-02-03 21:00:00.000 -0300,true,At 00:00, 6 | sheets-projects,false,false,false,2023-08-17 16:48:17.895 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/sheets-projects.py,DSaaS,Dag to run xxx pipeline,grid,"""0 6 * * * """,,2023-08-17 03:00:00.000 -0300,2023-08-18 03:00:00.000 -0300,16,false,16,2023-08-17 03:00:00.000 -0300,2023-08-18 03:00:00.000 -0300,true,At 06:00,/opt/airflow/dags/7d473a2591c8679c8529ce9b4600489ee12e7c62/airflow/dags 7 | dbt_snowflake_enterprise_dsaas,true,false,false,2024-04-16 10:33:35.408 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,airflow,,grid,null,,,,16,false,1,,,false,"Never, external triggers only",/opt/airflow/dags/172ebb0521cadde76475236c3412f732339c590b/airflow/dags 8 | enterprise_sync,true,false,false,2024-04-16 10:33:35.411 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,DSaaS,,grid,"""0 17 * * *""",,2024-04-14 14:00:00.000 -0300,2024-04-15 14:00:00.000 -0300,16,false,1,2024-04-14 14:00:00.000 -0300,2024-04-15 14:00:00.000 -0300,false,At 17:00,/opt/airflow/dags/172ebb0521cadde76475236c3412f732339c590b/airflow/dags 9 | enterprise_sync_public,true,false,false,2024-04-16 10:33:35.417 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/all_dags.py,DSaaS,,grid,"""0 16 * * *""",,2024-04-14 13:00:00.000 -0300,2024-04-15 13:00:00.000 -0300,16,false,1,2024-04-14 13:00:00.000 -0300,2024-04-15 13:00:00.000 -0300,false,At 16:00,/opt/airflow/dags/172ebb0521cadde76475236c3412f732339c590b/airflow/dags 10 | bitrix_projects_validate_dag,true,false,false,2023-12-19 19:51:48.161 -0300,,,,,/opt/airflow/dags/repo/airflow/dags/bitrix_projects validate_dag.py,DSaaS,Dag to run hours extractions 
pipeline,grid,"""0 0/12 * * *""",,2023-12-18 21:00:00.000 -0300,2023-12-19 09:00:00.000 -0300,16,false,16,2023-12-18 21:00:00.000 -0300,2023-12-19 09:00:00.000 -0300,false,Every 12 hours,/opt/airflow/dags/fb3710f58ae5f8df430f2bbe40ef211ccebc012b/airflow/dags 11 | -------------------------------------------------------------------------------- /integration_tests/seeds/airflow/dag_run.csv: -------------------------------------------------------------------------------- 1 | "id","dag_id","execution_date","state","run_id","external_trigger","conf","end_date","start_date","run_type","last_scheduling_decision","dag_hash","creating_job_id","queued_at","data_interval_start","data_interval_end","log_template_id","updated_at" 2 | 87755,airflow_status_monitoring,2024-07-04 10:56:00.000 -0300,success,scheduled__2024-07-04T13:56:00+00:00,false,�\u0004}�.,2024-07-04 10:57:15.119 -0300,2024-07-04 10:57:00.256 -0300,scheduled,2024-07-04 10:57:15.109 -0300,"2606cfccb8540961ee80c09fe32dcc8d",110841,2024-07-04 10:57:00.210 -0300,2024-07-04 10:56:00.000 -0300,2024-07-04 10:57:00.000 -0300,1,2024-07-04 10:57:15.120 -0300 3 | 84260,dag_tags_rbac,2024-07-02 05:30:00.000 -0300,success,scheduled__2024-07-02T08:30:00+00:00,false,�\u0004}�.,2024-07-02 06:01:20.844 -0300,2024-07-02 06:00:00.624 -0300,scheduled,2024-07-02 06:01:20.836 -0300,"3616896069a7d5a3b40f4478372f03da",110841,2024-07-02 06:00:00.550 -0300,2024-07-02 05:30:00.000 -0300,2024-07-02 06:00:00.000 -0300,1,2024-07-02 06:01:20.848 -0300 4 | 84507,airflow_status_monitoring,2024-07-02 09:40:00.000 -0300,success,scheduled__2024-07-02T12:40:00+00:00,false,�\u0004}�.,2024-07-02 09:41:17.534 -0300,2024-07-02 09:41:00.866 -0300,scheduled,2024-07-02 09:41:17.528 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 09:41:00.832 -0300,2024-07-02 09:40:00.000 -0300,2024-07-02 09:41:00.000 -0300,1,2024-07-02 09:41:17.536 -0300 5 | 84775,airflow_status_monitoring,2024-07-02 13:48:00.000 -0300,success,scheduled__2024-07-02T16:48:00+00:00,false,�\u0004}�.,2024-07-02 13:49:15.733 -0300,2024-07-02 13:49:00.347 -0300,scheduled,2024-07-02 13:49:15.728 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 13:49:00.289 -0300,2024-07-02 13:48:00.000 -0300,2024-07-02 13:49:00.000 -0300,1,2024-07-02 13:49:15.742 -0300 6 | 84508,airflow_status_monitoring,2024-07-02 09:41:00.000 -0300,success,scheduled__2024-07-02T12:41:00+00:00,false,�\u0004}�.,2024-07-02 09:42:14.676 -0300,2024-07-02 09:42:00.647 -0300,scheduled,2024-07-02 09:42:14.666 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 09:42:00.591 -0300,2024-07-02 09:41:00.000 -0300,2024-07-02 09:42:00.000 -0300,1,2024-07-02 09:42:14.678 -0300 7 | 84509,airflow_status_monitoring,2024-07-02 09:42:00.000 -0300,success,scheduled__2024-07-02T12:42:00+00:00,false,�\u0004}�.,2024-07-02 09:43:16.490 -0300,2024-07-02 09:43:00.343 -0300,scheduled,2024-07-02 09:43:16.485 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 09:43:00.262 -0300,2024-07-02 09:42:00.000 -0300,2024-07-02 09:43:00.000 -0300,1,2024-07-02 09:43:16.491 -0300 8 | 84346,airflow_status_monitoring,2024-07-02 07:14:00.000 -0300,success,scheduled__2024-07-02T10:14:00+00:00,false,�\u0004}�.,2024-07-02 07:15:17.620 -0300,2024-07-02 07:15:00.566 -0300,scheduled,2024-07-02 07:15:17.614 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 07:15:00.528 -0300,2024-07-02 07:14:00.000 -0300,2024-07-02 07:15:00.000 -0300,1,2024-07-02 07:15:17.621 -0300 9 | 84510,airflow_status_monitoring,2024-07-02 09:43:00.000 
-0300,success,scheduled__2024-07-02T12:43:00+00:00,false,�\u0004}�.,2024-07-02 09:44:16.026 -0300,2024-07-02 09:44:00.704 -0300,scheduled,2024-07-02 09:44:16.020 -0300,cc1ebde1fd1efe890fd5100206099694,110841,2024-07-02 09:44:00.665 -0300,2024-07-02 09:43:00.000 -0300,2024-07-02 09:44:00.000 -0300,1,2024-07-02 09:44:16.027 -0300 10 | 84776,bitrix_refresh_access_token,2024-07-02 13:00:00.000 -0300,success,scheduled__2024-07-02T16:00:00+00:00,false,�\u0004}�.,2024-07-02 13:50:19.597 -0300,2024-07-02 13:50:00.487 -0300,scheduled,2024-07-02 13:50:19.592 -0300,b71d3629e5a26934dd20b6e9a3335f84,110841,2024-07-02 13:50:00.361 -0300,2024-07-02 13:00:00.000 -0300,2024-07-02 13:50:00.000 -0300,1,2024-07-02 13:50:19.599 -0300 11 | -------------------------------------------------------------------------------- /macros/model_task_instance_databricks_workflow.sql: -------------------------------------------------------------------------------- 1 | {% macro model_task_instance_databricks_workflow() -%} 2 | {{ return(adapter.dispatch('model_task_instance_databricks_workflow')()) }} 3 | {%- endmacro %} 4 | 5 | 6 | {% macro default__model_task_instance_databricks_workflow() -%} 7 | with 8 | flatten_data as ( 9 | select 10 | job_runs.job_id 11 | , job_runs.inserteddate as inserted_date 12 | , exploded_tasks.* 13 | from 14 | {{ source('raw_databricks_workflow_monitoring', 'job_runs') }} as job_runs 15 | {{ flatten_data('tasks') }} as exploded_tasks 16 | ) 17 | , renamed as ( 18 | select 19 | {{ cast_as_string("flatten_data.task_key") }} as task_id 20 | , {{ cast_as_string("flatten_data.job_id") }} as dag_id 21 | , {{ cast_as_string("flatten_data.run_id") }} as run_id 22 | , {{cast_as_timestamp('flatten_data.start_time')}} as execution_date 23 | , {{cast_as_timestamp('flatten_data.start_time')}} as execution_start_date 24 | , {{cast_as_timestamp('flatten_data.end_time')}} as execution_end_date 25 | , (flatten_data.execution_duration / 1000) as duration 26 | , {{replace_dot_for_colon('state','result_state')}} as state_task_instance 27 | , attempt_number as try_number 28 | , {{replace_dot_for_colon('notebook_task','notebook_path')}} as hostname 29 | , 'not_implemented_for_databricks_workflow' as task_pool 30 | , 'not_implemented_for_databricks_workflow' as priority_weight 31 | , case 32 | when {{replace_dot_for_colon('notebook_task','notebook_path')}} is not null then 33 | {{replace_dot_for_colon('notebook_task','notebook_path')}} 34 | else flatten_data.task_key 35 | end as operator 36 | , 'not_implemented_for_databricks_workflow' as map_index 37 | from flatten_data 38 | ) 39 | select 40 | {{ dbt_utils.generate_surrogate_key(['task_id', 'dag_id', 'run_id']) }} as task_instance_sk 41 | , * 42 | from renamed 43 | {%- endmacro %} 44 | 45 | {% macro snowflake__model_task_instance_databricks_workflow() -%} 46 | with 47 | flatten_data as ( 48 | select * 49 | from 50 | {{ source('raw_databricks_workflow_monitoring', 'job_runs') }} as job_runs 51 | {{ flatten_data('"tasks"') }} as exploded_tasks 52 | ) 53 | , renamed as ( 54 | select 55 | {{ cast_as_string("value:task_key") }} as task_id 56 | , {{ cast_as_string("job_id") }} as dag_id 57 | , {{ cast_as_string("run_id") }} as run_id 58 | , {{cast_as_timestamp('start_time')}} as execution_date 59 | , {{cast_as_timestamp('start_time')}} as execution_start_date 60 | , {{cast_as_timestamp('end_time')}} as execution_end_date 61 | , (execution_duration / 1000) as duration 62 | , {{replace_dot_for_colon('state','result_state')}} as state_task_instance 63 | , 
{{replace_dot_for_colon('value','attempt_number')}} as try_number 64 | , {{replace_dot_for_colon('value','notebook_task.notebook_path')}} as hostname 65 | , 'not_implemented_for_databricks_workflow' as task_pool 66 | , 'not_implemented_for_databricks_workflow' as priority_weight 67 | , case 68 | when {{replace_dot_for_colon('value','notebook_task.notebook_path')}} is not null then 69 | {{replace_dot_for_colon('value','notebook_task.notebook_path')}} 70 | else {{replace_dot_for_colon('value','task_key')}} 71 | end as operator 72 | , 'not_implemented_for_databricks_workflow' as map_index 73 | from flatten_data 74 | ) 75 | select 76 | {{ dbt_utils.generate_surrogate_key(['task_id', 'dag_id', 'run_id']) }} as task_instance_sk 77 | , * 78 | from renamed 79 | {%- endmacro %} 80 | -------------------------------------------------------------------------------- /models/staging/adf_sources/source.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: raw_adf_monitoring 5 | description: " Raw data extracted from ADF for ADF monitoring analysis." 6 | database: "{{ var('dag_monitoring_adf_database', '')}}" 7 | schema: "{{ var('dag_monitoring_adf_schema', '') }}" 8 | tables: 9 | - name: adf_pipeline_runs 10 | description: "table that contains data from ADF pipeline runs." 11 | columns: 12 | - name: id 13 | description: "Table id." 14 | tests: 15 | - not_null 16 | - unique 17 | 18 | - name: pipelineName 19 | description: " Name of the pipeline of the dag run." 20 | tests: 21 | - not_null 22 | 23 | - name: runStart 24 | description: "Execution date" 25 | 26 | - name: status 27 | description: "DAG run state." 28 | 29 | - name: invokedBy 30 | description: "Points if the DAG run was triggered externally (True / False)." 31 | 32 | - name: runStart 33 | description: "Date and time when the DAG run started." 34 | 35 | - name: runEnd 36 | description: "Date and time when the DAG run ended." 37 | 38 | - name: durationInMs 39 | description: "DAG duration in Milliseconds" 40 | 41 | - name: adf_activity_runs 42 | description: "table that contains data from ADF activity runs." 43 | columns: 44 | - name: activityRunId 45 | description: "task id." 46 | tests: 47 | - not_null 48 | 49 | - name: pipelineName 50 | description: "Pipeline id to which this activity belongs." 51 | tests: 52 | - not_null 53 | 54 | - name: pipelineRunId 55 | description: "Pipeline execution id to which this activity belongs." 56 | tests: 57 | - not_null 58 | 59 | - name: activityRunStart 60 | description: " Date and time when the execution started." 61 | 62 | - name: activityRunEnd 63 | description: "Date and time when the execution ended." 64 | 65 | - name: durationInMs 66 | description: "Duration of the execution in Milliseconds." 67 | 68 | - name: map_index 69 | description: "Mapping index" 70 | 71 | - name: adf_pipelines 72 | description: "Table that contains information about ADF pipelines." 73 | columns: 74 | - name: id 75 | description: "table id." 76 | tests: 77 | - not_null 78 | - unique 79 | 80 | - name: is_paused 81 | description: "If the dag is paused." 82 | 83 | - name: is_active 84 | description: "If the DAG is active." 85 | 86 | - name: description 87 | description: "DAG description" 88 | 89 | - name: fileloc 90 | description: "File path that needs to be imported to load this DAG." 91 | 92 | - name: owners 93 | description: "DAG owner." 
94 | 95 | - name: timetable_description 96 | description: "Description of the scheduling table" 97 | 98 | - name: ind_extraction_date 99 | description: "Date of extraction of the table" 100 | 101 | - name: adf_triggers 102 | description: "Table that contains information about ADF triggers." 103 | columns: 104 | - name: id 105 | description: "Identification of the table." 106 | tests: 107 | - not_null 108 | - unique 109 | 110 | - name: properties.runtimeState 111 | description: "If the trigger is active or not." 112 | 113 | - name: properties.annotations 114 | description: "Annotations in the trigger." 115 | 116 | - name: properties.pipelines 117 | description: "Pipelines that are executed by this trigger." 118 | 119 | - name: properties.typeProperties.recurrence.frequency 120 | description: "Frequency with which the pipeline is executed e.g Hour, Day, Week, Month" 121 | 122 | - name: properties.typeProperties.recurrence.interval 123 | description: "In how many 'frequency' this trigger is executed e.g 1 Day, 2 Week, being 1 and 2 the interval" 124 | 125 | - name: properties.typeProperties.recurrence.schedule 126 | description: "Scheduling defined by the table" 127 | 128 | - name: properties.typeProperties.recurrence.startTime 129 | description: "First execution" 130 | 131 | - name: properties.typeProperties.recurrence.timeZone 132 | description: "Time zone of the trigger" 133 | 134 | - name: dbt_utils_day 135 | description: "Table that contains data from the dates created from the dbt_utils macro." 136 | -------------------------------------------------------------------------------- /models/staging/airflow_sources/source.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: raw_airflow_monitoring 5 | description: "Raw data extracted from Airflow for Airflow monitoring analysis." 6 | database: "{{ var('dag_monitoring_airflow_database', '')}}" 7 | schema: "{{ var('dag_monitoring_airflow_schema', '') }}" 8 | tables: 9 | - name: dag_run 10 | description: "Table that contains data from Airflow DAG runs." 11 | columns: 12 | - name: id 13 | description: "Id of the table" 14 | tests: 15 | - not_null 16 | - unique 17 | 18 | - name: dag_id 19 | description: "Id of the dag run." 20 | tests: 21 | - not_null 22 | 23 | - name: execution_date 24 | description: "Date of execution." 25 | 26 | - name: state 27 | description: "state of the DAG run." 28 | 29 | - name: external_trigger 30 | description: "Points if the DAG run was triggered externally (True / False)." 31 | 32 | - name: start_date 33 | description: "Date and time when the DAG run started." 34 | 35 | - name: end_date 36 | description: "Date and time when the DAG run ended." 37 | 38 | - name: run_type 39 | description: "Type of DAG run." 40 | 41 | - name: task_instance 42 | description: "Table that contains data from Airflow task instances." 43 | columns: 44 | - name: task_id 45 | description: "Id of the executed task" 46 | tests: 47 | - not_null 48 | 49 | - name: dag_id 50 | description: "Identification of the dag." 51 | tests: 52 | - not_null 53 | 54 | - name: run_id 55 | description: "Identification of the run." 56 | 57 | - name: start_date 58 | description: " Date and time when the execution started." 59 | 60 | - name: end_date 61 | description: "Date and time when the execution ended." 62 | 63 | - name: duration 64 | description: "Duration of the execution in seconds." 65 | 66 | - name: state 67 | description: "The state of the task execution." 
68 | 69 | - name: try_number 70 | description: "Number of execution attempts." 71 | 72 | - name: hostname 73 | description: "Task hostname." 74 | 75 | - name: pool 76 | description: "The airflow pool in which the task should be executed." 77 | 78 | - name: priority_weight 79 | description: "Priority of the task." 80 | 81 | - name: operator 82 | description: "Task model operator." 83 | 84 | - name: queue 85 | description: "Task queue." 86 | 87 | - name: pool_slots 88 | description: "Pool slots quantity." 89 | 90 | - name: map_index 91 | description: "Mapping index." 92 | 93 | - name: task_fail 94 | description: "Table that contains data from Airflow tasks with failures." 95 | columns: 96 | - name: id 97 | description: "Table id." 98 | tests: 99 | - not_null 100 | - unique 101 | 102 | - name: task_id 103 | description: "Task id." 104 | tests: 105 | - not_null 106 | 107 | - name: dag_id 108 | description: "Dag id." 109 | tests: 110 | - not_null 111 | 112 | - name: start_date 113 | description: "Date and time when the execution started." 114 | 115 | - name: end_date 116 | description: "Date and time when the execution ended." 117 | 118 | - name: duration 119 | description: "Duration of the execution in seconds." 120 | 121 | - name: map_index 122 | description: "Mapping index" 123 | 124 | - name: dag 125 | description: "Table that contains information about Airflow DAGs." 126 | columns: 127 | - name: dag_id 128 | description: "Dag id." 129 | tests: 130 | - not_null 131 | - unique 132 | 133 | - name: is_paused 134 | description: "If the dag is paused." 135 | 136 | - name: is_active 137 | description: "If the DAG is active." 138 | 139 | - name: description 140 | description: "DAG description." 141 | 142 | - name: fileloc 143 | description: "File path that needs to be imported to load this DAG." 144 | 145 | - name: owners 146 | description: "DAG owner." 147 | 148 | - name: timetable_description 149 | description: "Description of the scheduling table." 150 | 151 | - name: dbt_utils_day 152 | description: "Table that contains data from dates created from the dbt_utils macro." 
153 | -------------------------------------------------------------------------------- /models/calendar/dim_dag_monitoring_dates.sql: -------------------------------------------------------------------------------- 1 | {% set end_date_query %} 2 | select {{ date_add("year", "100", "current_date()") }} 3 | {% endset %} 4 | 5 | {% if execute %} 6 | {%set end_date = run_query(end_date_query).columns[0].values()[0] %} 7 | {% else %} 8 | {% set end_date = ' ' %} 9 | {% endif %} 10 | 11 | /* generating dates using a dbt-utils macro */ 12 | with 13 | dates_raw as ( 14 | {{ dbt_utils.date_spine( 15 | datepart="day", 16 | start_date="cast('1970-01-01' as date)", 17 | end_date="cast('" ~ end_date ~ "' as date)" 18 | ) 19 | }} 20 | ) 21 | 22 | /* extracting some date information*/ 23 | , days_info as ( 24 | select 25 | cast(date_day as date) as date_day 26 | , extract(DAYOFWEEK from date_day) as week_day 27 | , extract(month from date_day) as month_number 28 | , extract(quarter from date_day) as quarter_number 29 | , {{ day_of_year("date_day") }} as day_of_year 30 | , extract(year from date_day) as year_date 31 | , {{ month_day('date_day') }} as month_day 32 | from dates_raw 33 | ) 34 | 35 | /**/ 36 | , days_named as ( 37 | select 38 | * 39 | , {{ day_of_week('week_day') }} 40 | , case 41 | when month_number = 1 then 'January' 42 | when month_number = 2 then 'February' 43 | when month_number = 3 then 'March' 44 | when month_number = 4 then 'April' 45 | when month_number = 5 then 'May' 46 | when month_number = 6 then 'June' 47 | when month_number = 7 then 'July' 48 | when month_number = 8 then 'August' 49 | when month_number = 9 then 'September' 50 | when month_number = 10 then 'October' 51 | when month_number = 11 then 'November' 52 | else 'December' 53 | end as month_name 54 | , case 55 | when month_number = 1 then 'Jan' 56 | when month_number = 2 then 'Feb' 57 | when month_number = 3 then 'Mar' 58 | when month_number = 4 then 'Apr' 59 | when month_number = 5 then 'May' 60 | when month_number = 6 then 'Jun' 61 | when month_number = 7 then 'Jul' 62 | when month_number = 8 then 'Aug' 63 | when month_number = 9 then 'Sep' 64 | when month_number = 10 then 'Oct' 65 | when month_number = 11 then 'Nov' 66 | else 'Dec' 67 | end as month_short 68 | , case 69 | when quarter_number = 1 then '1º quarter' 70 | when quarter_number = 2 then '2º quarter' 71 | when quarter_number = 3 then '3º quarter' 72 | else '4º quarter' 73 | end as quarter_name 74 | , case 75 | when quarter_number in(1,2) then 1 76 | else 2 77 | end as semester 78 | , case 79 | when quarter_number in(1,2) then '1º Semester' 80 | else '2º Semester' 81 | end as semester_name 82 | from days_info 83 | ) 84 | 85 | , flags_cte as ( 86 | /*flags related to holidays and business days*/ 87 | select 88 | * 89 | , case 90 | when month_day = '01-01' then true 91 | when month_day = '21-04' then true 92 | when month_day = '01-05' then true 93 | when month_day = '07-09' then true 94 | when month_day = '12-10' then true 95 | when month_day = '02-11' then true 96 | when month_day = '15-11' then true 97 | when month_day = '25-12' then true 98 | else false 99 | end as fl_holiday 100 | , case 101 | when week_day in(6, 0) then false 102 | when month_day = '01-01' then false 103 | when month_day = '21-04' then false 104 | when month_day = '01-05' then false 105 | when month_day = '07-09' then false 106 | when month_day = '12-10' then false 107 | when month_day = '02-11' then false 108 | when month_day = '15-11' then false 109 | when month_day = '25-12' then false 110 
| else true 111 | end as fl_business_day 112 | , coalesce(week_day in(6, 0), false) as fl_weekends 113 | from days_named 114 | ) 115 | 116 | /* reorganizing the columns */ 117 | , final_cte as ( 118 | select 119 | date_day 120 | , week_day 121 | , name_of_day 122 | , month_number 123 | , month_name 124 | , month_short 125 | , quarter_number 126 | , quarter_name 127 | , semester 128 | , semester_name 129 | , fl_holiday 130 | , fl_business_day 131 | , fl_weekends 132 | , day_of_year 133 | , year_date 134 | from flags_cte 135 | ) 136 | 137 | select * 138 | from final_cte -------------------------------------------------------------------------------- /integration_tests/macros/adf_pipeline_runs.sql: -------------------------------------------------------------------------------- 1 | {% macro adf_pipeline_runs() -%} 2 | {{ return(adapter.dispatch('adf_pipeline_runs')()) }} 3 | {%- endmacro %} 4 | 5 | {%- macro default__adf_pipeline_runs() -%} 6 | {% set create_table %} 7 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_pipeline_runs( 8 | id STRING, 9 | runId STRING, 10 | debugRunId STRING, 11 | runGroupId STRING, 12 | pipelineName STRING, 13 | parameters STRUCT< 14 | ENVIRONMENT STRING, 15 | RESET_TYPE STRING, 16 | DAYS_BEFORE STRING 17 | >, 18 | invokedBy STRUCT< 19 | id STRING, 20 | name STRING, 21 | invokedByType STRING, 22 | pipelineName STRING, 23 | pipelineRunId STRING 24 | >, 25 | runStart TIMESTAMP, 26 | runEnd TIMESTAMP, 27 | durationInMs BIGINT, 28 | status STRING, 29 | message STRING, 30 | pipelineReturnValue MAP, 31 | lastUpdated TIMESTAMP, 32 | annotations ARRAY, 33 | runDimension MAP, 34 | isLatest BOOLEAN 35 | ); 36 | {% endset %} 37 | 38 | {% set insert_table %} 39 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.adf_pipeline_runs 40 | VALUES 41 | ( 42 | '/SUBSCRIPTIONS/9F075ORIES/TLO-DATASTUDIO-ADF-D/pipe082b73db5', 43 | '9f81a5eb-db6082b73db5', 44 | NULL, 45 | '9f81a5eb-a7c82b73db5', 46 | 'PL-FEMFILESCSLandingZone-N', 47 | NAMED_STRUCT( 48 | 'ENVIRONMENT', 'FILESCSV', 49 | 'RESET_TYPE', 'FULL', 50 | 'DAYS_BEFORE', NULL 51 | ), 52 | NAMED_STRUCT( 53 | 'id', 'cf613b7b-e0c4cfbe8', 54 | 'name', 'cf613b7b-e0ce924dc0cfbe8', 55 | 'invokedByType', 'PipelineActivity', 56 | 'pipelineName', 'PL-OrquestraZeroLoadingControl', 57 | 'pipelineRunId', '598ec8ee-6e049c4b8c558' 58 | ), 59 | '2024-08-20T03:30:06.6061079Z', 60 | '2024-08-20T03:47:03.9865228Z', 61 | 1017380, 62 | 'Succeeded', 63 | NULL, 64 | MAP(), 65 | '2024-08-20T03:47:03.9879388Z', 66 | ARRAY(), 67 | MAP(), 68 | true 69 | ), 70 | ( 71 | '/SUBSCRIPTIONS/9//pipelinerua1-9313-73fa5c0a3f0e', 72 | '64c7a8c7--73fa5c0a3f0e', 73 | NULL, 74 | '64c7a8c7-30e23fa5c0a3f0e', 75 | 'NET_REC_DAILY_PRICE_UPDATE', 76 | NAMED_STRUCT( 77 | 'ENVIRONMENT', NULL, 78 | 'RESET_TYPE', NULL, 79 | 'DAYS_BEFORE', '1' 80 | ), 81 | NAMED_STRUCT( 82 | 'id', '0858477451681969CU22', 83 | 'name', 'NET_REC_DAILY_UPDATE', 84 | 'invokedByType', 'ScheduleTrigger', 85 | 'pipelineName', NULL, 86 | 'pipelineRunId', NULL 87 | ), 88 | '2024-08-20T12:00:31.2728264Z', 89 | '2024-08-20T13:15:52.6545498Z', 90 | 4521381, 91 | 'Succeeded', 92 | NULL, 93 | MAP(), 94 | '2024-08-20T13:15:52.6550273Z', 95 | ARRAY( 96 | 'ted' 97 | ), 98 | MAP(), 99 | true 100 | ); 101 | {% endset %} 102 | 103 | {% do run_query(create_table) %} 104 | {% do log("finished creating table adf_pipeline_runs", info=true) %} 105 | 106 | {% do run_query(insert_table) %} 107 | {% do log("finished insert table adf_pipeline_runs", info=true) %} 108 | {%- endmacro -%} 109 | 110 | 
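{#
  Note: `adf_pipeline_runs()` resolves through `adapter.dispatch`, so dbt picks the
  implementation matching the active adapter. The `default__` variant above uses
  Databricks SQL constructors (NAMED_STRUCT, MAP, ARRAY), while the `bigquery__`
  variant below uses BigQuery typed STRUCT/ARRAY literals. In CI this macro is
  invoked with `dbt run-operation adf_pipeline_runs --target <target>` to create
  fixture data before the source tests run.
#}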
111 | {%- macro bigquery__adf_pipeline_runs() -%} 112 | {% set create_table %} 113 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_pipeline_runs( 114 | id STRING, 115 | runId STRING, 116 | debugRunId STRING, 117 | runGroupId STRING, 118 | pipelineName STRING, 119 | parameters STRUCT< 120 | ENVIRONMENT STRING, 121 | RESET_TYPE STRING, 122 | DAYS_BEFORE STRING 123 | >, 124 | invokedBy STRUCT< 125 | id STRING, 126 | name STRING, 127 | invokedByType STRING, 128 | pipelineName STRING, 129 | pipelineRunId STRING 130 | >, 131 | runStart TIMESTAMP, 132 | runEnd TIMESTAMP, 133 | durationInMs BIGINT, 134 | status STRING, 135 | message STRING, 136 | pipelineReturnValue ARRAY>, 137 | lastUpdated TIMESTAMP, 138 | annotations ARRAY, 139 | runDimension ARRAY>, 140 | isLatest BOOLEAN 141 | ); 142 | 143 | {% endset %} 144 | 145 | {% set insert_table %} 146 | INSERT INTO `{{ target.database }}.{{ target.schema }}.adf_pipeline_runs` 147 | VALUES 148 | ( 149 | '/SUBSCRIPTIONS/9FFACTORIES/TLO-DATASTUDIO-ADF-D/pipe082b73db5', 150 | '9f81a5eb-db6082b73db5', 151 | NULL, 152 | '9f81a5eb-a73e-db6082b73db5', 153 | 'PL-FEMFILESCSingZone-N', 154 | STRUCT( 155 | 'FILESCSV' AS ENVIRONMENT, 156 | 'FULL' AS RESET_TYPE, 157 | NULL AS DAYS_BEFORE 158 | ), 159 | STRUCT( 160 | 'cf613b7b-e04dc0cfbe8' AS id, 161 | 'cf613b7b-e0c4924dc0cfbe8' AS name, 162 | 'PipelineActivity' AS invokedByType, 163 | 'PL-OrquestradorSooLoadingControl' AS pipelineName, 164 | '598ec8ee-604c-47c7-a3c0-e049c4b8c558' AS pipelineRunId 165 | ), 166 | TIMESTAMP('2024-08-20T03:30:06.606107Z'), 167 | TIMESTAMP('2024-08-20T03:47:03.986522Z'), 168 | 1017380, 169 | 'Succeeded', 170 | NULL, 171 | ARRAY>[], 172 | TIMESTAMP('2024-08-20T03:47:03.987938Z'), 173 | ARRAY[], 174 | ARRAY>[], 175 | TRUE 176 | ), 177 | ( 178 | '/SUBSCRIPTIONS/9/PROVIDERS/MO-ADF-D/pipelinerua1-9313-73fa5c0a3f0e', 179 | '64c7a8c7-30313-73fa5c0a3f0e', 180 | NULL, 181 | '64c7a8c7-30313-73fa5c0a3f0e', 182 | 'NET_REC_DAILY_PRICE_UPDATE', 183 | STRUCT( 184 | NULL AS ENVIRONMENT, 185 | NULL AS RESET_TYPE, 186 | '1' AS DAYS_BEFORE 187 | ), 188 | STRUCT( 189 | '08584774516819036014561066769CU22' AS id, 190 | 'NET_REC_DAILY_UPDATE' AS name, 191 | 'ScheduleTrigger' AS invokedByType, 192 | NULL AS pipelineName, 193 | NULL AS pipelineRunId 194 | ), 195 | TIMESTAMP('2024-08-20T12:00:31.272826Z'), 196 | TIMESTAMP('2024-08-20T13:15:52.654549Z'), 197 | 4521381, 198 | 'Succeeded', 199 | NULL, 200 | ARRAY>[], 201 | TIMESTAMP('2024-08-20T13:15:52.655027Z'), 202 | ARRAY['ted'], 203 | ARRAY>[], 204 | TRUE 205 | ); 206 | 207 | 208 | {% endset %} 209 | 210 | {% do run_query(create_table) %} 211 | {% do log("finished creating table adf_pipeline_runs", info=true) %} 212 | 213 | {% do run_query(insert_table) %} 214 | {% do log("finished insert table adf_pipeline_runs", info=true) %} 215 | {%- endmacro -%} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dag Monitoring 2 | This package allows you to easily monitor your DAGs from well known orchestration tools, providing helpful info to improve your data pipeline. 
3 | 
4 | # Table of Contents
5 | 
6 | - [Before creating a branch](#Before-creating-a-branch)
7 | - [Revisions](#revisions)
8 | - [Tools supported](#tools-supported)
9 | - [Quickstart](#:running:-Quickstart)
10 | - [Requirements](#requirements)
11 | - [Profiles](#profiles)
12 | - [Installation](#installation)
13 | - [Configuring models package](#Configuring-models-package)
14 | - [Airflow metadata](#Airflow-metadata)
15 | - [ADF metadata](#ADF-metadata)
16 | - [Databricks Workflow Data](#Databricks-Workflow-Data)
17 | - [Integration tests](#Integration-tests)
18 | 
19 | # Before creating a branch
20 | 
21 | Before creating a branch, decide whether your modification to this repository is a release/major (breaking changes), a feature/minor (new functionality) or a patch (bug fix). With that information, name your branch like this:
22 | 
23 | - `release/` or `major/` or `Release/` or `Major/`
24 | - `feature/` or `minor/` (capitalised variants work as well)
25 | - `patch/` or `fix/` or `hotfix/` (capitalised variants work as well)
26 | 
27 | # Revisions
28 | - 0.3.0 - For Snowflake warehouses
29 | - 0.3.1 - For Redshift warehouses
30 | 
31 | ## Tools supported
32 | 
33 | - Azure Data Factory
34 | - Apache Airflow
35 | - Databricks Workflows
36 | 
37 | If you are cloning this repository, we recommend cloning via SSH.
38 | 
39 | # :running: Quickstart
40 | 
41 | New to dbt packages? Read more about them [here](https://docs.getdbt.com/docs/building-a-dbt-project/package-management/).
42 | 
43 | ## Requirements
44 | dbt version
45 | * ```dbt version >= 1.3.0```
46 | 
47 | dbt_utils package. Read more about it [here](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/).
48 | * ```dbt-labs/dbt_utils version: 1.1.1```
49 | 
50 | This package works with most EL processes and depends on the metadata generated by the respective platform.
51 | 
52 | ## Profiles
53 | The profile below uses Databricks as an example. When testing the repository, rename `example.env` to `.env` and fill in its variables with the appropriate values; the profile reads them as environment variables.
54 | 
55 | ```yaml
56 | dbt_dag_monitoring:
57 |   target: "{{ env_var('DBT_DEFAULT_TARGET', 'dev')}}"
58 |   outputs:
59 |     dev:
60 |       type: databricks
61 |       catalog: "{{ env_var('DEV_CATALOG_NAME')}}"
62 |       schema: "{{ env_var('DEV_SCHEMA_NAME')}}"
63 |       host: "{{ env_var('DEV_HOST') }}"
64 |       http_path: "{{ env_var('DEV_HTTP_PATH') }}"
65 |       token: "{{ env_var('DEV_TOKEN') }}"
66 |       threads: 16
67 |       ansi_mode: false
68 | ```
69 | 
70 | Once that is done, two commands are needed to work locally without difficulties:
71 | 
72 | `chmod +x setup.sh`
73 | 
74 | and
75 | 
76 | `source setup.sh`
77 | 
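The CI workflow also runs the project against BigQuery and Snowflake targets (`dbt debug --target bigquery` / `--target snowflake`). The repository's `profiles.yml` is not reproduced in this README, so the snippet below is only a sketch of what those extra outputs could look like, assembled from the environment variables exported in `.github/workflows/ci.yml` and the standard `dbt-bigquery` / `dbt-snowflake` connection fields. Adjust it to match the actual file.

```yaml
# Illustrative sketch only: additional outputs placed next to `dev`,
# assuming the env var names exported in .github/workflows/ci.yml.
    bigquery:
      type: bigquery
      method: oauth   # CI authenticates via google-github-actions/auth
      project: "{{ env_var('BIGQUERY_PROJECT') }}"
      dataset: "{{ env_var('BIGQUERY_DATASET') }}"
      threads: 16
    snowflake:
      type: snowflake
      account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}"
      user: "{{ env_var('SNOWFLAKE_USER') }}"
      password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
      role: "{{ env_var('SNOWFLAKE_ROLE') }}"
      database: "{{ env_var('SNOWFLAKE_DATABASE') }}"
      warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}"
      schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}"
      threads: 16
```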
78 | ## Installation
79 | 
80 | 1. Include this package in your `packages.yml` file.
81 | ```yaml
82 | packages:
83 |   - git: "https://github.com/techindicium/dbt-dag-monitoring.git"
84 |     revision: # 0.3.0 or 0.3.1
85 | ```
86 | 
87 | 2. Run `dbt deps` to install the package.
88 | 
89 | ## Configuring models package
90 | 
91 | ### Models:
92 | The behaviour of the package on the desired platform depends on the configuration of `dbt_project.yml`. To define which platform's metadata the package transforms, set the `enabled` field to `true` for the desired platform and to `false` for all the others.
93 | 
94 | ### Vars:
95 | Then we define the variables: `enabled_sources` determines which platform dbt should consider the variables for; `dag_monitoring_start_date` defines the date from which monitoring data is considered; and the following lines define which database and schema will be used, according to the platform defined above.
96 | ```yaml
97 | models:
98 |   dbt_dag_monitoring:
99 |     marts:
100 |       +materialized: table
101 |     staging:
102 |       +materialized: view
103 |       airflow_sources:
104 |         +enabled: true
105 |       adf_sources:
106 |         +enabled: false
107 |       databricks_workflow_sources:
108 |         +enabled: false
109 | 
110 | sources:
111 |   dbt_dag_monitoring:
112 |     staging:
113 |       adf_sources:
114 |         raw_adf_monitoring:
115 |           +enabled: false
116 |       databricks_workflow_sources:
117 |         raw_databricks_workflow_monitoring:
118 |           +enabled: false
119 |       airflow_sources:
120 |         raw_airflow_monitoring:
121 |           +enabled: true
122 | ```
123 | 
124 | 
125 | Adding the vars below to `dbt_project.yml` prevents dbt compilation errors.
126 | ```yaml
127 | vars:
128 |   dbt_dag_monitoring:
129 |     enabled_sources: ['airflow'] # Possible values: 'airflow', 'adf' or 'databricks_workflow'
130 |     dag_monitoring_start_date: cast('2023-01-01' as date)
131 |     dag_monitoring_airflow_database: #landing_zone
132 |     dag_monitoring_airflow_schema: #airflow_metadata
133 |     dag_monitoring_databricks_database: #raw_catalog
134 |     dag_monitoring_databricks_schema: #databricks_metadata
135 |     dag_monitoring_adf_database: #raw
136 |     dag_monitoring_adf_schema: #adf_metadata
137 | ```
138 | 
139 | ## Airflow metadata
140 | 
141 | The Airflow sources are based on the Airflow metadata database; any form of extraction from it should suffice.
142 | 
143 | The package is compatible with any type of EL process, as long as the data warehouse contains the following tables:
144 | - dag_run
145 | - task_instance
146 | - task_fail
147 | - dag
148 | 
149 | ## ADF metadata
150 | 
151 | The ADF models rely on sources extracted by our ADF tap:
152 | 
153 | https://bitbucket.org/indiciumtech/platform_meltano_el/src/6b9c9e970518db1e21086ec75a7442d1b6978c93/plugins/custom/tap-azuredatafactory/?at=featuer%2Fadd_adf_extractor
154 | 
155 | ## Databricks Workflow Data
156 | The Databricks Workflow models rely on sources extracted by our Databricks tap:
157 | 
158 | https://bitbucket.org/indiciumtech/platform_meltano_el/src/main/plugins/custom/tap-databricksops/
159 | 
160 | Specifically, the following streams:
161 | 
162 | - jobs
163 | - job_runs
164 | 
165 | ## Integration tests
166 | 
167 | > [!IMPORTANT]
168 | > When working with the integration tests folder, so that the continuous integration workflow runs seamlessly, you can NOT change in your pull request the default values of the vars, models and sources (Databricks) inside `integration_tests/dbt_project.yml`. Following the source pattern is important.
169 | 
170 | More information is available in the README.md inside the `integration_tests` folder.
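The `integration_tests/dbt_project.yml` itself is not shown in this README, so the snippet below is only an illustrative sketch of what the "defaults being Databricks" mentioned above look like, mirroring the model/source/vars pattern from the Configuring models package section and the `cdi_dev` / `ci_dbt_dag_monitoring` values used in `.github/workflows/ci.yml`. The actual file is the source of truth.

```yaml
# Illustrative only — the Databricks defaults the CI scripts switch between.
models:
  dbt_dag_monitoring:
    staging:
      airflow_sources:
        +enabled: false
      adf_sources:
        +enabled: false
      databricks_workflow_sources:
        +enabled: true

vars:
  dbt_dag_monitoring:
    enabled_sources: ['databricks_workflow']
    dag_monitoring_databricks_database: cdi_dev               # matches DEV_CATALOG_NAME in ci.yml
    dag_monitoring_databricks_schema: ci_dbt_dag_monitoring   # matches DEV_SCHEMA_NAME in ci.yml
```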
171 | 172 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - opened 7 | - synchronize 8 | branches: 9 | - main 10 | 11 | env: 12 | DBT_PROFILES_DIR: ${{ github.workspace }}/integration_tests 13 | DBT_PROJECT_DIR: ${{ github.workspace }}/integration_tests 14 | DBT_DEFAULT_TARGET: databricks 15 | DEV_CATALOG_NAME: cdi_dev 16 | DEV_SCHEMA_NAME: ci_dbt_dag_monitoring 17 | DEV_HOST: ${{ secrets.DATABRICKS_HOST }} 18 | DEV_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} 19 | DEV_HTTP_PATH: ${{ secrets.DATABRICKS_HTTP_PATH }} 20 | 21 | BIGQUERY_DATASET: ci_dbt_dag_monitoring 22 | BIGQUERY_PROJECT: indicium-sandbox 23 | DBT_JOB_TIMEOUT: 300 24 | DBT_THREADS: 16 25 | DBT_JOB_RETRIES: 1 26 | 27 | SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_TEST_ACCOUNT}} 28 | SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_TEST_USER }} 29 | SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_TEST_PASSWORD}} 30 | SNOWFLAKE_ROLE: INTERNAL_PRODUCTS_CICD 31 | SNOWFLAKE_DATABASE: SANDBOX 32 | SNOWFLAKE_WAREHOUSE: SANDBOX_WAREHOUSE 33 | SNOWFLAKE_SCHEMA: ci_dbt_dag_monitoring 34 | 35 | 36 | jobs: 37 | dbt-checks: 38 | runs-on: ubuntu-latest 39 | 40 | steps: 41 | - name: Checkout repository 42 | uses: actions/checkout@v2 43 | 44 | - name: Set up Python 45 | uses: actions/setup-python@v2 46 | with: 47 | python-version: '3.8' 48 | 49 | - name: Install dependencies 50 | run: | 51 | python -m pip install --upgrade pip 52 | pip install -r requirements.txt 53 | 54 | - name: Authenticate to GCP 55 | uses: "google-github-actions/auth@v2" 56 | with: 57 | credentials_json: "${{ secrets.BIGQUERY_AUTH }}" 58 | 59 | - name: Run dbt debug for Databricks 60 | run: dbt debug 61 | 62 | - name: Run dbt debug for BigQuery 63 | run: dbt debug --target bigquery 64 | 65 | - name: Run dbt debug for Snowflake 66 | run: dbt debug --target snowflake 67 | 68 | - name: dbt deps 69 | run: dbt deps 70 | 71 | - name: dbt compile 72 | run: dbt compile 73 | 74 | integration-test: 75 | runs-on: ubuntu-latest 76 | steps: 77 | - name: Checkout repository 78 | uses: actions/checkout@v2 79 | 80 | - name: Set up Python 81 | uses: actions/setup-python@v2 82 | with: 83 | python-version: '3.8' 84 | 85 | - name: Install dependencies 86 | run: | 87 | python -m pip install --upgrade pip 88 | pip install -r requirements.txt 89 | 90 | - name: enter integration tests 91 | run: | 92 | cd integration_tests/ 93 | 94 | - name: Authenticate to GCP 95 | uses: "google-github-actions/auth@v2" 96 | with: 97 | credentials_json: "${{ secrets.BIGQUERY_AUTH }}" 98 | 99 | - name: Run dbt integration tests Databricks source in Databricks connection 100 | run: | 101 | dbt deps --target databricks 102 | 103 | dbt run-operation create_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target databricks 104 | 105 | dbt seed --target databricks 106 | 107 | dbt run-operation jobs --target databricks 108 | dbt run-operation job_runs --target databricks 109 | 110 | dbt test -s source:* --target databricks 111 | 112 | dbt build --target databricks 113 | 114 | - name: switch enabled sources for adf source 115 | run: 116 | . 
${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_adf_source.sh 117 | 118 | - name: Run dbt tasks for ADF source in Databricks connection 119 | run: | 120 | dbt deps 121 | 122 | dbt seed --target databricks 123 | 124 | dbt run-operation adf_pipeline_runs --target databricks 125 | dbt run-operation adf_triggers --target databricks 126 | 127 | dbt test -s source:* --target databricks 128 | 129 | dbt build --target databricks 130 | 131 | - name: switch enabled sources for airflow source 132 | run: | 133 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_airflow_source.sh 134 | 135 | - name: Run dbt tasks for Airflow source in Databricks connection 136 | run: | 137 | dbt deps 138 | 139 | dbt seed --target databricks 140 | 141 | dbt test -s source:* --target databricks 142 | 143 | dbt build --target databricks 144 | 145 | dbt run-operation drop_schema --args '{schema_name: ci_dbt_dag_monitoring}' 146 | 147 | - name: change databricks database to bigquery database 148 | run: | 149 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_databricks_source.sh 150 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh databricks cdi_dev indicium-sandbox 151 | 152 | - name: Run dbt integration tests Databricks source in BigQuery connection 153 | run: | 154 | dbt deps --target bigquery 155 | 156 | dbt run-operation create_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target bigquery 157 | 158 | dbt run-operation jobs --target bigquery 159 | dbt run-operation job_runs --target bigquery 160 | 161 | dbt test -s source:* --target bigquery 162 | 163 | dbt build --exclude-resource-type seed --target bigquery 164 | 165 | - name: switch enabled sources for adf source 166 | run: | 167 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh adf cdi_dev indicium-sandbox 168 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_adf_source.sh 169 | 170 | - name: Run dbt integration tests ADF source in BigQuery connection 171 | run: | 172 | dbt deps 173 | 174 | dbt seed -s adf_pipelines --target bigquery 175 | 176 | dbt run-operation adf_activity_runs --target bigquery 177 | dbt run-operation adf_pipeline_runs --target bigquery 178 | dbt run-operation adf_triggers --target bigquery 179 | 180 | dbt test -s source:* --target bigquery 181 | 182 | dbt build --exclude-resource-type seed --target bigquery 183 | 184 | - name: switch enabled sources for airflow source 185 | run: | 186 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_airflow_source.sh 187 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh airflow cdi_dev indicium-sandbox 188 | 189 | - name: Run dbt tasks for Airflow source in BigQuery connection 190 | run: | 191 | dbt deps 192 | 193 | dbt run-operation seed__dag_run --target bigquery 194 | dbt run-operation seed__dag --target bigquery 195 | dbt run-operation seed__task_fail --target bigquery 196 | dbt run-operation seed__task_instance --target bigquery 197 | 198 | dbt test -s source:* --target bigquery 199 | 200 | dbt build --exclude-resource-type seed --target bigquery 201 | 202 | dbt run-operation drop_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target bigquery 203 | 204 | - name: change BigQuery database to Snowflake database 205 | run: | 206 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh databricks indicium-sandbox sandbox 207 | . 
${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_databricks_source.sh 208 | 209 | - name: Run dbt integration tests Databricks source in Snowflake connection 210 | run: | 211 | dbt deps 212 | 213 | dbt run-operation create_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target snowflake 214 | 215 | dbt run-operation jobs --target snowflake 216 | dbt run-operation job_runs --target snowflake 217 | 218 | dbt test -s source:* --target snowflake 219 | 220 | dbt build --exclude-resource-type seed --target snowflake 221 | 222 | - name: switch enabled sources for airflow source 223 | run: | 224 | . ${{ github.workspace }}/integration_tests/for_CI/change_of_database.sh airflow indicium-sandbox sandbox 225 | . ${{ github.workspace }}/integration_tests/for_CI/change_dbt_project_from_databricks_to_airflow.sh 226 | 227 | - name: Run dbt integration tests Airflow source in Snowflake connection 228 | env: 229 | DBT_PROFILES_DIR: ${{ github.workspace }}/integration_tests 230 | DBT_PROJECT_DIR: ${{ github.workspace }}/integration_tests 231 | run: | 232 | dbt deps 233 | 234 | dbt seed -s seeds/airflow/* --target snowflake 235 | 236 | dbt test -s source:* --target snowflake 237 | 238 | dbt build --exclude-resource-type seed --target snowflake 239 | 240 | dbt run-operation drop_schema --args '{schema_name: ci_dbt_dag_monitoring}' --target snowflake 241 | 242 | -------------------------------------------------------------------------------- /integration_tests/macros/adf_triggers.sql: -------------------------------------------------------------------------------- 1 | {% macro adf_triggers() -%} 2 | {{ return(adapter.dispatch('adf_triggers')()) }} 3 | {%- endmacro %} 4 | 5 | {%- macro default__adf_triggers() -%} 6 | {% set create_table %} 7 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_triggers ( 8 | id STRING, 9 | name STRING, 10 | type STRING, 11 | properties STRUCT< 12 | annotations ARRAY, 13 | pipelines ARRAY< 14 | STRUCT< 15 | pipelineReference STRUCT< 16 | referenceName STRING, 17 | type STRING 18 | >, 19 | parameters STRUCT< 20 | days_before STRING, 21 | environment STRING, 22 | reset_type STRING, 23 | Job_ID STRING, 24 | DatabricksWorkspaceID STRING, 25 | WaitRecheckSeconds INT 26 | > 27 | > 28 | >, 29 | type STRING, 30 | typeProperties STRUCT< 31 | recurrence STRUCT< 32 | frequency STRING, 33 | interval INT, 34 | startTime STRING, 35 | timeZone STRING, 36 | schedule STRUCT< 37 | minutes ARRAY, 38 | hours ARRAY, 39 | weekDays ARRAY, 40 | monthDays ARRAY 41 | > 42 | >, 43 | parentTrigger STRING, 44 | requestedStartTime STRING, 45 | requestedEndTime STRING, 46 | rerunConcurrency INT 47 | >, 48 | runtimeState STRING 49 | > 50 | ); 51 | {% endset %} 52 | 53 | {% set insert_table %} 54 | insert into `{{ target.database }}`.{{ target.schema }}.adf_triggers VALUES 55 | ( 56 | '/subscriptions/9f07555crvices-atastudio-adf-d/triggers/TR-fd-prod-duration_estimation-monthly', 57 | 'TR-fd-prod-dion-monthly', 58 | 'Microsoft.Dataes/triggers', 59 | NAMED_STRUCT( 60 | 'annotations', ARRAY('fraud-detection', 'prod', 'duration-estimation'), 61 | 'pipelines', ARRAY( 62 | NAMED_STRUCT( 63 | 'pipelineReference', NAMED_STRUCT( 64 | 'referenceName', 'fd-prod-duration_estimation', 65 | 'type', 'PipelineReference' 66 | ), 67 | 'parameters', NAMED_STRUCT( 68 | 'days_before', NULL, 69 | 'environment', NULL, 70 | 'reset_type', NULL, 71 | 'Job_ID', NULL, 72 | 'DatabricksWorkspaceID', NULL, 73 | 'WaitRecheckSeconds', NULL 74 | ) 75 | ) 76 | ), 77 | 'type', 'ScheduleTrigger', 78 
| 'typeProperties', NAMED_STRUCT( 79 | 'recurrence', NAMED_STRUCT( 80 | 'frequency', 'Month', 81 | 'interval', 1, 82 | 'startTime', '2020-10-14T04:30:00', 83 | 'timeZone', 'E. South America Standard Time', 84 | 'schedule', NAMED_STRUCT( 85 | 'minutes', ARRAY(30), 86 | 'hours', ARRAY(4), 87 | 'weekDays', NULL, 88 | 'monthDays', ARRAY(14) 89 | ) 90 | ), 91 | 'parentTrigger', NULL, 92 | 'requestedStartTime', NULL, 93 | 'requestedEndTime', NULL, 94 | 'rerunConcurrency', NULL 95 | ), 96 | 'runtimeState', NULL 97 | ) 98 | ), 99 | ( 100 | '/subscriptions/TR-fd-dev-predict-main', 101 | 'TR-fd-dev-predict-main', 102 | 'Microsoft.Dats/triggers', 103 | NAMED_STRUCT( 104 | 'annotations', ARRAY('fraud-detection', 'dev', 'predict'), 105 | 'pipelines', ARRAY( 106 | NAMED_STRUCT( 107 | 'pipelineReference', NAMED_STRUCT( 108 | 'referenceName', 'fd-dev-predict-main', 109 | 'type', 'PipelineReference' 110 | ), 111 | 'parameters', NAMED_STRUCT( 112 | 'days_before', NULL, 113 | 'environment', NULL, 114 | 'reset_type', NULL, 115 | 'Job_ID', NULL, 116 | 'DatabricksWorkspaceID', NULL, 117 | 'WaitRecheckSeconds', NULL 118 | ) 119 | ) 120 | ), 121 | 'type', 'ScheduleTrigger', 122 | 'typeProperties', NAMED_STRUCT( 123 | 'recurrence', NAMED_STRUCT( 124 | 'frequency', 'Week', 125 | 'interval', 1, 126 | 'startTime', '2021-01-26T21:50:00', 127 | 'timeZone', 'E. South America Standard Time', 128 | 'schedule', NAMED_STRUCT( 129 | 'minutes', ARRAY(0), 130 | 'hours', ARRAY(5), 131 | 'weekDays', ARRAY('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'), 132 | 'monthDays', NULL 133 | ) 134 | ), 135 | 'parentTrigger', NULL, 136 | 'requestedStartTime', NULL, 137 | 'requestedEndTime', NULL, 138 | 'rerunConcurrency', NULL 139 | ), 140 | 'runtimeState', 'Stopped' 141 | ) 142 | ); 143 | {% endset %} 144 | 145 | {% do run_query(create_table) %} 146 | {% do log("finished creating table triggers", info=true) %} 147 | 148 | {% do run_query(insert_table) %} 149 | {% do log("finished insert table triggers", info=true) %} 150 | {%- endmacro -%} 151 | 152 | 153 | {%- macro bigquery__adf_triggers() -%} 154 | {% set create_table %} 155 | create or replace table `{{ target.database }}`.{{ target.schema }}.adf_triggers ( 156 | id STRING, 157 | name STRING, 158 | type STRING, 159 | properties STRUCT< 160 | annotations ARRAY, 161 | pipelines ARRAY< 162 | STRUCT< 163 | pipelineReference STRUCT< 164 | referenceName STRING, 165 | type STRING 166 | >, 167 | parameters STRUCT< 168 | days_before STRING, 169 | environment STRING, 170 | reset_type STRING, 171 | Job_ID STRING, 172 | DatabricksWorkspaceID STRING, 173 | WaitRecheckSeconds INT 174 | > 175 | > 176 | >, 177 | type STRING, 178 | typeProperties STRUCT< 179 | recurrence STRUCT< 180 | frequency STRING, 181 | `interval` INT, 182 | startTime STRING, 183 | timeZone STRING, 184 | schedule STRUCT< 185 | minutes ARRAY, 186 | hours ARRAY, 187 | weekDays ARRAY, 188 | monthDays ARRAY 189 | > 190 | >, 191 | parentTrigger STRING, 192 | requestedStartTime STRING, 193 | requestedEndTime STRING, 194 | rerunConcurrency INT 195 | >, 196 | runtimeState STRING 197 | > 198 | ); 199 | 200 | {% endset %} 201 | 202 | {% set insert_table %} 203 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.adf_triggers 204 | VALUES 205 | ( 206 | '/subscriptions/9f0755tories/tlo-datastudio-adf-d/triggers/TR-fd-prod-duration_estimation-monthly', 207 | 'TR-fd-prod-ation-monthly', 208 | 'Microsoft.Dattories/triggers', 209 | STRUCT( 210 | ARRAY['fraud-detection', 'prod', 'duration-estimation'], 211 | 
ARRAY, 216 | parameters STRUCT< 217 | days_before STRING, 218 | environment STRING, 219 | reset_type STRING, 220 | Job_ID STRING, 221 | DatabricksWorkspaceID STRING, 222 | WaitRecheckSeconds INT64 223 | > 224 | >>[ 225 | STRUCT( 226 | STRUCT( 227 | 'fd-prod-duration_estimation', 228 | 'PipelineReference' 229 | ), 230 | STRUCT( 231 | NULL, 232 | NULL, 233 | NULL, 234 | NULL, 235 | NULL, 236 | NULL 237 | ) 238 | ) 239 | ], 240 | 'ScheduleTrigger', 241 | STRUCT( 242 | STRUCT( 243 | 'Month', 244 | 1, 245 | '2020-10-14T04:30:00', 246 | 'E. South America Standard Time', 247 | STRUCT( 248 | ARRAY[30], 249 | ARRAY[4], 250 | NULL, 251 | ARRAY[14] 252 | ) 253 | ), 254 | NULL, 255 | NULL, 256 | NULL, 257 | NULL 258 | ), 259 | NULL 260 | ) 261 | ), 262 | ( 263 | '/subscriptions/y/factories/tlo-datastudio-adf-d/triggers/TR-fd-dev-predict-main', 264 | 'TR-fd-dev-predict-main', 265 | 'Microsoft.DataFactory/factories/triggers', 266 | STRUCT( 267 | ARRAY['fraud-detection', 'dev', 'predict'], 268 | ARRAY, 273 | parameters STRUCT< 274 | days_before STRING, 275 | environment STRING, 276 | reset_type STRING, 277 | Job_ID STRING, 278 | DatabricksWorkspaceID STRING, 279 | WaitRecheckSeconds INT64 280 | > 281 | >>[ 282 | STRUCT( 283 | STRUCT( 284 | 'fd-dev-predict-main', 285 | 'PipelineReference' 286 | ), 287 | STRUCT( 288 | NULL, 289 | NULL, 290 | NULL, 291 | NULL, 292 | NULL, 293 | NULL 294 | ) 295 | ) 296 | ], 297 | 'ScheduleTrigger', 298 | STRUCT( 299 | STRUCT( 300 | 'Week', 301 | 1, 302 | '2021-01-26T21:50:00', 303 | 'E. South America Standard Time', 304 | STRUCT( 305 | ARRAY[0], 306 | ARRAY[5], 307 | ARRAY['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'], 308 | NULL 309 | ) 310 | ), 311 | NULL, 312 | NULL, 313 | NULL, 314 | NULL 315 | ), 316 | 'Stopped' 317 | ) 318 | ); 319 | 320 | {% endset %} 321 | 322 | {% do run_query(create_table) %} 323 | {% do log("finished creating table triggers", info=true) %} 324 | 325 | {% do run_query(insert_table) %} 326 | {% do log("finished insert table triggers", info=true) %} 327 | {%- endmacro -%} -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /integration_tests/macros/jobs.sql: -------------------------------------------------------------------------------- 1 | {% macro jobs() -%} 2 | {{ return(adapter.dispatch('jobs')()) }} 3 | {%- endmacro %} 4 | 5 | {% macro databricks__jobs() %} 6 | {% set create_table %} 7 | create or replace table `{{ target.database }}`.{{ target.schema }}.jobs ( 8 | created_time BIGINT, 9 | creator_user_name STRING, 10 | job_id BIGINT, 11 | settings STRUCT< 12 | email_notifications STRUCT< 13 | on_failure ARRAY, 14 | no_alert_for_skipped_runs BOOLEAN 15 | >, 16 | format STRING, 17 | max_concurrent_runs BIGINT, 18 | name STRING, 19 | schedule STRUCT< 20 | pause_status STRING, 21 | quartz_cron_expression STRING, 22 | timezone_id STRING 23 | >, 24 | tags STRUCT< 25 | dev STRING, 26 | env STRING 27 | >, 28 | timeout_seconds bigint, 29 | trigger STRUCT< 30 | file_arrival STRUCT< 31 | url STRING 32 | >, 33 | pause_status STRING 34 | > 35 | >, 36 | insertedDate TIMESTAMP 37 | ); 38 | {% endset %} 39 | 40 | {% set insert_table %} 41 | 42 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.jobs VALUES 43 | ( 44 | CAST(1722606667504 AS BIGINT), 45 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING), 46 | CAST(466340877826952 AS BIGINT), 47 | NAMED_STRUCT( 48 | 'email_notifications', NAMED_STRUCT( 49 | 'on_failure',ARRAY('produtos_horizontais@indicium1.opsgenie.net'), 50 | 'no_alert_for_skipped_runs', NULL 51 | ), 52 | 'format', 'MULTI_TASK', 53 | 'max_concurrent_runs', 1, 54 | 'name', '[prod] core_dag_monitoring_data_transformation_dbt_job', 55 | 'schedule', NAMED_STRUCT( 56 | 'pause_status', NULL, 57 | 'quartz_cron_expression', NULL, 58 | 'timezone_id', NULL 59 | ), 60 | 'tags',NAMED_STRUCT( 61 | 'dev', NULL, 62 | 'env','prod' 63 | ), 64 | 'timeout_seconds', 0, 65 | 'trigger',NAMED_STRUCT( 66 | 'file_arrival',NAMED_STRUCT( 67 | 'url',NULL 68 | ), 69 | 'paused_status',NULL 70 | ) 71 | ), 72 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 73 | ), 74 | ( 75 | CAST(1722544845800 AS BIGINT), 76 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING), 77 | CAST(823250232903490 AS BIGINT), 78 | NAMED_STRUCT( 79 | 'email_notifications',NAMED_STRUCT( 80 | 'on_failure', ARRAY('produtos_horizontais@indicium1.opsgenie.net'), 81 | 'no_alert_for_skipped_runs', NULL 82 | ), 83 | 'format','MULTI_TASK', 84 | 'max_concurrent_runs',1, 85 | 'name','[prod] core_dag_monitoring_extraction_meltano_job', 86 | 'schedule',NAMED_STRUCT( 87 | 'pause_status','UNPAUSED', 88 | 'quartz_cron_expression','0 0 0/3 * * ? *', 89 | 'timezone_id','UTC' 90 | ), 91 | 'tags',NAMED_STRUCT( 92 | 'dev', NULL, 93 | 'env','prod' 94 | ), 95 | 'timeout_seconds',0, 96 | 'trigger',NAMED_STRUCT( 97 | 'file_arrival',NAMED_STRUCT( 98 | 'url',NULL 99 | ), 100 | 'paused_status',NULL 101 | ) 102 | ), 103 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 104 | ), 105 | ( 106 | CAST(1722538441265 AS BIGINT), 107 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING), 108 | CAST(790689006770532 AS BIGINT), 109 | NAMED_STRUCT( 110 | 'email_notifications',NAMED_STRUCT( 111 | 'on_failure',ARRAY('produtos_horizontais@indicium1.opsgenie.net'), 112 | 'no_alert_for_skipped_runs',NULL 113 | ), 114 | 'format','MULTI_TASK', 115 | 'max_concurrent_runs',1, 116 | 'name','[prod] investment_postgres_extraction_spark_job', 117 | 'schedule',NAMED_STRUCT( 118 | 'pause_status','UNPAUSED', 119 | 'quartz_cron_expression','0 0 0/4 * * ? 
*', 120 | 'timezone_id','UTC' 121 | ), 122 | 'tags',NAMED_STRUCT( 123 | 'dev', NULL, 124 | 'env','prod' 125 | ), 126 | 'timeout_seconds',0, 127 | 'trigger',NAMED_STRUCT( 128 | 'file_arrival',NAMED_STRUCT( 129 | 'url',NULL 130 | ), 131 | 'paused_status',NULL 132 | ) 133 | ), 134 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 135 | ); 136 | 137 | {% endset %} 138 | 139 | {% do run_query(create_table) %} 140 | {% do log("finished creating table jobs", info=true) %} 141 | 142 | {% do run_query(insert_table) %} 143 | {% do log("finished insert table jobs ", info=true) %} 144 | 145 | 146 | {% endmacro %} 147 | 148 | {% macro bigquery__jobs() %} 149 | {% set create_table %} 150 | create or replace table `{{ target.database }}`.{{ target.schema }}.jobs ( 151 | created_time BIGINT, 152 | creator_user_name STRING, 153 | job_id BIGINT, 154 | settings STRUCT< 155 | email_notifications STRUCT< 156 | on_failure ARRAY, 157 | no_alert_for_skipped_runs BOOLEAN 158 | >, 159 | format STRING, 160 | max_concurrent_runs BIGINT, 161 | name STRING, 162 | schedule STRUCT< 163 | pause_status STRING, 164 | quartz_cron_expression STRING, 165 | timezone_id STRING 166 | >, 167 | tags STRUCT< 168 | dev STRING, 169 | env STRING 170 | >, 171 | timeout_seconds bigint, 172 | trigger STRUCT< 173 | file_arrival STRUCT< 174 | url STRING 175 | >, 176 | pause_status STRING 177 | > 178 | >, 179 | insertedDate TIMESTAMP 180 | ); 181 | {% endset %} 182 | 183 | {% set insert_table %} 184 | 185 | INSERT INTO `{{ target.database }}`.{{ target.schema }}.jobs VALUES 186 | ( 187 | CAST(1722606667504 AS INT64), 188 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING), 189 | CAST(466340877826952 AS INT64), 190 | STRUCT( 191 | STRUCT( 192 | ARRAY['produtos_horizontais@indicium1.opsgenie.net'] AS on_failure, 193 | NULL AS no_alert_for_skipped_runs 194 | ) AS email_notifications, 195 | 'MULTI_TASK' AS format, 196 | 1 AS max_concurrent_runs, 197 | '[prod] core_dag_monitoring_data_transformation_dbt_job' AS name, 198 | STRUCT( 199 | NULL AS pause_status, 200 | NULL AS quartz_cron_expression, 201 | NULL AS timezone_id 202 | ) AS schedule, 203 | STRUCT( 204 | NULL AS dev, 205 | 'prod' AS env 206 | ) AS tags, 207 | 0 AS timeout_seconds, 208 | STRUCT( 209 | STRUCT( 210 | NULL AS url 211 | ) AS file_arrival, 212 | NULL AS paused_status 213 | ) AS trigger 214 | ), 215 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 216 | ), 217 | ( 218 | CAST(1722544845800 AS INT64), 219 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING), 220 | CAST(823250232903490 AS INT64), 221 | STRUCT( 222 | STRUCT( 223 | ARRAY['produtos_horizontais@indicium1.opsgenie.net'] AS on_failure, 224 | NULL AS no_alert_for_skipped_runs 225 | ) AS email_notifications, 226 | 'MULTI_TASK' AS format, 227 | 1 AS max_concurrent_runs, 228 | '[prod] core_dag_monitoring_extraction_meltano_job' AS name, 229 | STRUCT( 230 | 'UNPAUSED' AS pause_status, 231 | '0 0 0/3 * * ? 
*' AS quartz_cron_expression, 232 | 'UTC' AS timezone_id 233 | ) AS schedule, 234 | STRUCT( 235 | NULL AS dev, 236 | 'prod' AS env 237 | ) AS tags, 238 | 0 AS timeout_seconds, 239 | STRUCT( 240 | STRUCT( 241 | NULL AS url 242 | ) AS file_arrival, 243 | NULL AS paused_status 244 | ) AS trigger 245 | ), 246 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 247 | ), 248 | ( 249 | CAST(1722538441265 AS INT64), 250 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS STRING), 251 | CAST(790689006770532 AS INT64), 252 | STRUCT( 253 | STRUCT( 254 | ARRAY['produtos_horizontais@indicium1.opsgenie.net'] AS on_failure, 255 | NULL AS no_alert_for_skipped_runs 256 | ) AS email_notifications, 257 | 'MULTI_TASK' AS format, 258 | 1 AS max_concurrent_runs, 259 | '[prod] investment_postgres_extraction_spark_job' AS name, 260 | STRUCT( 261 | 'UNPAUSED' AS pause_status, 262 | '0 0 0/4 * * ? *' AS quartz_cron_expression, 263 | 'UTC' AS timezone_id 264 | ) AS schedule, 265 | STRUCT( 266 | NULL AS dev, 267 | 'prod' AS env 268 | ) AS tags, 269 | 0 AS timeout_seconds, 270 | STRUCT( 271 | STRUCT( 272 | NULL AS url 273 | ) AS file_arrival, 274 | NULL AS paused_status 275 | ) AS trigger 276 | ), 277 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 278 | ); 279 | 280 | {% endset %} 281 | 282 | {% do run_query(create_table) %} 283 | {% do log("finished creating table jobs", info=true) %} 284 | 285 | {% do run_query(insert_table) %} 286 | {% do log("finished insert table jobs ", info=true) %} 287 | 288 | 289 | {% endmacro %} 290 | 291 | {% macro snowflake__jobs() %} 292 | {% set create_table %} 293 | CREATE OR REPLACE TABLE {{ target.database }}.{{ target.schema }}.jobs ( 294 | created_time BIGINT, 295 | creator_user_name VARCHAR, 296 | job_id BIGINT, 297 | settings VARIANT, 298 | insertedDate TIMESTAMP 299 | ); 300 | {% endset %} 301 | 302 | {% set insert_table %} 303 | 304 | INSERT INTO {{ target.database }}.{{ target.schema }}.jobs SELECT 305 | CAST(1722606667504 AS BIGINT), 306 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS VARCHAR), 307 | CAST(466340877826952 AS BIGINT), 308 | OBJECT_CONSTRUCT( 309 | 'email_notifications', OBJECT_CONSTRUCT( 310 | 'on_failure', ARRAY_CONSTRUCT('produtosgenie.net'), 311 | 'no_alert_for_skipped_runs', NULL 312 | ), 313 | 'format', 'MULTI_TASK', 314 | 'max_concurrent_runs', 1, 315 | 'name', '[prod] coreion_dbt_job', 316 | 'schedule', OBJECT_CONSTRUCT( 317 | 'pause_status', NULL, 318 | 'quartz_cron_expression', NULL, 319 | 'timezone_id', NULL 320 | ), 321 | 'tags', OBJECT_CONSTRUCT( 322 | 'dev', NULL, 323 | 'env', 'prod' 324 | ), 325 | 'timeout_seconds', 0, 326 | 'trigger', OBJECT_CONSTRUCT( 327 | 'file_arrival', OBJECT_CONSTRUCT( 328 | 'url', NULL 329 | ), 330 | 'paused_status', NULL 331 | ) 332 | ), 333 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 334 | UNION ALL 335 | SELECT 336 | CAST(1722544845800 AS BIGINT), 337 | CAST('13bc3f4b44571518ae' AS VARCHAR), 338 | CAST(823250232903490 AS BIGINT), 339 | OBJECT_CONSTRUCT( 340 | 'email_notifications', OBJECT_CONSTRUCT( 341 | 'on_failure', ARRAY_CONSTRUCT('prod.opsgenie.net'), 342 | 'no_alert_for_skipped_runs', NULL 343 | ), 344 | 'format', 'MULTI_TASK', 345 | 'max_concurrent_runs', 1, 346 | 'name', '[prod] cltano_job', 347 | 'schedule', OBJECT_CONSTRUCT( 348 | 'pause_status', 'UNPAUSED', 349 | 'quartz_cron_expression', '0 0 0/3 * * ? 
*', 350 | 'timezone_id', 'UTC' 351 | ), 352 | 'tags', OBJECT_CONSTRUCT( 353 | 'dev', NULL, 354 | 'env', 'prod' 355 | ), 356 | 'timeout_seconds', 0, 357 | 'trigger', OBJECT_CONSTRUCT( 358 | 'file_arrival', OBJECT_CONSTRUCT( 359 | 'url', NULL 360 | ), 361 | 'paused_status', NULL 362 | ) 363 | ), 364 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 365 | UNION ALL 366 | SELECT 367 | CAST(1722538441265 AS BIGINT), 368 | CAST('13bc3f4b-d299-4b58-9d09-5444571518ae' AS VARCHAR), 369 | CAST(790689006770532 AS BIGINT), 370 | OBJECT_CONSTRUCT( 371 | 'email_notifications', OBJECT_CONSTRUCT( 372 | 'on_failure', ARRAY_CONSTRUCT('produtosopsgenie.net'), 373 | 'no_alert_for_skipped_runs', NULL 374 | ), 375 | 'format', 'MULTI_TASK', 376 | 'max_concurrent_runs', 1, 377 | 'name', '[prod] invspark_job', 378 | 'schedule', OBJECT_CONSTRUCT( 379 | 'pause_status', 'UNPAUSED', 380 | 'quartz_cron_expression', '0 0 0/4 * * ? *', 381 | 'timezone_id', 'UTC' 382 | ), 383 | 'tags', OBJECT_CONSTRUCT( 384 | 'dev', NULL, 385 | 'env', 'prod' 386 | ), 387 | 'timeout_seconds', 0, 388 | 'trigger', OBJECT_CONSTRUCT( 389 | 'file_arrival', OBJECT_CONSTRUCT( 390 | 'url', NULL 391 | ), 392 | 'paused_status', NULL 393 | ) 394 | ), 395 | CAST('2024-08-20T09:11:36.525Z' AS TIMESTAMP) 396 | ; 397 | 398 | 399 | {% endset %} 400 | 401 | {% do run_query(create_table) %} 402 | {% do log("finished creating table jobs", info=true) %} 403 | 404 | {% do run_query(insert_table) %} 405 | {% do log("finished insert table jobs ", info=true) %} 406 | {% endmacro %} -------------------------------------------------------------------------------- /integration_tests/seeds/adf/adf_pipelines.csv: -------------------------------------------------------------------------------- 1 | id,name,type,properties_description,properties_activities_0_name,properties_activities_0_type,properties_activities_1_name,properties_activities_1_type,properties_activities_1_typeProperties_items_value,properties_activities_1_typeProperties_items_type,properties_activities_1_typeProperties_isSequential,properties_activities_1_typeProperties_activities_0_name,properties_activities_1_typeProperties_activities_0_type,properties_activities_1_typeProperties_activities_1_name,properties_activities_1_typeProperties_activities_1_type,properties_activities_1_typeProperties_activities_2_name,properties_activities_1_typeProperties_activities_2_type,properties_activities_1_typeProperties_activities_2_typeProperties_source_type,properties_activities_1_typeProperties_activities_2_typeProperties_sink_type,properties_activities_1_typeProperties_activities_2_inputs_0_referenceName,properties_activities_1_typeProperties_activities_2_inputs_0_type,properties_activities_1_typeProperties_activities_2_outputs_0_referenceName,properties_activities_1_typeProperties_activities_2_outputs_0_type,properties_activities_1_typeProperties_activities_3_name,properties_activities_1_typeProperties_activities_3_type,properties_activities_1_typeProperties_activities_4_name,properties_activities_1_typeProperties_activities_4_type,properties_activities_1_typeProperties_activities_5_name,properties_activities_1_typeProperties_activities_5_type,properties_activities_2_name,properties_activities_2_type,properties_activities_3_name,properties_activities_3_type,properties_activities_4_name,properties_activities_4_type,properties_activities_5_name,properties_activities_5_type,properties_activities_6_name,properties_activities_6_type,etag,properties_activities_1_typeProperties_activities_6_name,properties_activities_1_type
Properties_activities_6_type,properties_activities_1_typeProperties_activities_7_name,properties_activities_1_typeProperties_activities_7_type,properties_activities_5_typeProperties_items_value,properties_activities_5_typeProperties_items_type,properties_activities_5_typeProperties_isSequential,properties_activities_5_typeProperties_activities_0_name,properties_activities_5_typeProperties_activities_0_type,properties_activities_5_typeProperties_activities_0_typeProperties_source_type,properties_activities_5_typeProperties_activities_0_typeProperties_sink_type,properties_activities_5_typeProperties_activities_0_inputs_0_referenceName,properties_activities_5_typeProperties_activities_0_inputs_0_type,properties_activities_5_typeProperties_activities_0_outputs_0_referenceName,properties_activities_5_typeProperties_activities_0_outputs_0_type,properties_activities_7_name,properties_activities_7_type,properties_activities_8_name,properties_activities_8_type,properties_activities_9_name,properties_activities_9_type,properties_activities_10_name,properties_activities_10_type,properties_activities_11_name,properties_activities_11_type,properties_activities_12_name,properties_activities_12_type,properties_activities_13_name,properties_activities_13_type,properties_activities_0_typeProperties_items_value,properties_activities_0_typeProperties_items_type,properties_activities_0_typeProperties_activities_0_name,properties_activities_0_typeProperties_activities_0_type,properties_activities_0_typeProperties_activities_0_typeProperties_source_type,properties_activities_0_typeProperties_activities_0_typeProperties_sink_type,properties_activities_0_typeProperties_activities_0_inputs_0_referenceName,properties_activities_0_typeProperties_activities_0_inputs_0_type,properties_activities_0_typeProperties_activities_0_outputs_0_referenceName,properties_activities_0_typeProperties_activities_0_outputs_0_type,properties_activities_1_typeProperties_activities_0_typeProperties_source_type,properties_activities_1_typeProperties_activities_0_typeProperties_sink_type,properties_activities_1_typeProperties_activities_0_inputs_0_referenceName,properties_activities_1_typeProperties_activities_0_inputs_0_type,properties_activities_1_typeProperties_activities_0_outputs_0_referenceName,properties_activities_1_typeProperties_activities_0_outputs_0_type,properties_activities_0_typeProperties_activities_1_name,properties_activities_0_typeProperties_activities_1_type,properties_activities_0_typeProperties_activities_2_name,properties_activities_0_typeProperties_activities_2_type,properties_activities_14_name,properties_activities_14_type,properties_activities_15_name,properties_activities_15_type,properties_activities_16_name,properties_activities_16_type,properties_activities_17_name,properties_activities_17_type,properties_activities_18_name,properties_activities_18_type,properties_activities_19_name,properties_activities_19_type,properties_activities_20_name,properties_activities_20_type,properties_activities_21_name,properties_activities_21_type,properties_activities_22_name,properties_activities_22_type,properties_activities_23_name,properties_activities_23_type,properties_activities_24_name,properties_activities_24_type,properties_activities_25_name,properties_activities_25_type,properties_activities_26_name,properties_activities_26_type,properties_activities_27_name,properties_activities_27_type,properties_activities_28_name,properties_activities_28_type,properties_activities_29_name,properties_activities_29_type,properti
es_activities_30_name,properties_activities_30_type,properties_activities_31_name,properties_activities_31_type,properties_activities_32_name,properties_activities_32_type,properties_activities_33_name,properties_activities_33_type,properties_activities_34_name,properties_activities_34_type,properties_activities_35_name,properties_activities_35_type,properties_activities_36_name,properties_activities_36_type,properties_activities_37_name,properties_activities_37_type,properties_activities_38_name,properties_activities_38_type,properties_activities_39_name,properties_activities_39_type,properties_activities_3_typeProperties_items_value,properties_activities_3_typeProperties_items_type,properties_activities_3_typeProperties_isSequential,properties_activities_3_typeProperties_activities_0_name,properties_activities_3_typeProperties_activities_0_type,properties_activities_3_typeProperties_activities_0_typeProperties_source_type,properties_activities_3_typeProperties_activities_1_name,properties_activities_3_typeProperties_activities_1_type,properties_activities_1_typeProperties_activities_1_typeProperties_source_type,properties_activities_1_typeProperties_activities_1_typeProperties_sink_type,properties_activities_1_typeProperties_activities_1_inputs_0_referenceName,properties_activities_1_typeProperties_activities_1_inputs_0_type,properties_activities_1_typeProperties_activities_1_outputs_0_referenceName,properties_activities_1_typeProperties_activities_1_outputs_0_type,properties_activities_0_typeProperties_isSequential,properties_activities_0_typeProperties_activities_2_typeProperties_source_type,properties_activities_0_typeProperties_activities_2_typeProperties_sink_type,properties_activities_0_typeProperties_activities_2_inputs_0_referenceName,properties_activities_0_typeProperties_activities_2_inputs_0_type,properties_activities_0_typeProperties_activities_2_outputs_0_referenceName,properties_activities_0_typeProperties_activities_2_outputs_0_type,properties_activities_0_typeProperties_activities_3_name,properties_activities_0_typeProperties_activities_3_type,properties_activities_0_typeProperties_activities_4_name,properties_activities_0_typeProperties_activities_4_type,properties_activities_0_typeProperties_activities_5_name,properties_activities_0_typeProperties_activities_5_type,properties_activities_0_typeProperties_activities_6_name,properties_activities_0_typeProperties_activities_6_type,properties_activities_2_typeProperties_items_value,properties_activities_2_typeProperties_items_type,properties_activities_2_typeProperties_activities_0_name,properties_activities_2_typeProperties_activities_0_type,properties_activities_2_typeProperties_activities_0_typeProperties_source_type,properties_activities_2_typeProperties_activities_0_typeProperties_sink_type,properties_activities_2_typeProperties_activities_0_inputs_0_referenceName,properties_activities_2_typeProperties_activities_0_inputs_0_type,properties_activities_2_typeProperties_activities_0_outputs_0_referenceName,properties_activities_2_typeProperties_activities_0_outputs_0_type,properties_activities_4_typeProperties_items_value,properties_activities_4_typeProperties_items_type,properties_activities_4_typeProperties_isSequential,properties_activities_4_typeProperties_activities_0_name,properties_activities_4_typeProperties_activities_0_type,properties_activities_4_typeProperties_activities_1_name,properties_activities_4_typeProperties_activities_1_type,properties_activities_4_typeProperties_activities_2_name,properties_activities_
4_typeProperties_activities_2_type,properties_activities_4_typeProperties_activities_3_name,properties_activities_4_typeProperties_activities_3_type,properties_activities_1_typeProperties_activities_8_name,properties_activities_1_typeProperties_activities_8_type,properties_activities_1_typeProperties_activities_6_typeProperties_source_type,properties_activities_2_typeProperties_isSequential,properties_activities_1_typeProperties_activities_9_name,properties_activities_1_typeProperties_activities_9_type,properties_activities_1_typeProperties_activities_10_name,properties_activities_1_typeProperties_activities_10_type,properties_activities_1_typeProperties_activities_11_name,properties_activities_1_typeProperties_activities_11_type,properties_activities_1_typeProperties_activities_12_name,properties_activities_1_typeProperties_activities_12_type,properties_activities_1_typeProperties_activities_13_name,properties_activities_1_typeProperties_activities_13_type,properties_activities_1_typeProperties_activities_14_name,properties_activities_1_typeProperties_activities_14_type,properties_activities_1_typeProperties_activities_15_name,properties_activities_1_typeProperties_activities_15_type,properties_activities_0_typeProperties_activities_1_typeProperties_source_type,properties_activities_0_typeProperties_activities_1_typeProperties_sink_type,properties_activities_0_typeProperties_activities_1_inputs_0_referenceName,properties_activities_0_typeProperties_activities_1_inputs_0_type,properties_activities_0_typeProperties_activities_1_outputs_0_referenceName,properties_activities_0_typeProperties_activities_1_outputs_0_type,properties_activities_0_typeProperties_activities_7_name,properties_activities_0_typeProperties_activities_7_type,properties_activities_0_typeProperties_activities_8_name,properties_activities_0_typeProperties_activities_8_type,properties_activities_0_typeProperties_activities_9_name,properties_activities_0_typeProperties_activities_9_type,properties_activities_0_typeProperties_activities_10_name,properties_activities_0_typeProperties_activities_10_type,properties_activities_0_typeProperties_activities_11_name,properties_activities_0_typeProperties_activities_11_type,properties_activities_0_typeProperties_activities_12_name,properties_activities_0_typeProperties_activities_12_type,properties_activities_2_typeProperties_activities_1_name,properties_activities_2_typeProperties_activities_1_type,properties_activities_2_typeProperties_activities_1_typeProperties_source_type,properties_activities_2_typeProperties_activities_2_name,properties_activities_2_typeProperties_activities_2_type,properties_activities_2_typeProperties_activities_3_name,properties_activities_2_typeProperties_activities_3_type,properties_activities_3_typeProperties_activities_2_name,properties_activities_3_typeProperties_activities_2_type,properties_activities_3_typeProperties_activities_3_name,properties_activities_3_typeProperties_activities_3_type 2 | /subscriptions/9f0755ices-dev-rg/providers/Microsoft.DataFactory/factories/tlo-datastudio-adf-d/pipelines/PL-ESPPIFLEXTODatalakeLandingZone-N,PL-ESPPIFLEXTODatalakeLandingZone-N,Microsoft.DataFactory/factories/pipelines,This pipeline copies data from all IFLEX environment tables listed in the LoadingControl. 
,Get All Tables,Lookup,For Each Tables,ForEach,@activity('Get All Tables').output.value,Expression,false,Update StartDate,DatabricksNotebook,Set CurrentDate foreach,SetVariable,Copy Data,Copy,SqlServerSource,ParquetSink,DS__ESPP__Generic,DatasetReference,DS__FEM__Equinix_DatalakeParquet,DatasetReference,Update EndDate Success,DatabricksNotebook,Set CurrentDate foreach end,SetVariable,Update EndDate Success Error,DatabricksNotebook,Set CurrentDate,SetVariable,Set Timestamp,SetVariable,Restart LoadingControl,DatabricksNotebook,Load Landing to Bronze,DatabricksNotebook,FInalizacaoPipeline,Wait,c501a33a-0000-0b00-0000-65fde01e0000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 3 | /subscriptions/9f07555c-2taplatformfm-services-dev-rg/providers/Microsoft.DataFactory/factories/tlo-datastudio-adf-d/pipelines/PL-RepomSQLPROD3TODatalakeLandingZone-N,PL-RepomSQLPROD3TODatalakeLandingZone-N,Microsoft.DataFactory/factories/pipelines,This pipeline copies data from all SQLPROD3 environment tables listed in the LoadingControl. ,Get All Tables,Lookup,For Each Tables,ForEach,@activity('Get All Tables').output.value,Expression,false,Update StartDate,DatabricksNotebook,Set CurrentDate foreach,SetVariable,Copy Data,Copy,SqlServerSource,ParquetSink,DS__Repom__Generic,DatasetReference,DS__FEM__UolDiveo_DatalakeParquet,DatasetReference,Update EndDate Success,DatabricksNotebook,Set CurrentDate foreach end,SetVariable,Update EndDate Success Error,DatabricksNotebook,Set CurrentDate,SetVariable,Set Timestamp,SetVariable,Restart LoadingControl,DatabricksNotebook,Load Landing to Bronze,DatabricksNotebook,FInalizacaoPipeline,Wait,c501a23a-0000-0b00-0000-65fde01e0000,FilterType,IfCondition,Update EndDate Success Error CopyData,DatabricksNotebook,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 4 | --------------------------------------------------------------------------------