├── .github ├── CODEOWNERS └── workflows │ └── workflow.yml ├── integration_tests ├── dbt_project.yml ├── dbt_utils │ ├── models │ │ └── test_recency.sql │ ├── packages.yml │ ├── Makefile │ └── dbt_project.yml ├── snowplow │ ├── packages.yml │ ├── Makefile │ ├── models │ │ ├── sessions_expected.sql │ │ ├── page_views_expected.sql │ │ └── base_event.sql │ └── dbt_project.yml └── ci │ └── sample.profiles.yml ├── .gitignore ├── macros ├── dbt_utils │ ├── cross_db_utils │ │ ├── array_concat.sql │ │ ├── array_construct.sql │ │ ├── cast_array_to_string.sql │ │ ├── deprecated │ │ │ ├── concat.sql │ │ │ ├── assert_not_null.sql │ │ │ ├── split_part.sql │ │ │ ├── datatypes.sql │ │ │ ├── datediff.sql │ │ │ └── dateadd.sql │ │ ├── array_append.sql │ │ └── current_timestamp.sql │ └── sql │ │ └── get_relations_by_prefix.sql ├── snowplow │ └── convert_timezone.sql └── maintenance_operation.sql ├── dbt_project.yml ├── .gitmodules ├── profiles.yml ├── dev-requirements.txt ├── pytest.ini ├── CHANGELOG.md ├── tests ├── unit │ └── test_macros.py └── functional │ ├── test_utils.py │ └── conftest.py ├── .circleci └── config.yml ├── README.md └── LICENSE /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @dbt-labs/dx 2 | -------------------------------------------------------------------------------- /integration_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'spark_utils_integration_tests' 2 | version: '0.1.0' 3 | config-version: 2 4 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/models/test_recency.sql: -------------------------------------------------------------------------------- 1 | select 2 | {{ dbt_utils.date_trunc('day', dbt_utils.current_timestamp()) }} as today 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | /**/target/ 3 | /**/dbt_modules/ 4 | /**/dbt_packages/ 5 | /**/logs/ 6 | /**/env/ 7 | /**/__pycache__/ 8 | test.env 9 | dbt_internal_packages/ -------------------------------------------------------------------------------- /integration_tests/snowplow/packages.yml: -------------------------------------------------------------------------------- 1 | 2 | packages: 3 | - local: ../../ 4 | - local: ../../snowplow 5 | - local: ../../snowplow/integration_tests 6 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/packages.yml: -------------------------------------------------------------------------------- 1 | 2 | packages: 3 | - local: ../../ 4 | - local: ../../dbt-utils 5 | - local: ../../dbt-utils/integration_tests 6 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/array_concat.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__array_concat(array_1, array_2) -%} 2 | concat({{ array_1 }}, {{ array_2 }}) 3 | {%- endmacro %} -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/array_construct.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__array_construct(inputs, data_type) -%} 2 | array( {{ inputs|join(' , ') }} ) 3 | {%- endmacro %} 
-------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/cast_array_to_string.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__cast_array_to_string(array) %} 2 | '['||concat_ws(',', {{ array }})||']' 3 | {% endmacro %} -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/deprecated/concat.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__concat(fields) -%} 2 | {{ return(adapter.dispatch('concat', 'dbt')(fields)) }} 3 | {%- endmacro %} 4 | -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'spark_utils' 2 | profile: 'sparkutils' 3 | version: '0.3.0' 4 | config-version: 2 5 | require-dbt-version: [">=1.2.0", "<3.0.0"] 6 | macro-paths: ["macros"] 7 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "dbt-utils"] 2 | path = dbt-utils 3 | url = https://github.com/dbt-labs/dbt-utils 4 | [submodule "snowplow"] 5 | path = snowplow 6 | url = https://github.com/dbt-labs/snowplow 7 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/array_append.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__array_append(array, new_element) -%} 2 | {{ dbt_utils.array_concat(array, dbt_utils.array_construct([new_element])) }} 3 | {%- endmacro %} -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/deprecated/assert_not_null.sql: -------------------------------------------------------------------------------- 1 | {% macro assert_not_null(function, arg) -%} 2 | {{ return(adapter.dispatch('assert_not_null', 'dbt')(function, arg)) }} 3 | {%- endmacro %} 4 | -------------------------------------------------------------------------------- /macros/snowplow/convert_timezone.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__convert_timezone(in_tz, out_tz, in_timestamp) %} 2 | from_utc_timestamp(to_utc_timestamp({{in_timestamp}}, {{in_tz}}), {{out_tz}}) 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /profiles.yml: -------------------------------------------------------------------------------- 1 | sparkutils: 2 | target: test 3 | outputs: 4 | test: 5 | type: spark 6 | method: session 7 | schema: test 8 | host: NA # not used, but required by `dbt-spark` 9 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/current_timestamp.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__current_timestamp() %} 2 | current_timestamp() 3 | {% endmacro %} 4 | 5 | 6 | {% macro spark__current_timestamp_in_utc() %} 7 | unix_timestamp() 8 | {% endmacro %} 9 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/deprecated/split_part.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__split_part(string_text, delimiter_text, 
part_number) %} 2 | {{ return(adapter.dispatch('split_part', 'dbt')(string_text, delimiter_text, part_number)) }} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/deprecated/datatypes.sql: -------------------------------------------------------------------------------- 1 | {# numeric ------------------------------------------------ #} 2 | 3 | {% macro spark__type_numeric() %} 4 | {{ return(adapter.dispatch('type_numeric', 'dbt')()) }} 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /integration_tests/snowplow/Makefile: -------------------------------------------------------------------------------- 1 | 2 | test-databricks: 3 | dbt deps 4 | dbt seed --target databricks --full-refresh 5 | dbt run --target databricks --full-refresh --vars 'update: false' 6 | dbt run --target databricks --vars 'update: true' 7 | dbt test --target databricks 8 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pyodbc==4.0.32 3 | git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core 4 | git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter 5 | git+https://github.com/dbt-labs/dbt-spark.git#egg=dbt-spark[ODBC,session] 6 | pytest-spark~=0.6.0 7 | pytest-dbt-core~=0.1.0 8 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning 4 | ignore:unclosed file .*:ResourceWarning 5 | env_files = 6 | test.env 7 | testpaths = 8 | tests/functional 9 | spark_options = 10 | spark.app.name: spark-utils 11 | spark.executor.instances: 1 12 | spark.sql.catalogImplementation: in-memory 13 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/Makefile: -------------------------------------------------------------------------------- 1 | 2 | test-spark: 3 | dbt deps 4 | dbt seed --target spark --full-refresh --no-version-check 5 | dbt run --target spark --full-refresh --no-version-check 6 | dbt test --target spark --no-version-check 7 | 8 | test-databricks: 9 | dbt deps 10 | dbt seed --target databricks --full-refresh 11 | dbt run --target databricks --full-refresh 12 | dbt test --target databricks 13 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/deprecated/datediff.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__datediff(first_date, second_date, datepart) %} 2 | -- dispatch here gets very very confusing 3 | -- we just need to hint to dbt that this is a required macro for resolving dbt.spark__datediff() 4 | -- {{ assert_not_null() }} 5 | {{ return(adapter.dispatch('datediff', 'dbt')(first_date, second_date, datepart)) }} 6 | {% endmacro %} 7 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/deprecated/dateadd.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__dateadd(datepart, interval, from_date_or_timestamp) %} 2 | -- dispatch here gets very very confusing 
3 | -- we just need to hint to dbt that this is a required macro for resolving dbt.spark__datediff() 4 | -- {{ assert_not_null() }} 5 | {{ return(adapter.dispatch('dateadd', 'dbt')(datepart, interval, from_date_or_timestamp)) }} 6 | {% endmacro %} 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # spark-utils v0.3.0 2 | This release supports any version (minor and patch) of v1, which means far less need for compatibility releases in the future. 3 | 4 | ### Features 5 | - Add macros for common maintenance operations ([#18](https://github.com/dbt-labs/spark-utils/pull/18)) 6 | 7 | ### Contributors 8 | - [@JCZuurmond](https://github.com/JCZuurmond) ([#18](https://github.com/dbt-labs/dbt-external-tables/pull/18)) 9 | - [@NielsZeilemaker](https://github.com/NielsZeilemaker) ([#18](https://github.com/dbt-labs/dbt-external-tables/pull/18)) 10 | 11 | # spark-utils v0.2.4 12 | 🚨 This is a compatibility release in preparation for `dbt-core` v1.0.0 (🎉) 13 | -------------------------------------------------------------------------------- /integration_tests/snowplow/models/sessions_expected.sql: -------------------------------------------------------------------------------- 1 | {{config(enabled=snowplow.is_adapter('default'))}} 2 | 3 | select 4 | user_custom_id, 5 | inferred_user_id, 6 | user_snowplow_domain_id, 7 | user_snowplow_crossdomain_id, 8 | app_id, 9 | first_page_url, 10 | marketing_medium, 11 | marketing_source, 12 | marketing_term, 13 | marketing_campaign, 14 | marketing_content, 15 | referer_url, 16 | to_timestamp(session_start) as session_start, 17 | to_timestamp(session_end) as session_end, 18 | session_id, 19 | time_engaged_in_s, 20 | session_index, 21 | first_test_add_col, 22 | last_test_add_col 23 | 24 | from {{ ref('snowplow_sessions_expected') }} 25 | -------------------------------------------------------------------------------- /integration_tests/snowplow/models/page_views_expected.sql: -------------------------------------------------------------------------------- 1 | {{config(enabled=snowplow.is_adapter('default'))}} 2 | 3 | select 4 | 5 | user_custom_id, 6 | user_snowplow_domain_id, 7 | user_snowplow_crossdomain_id, 8 | session_id, 9 | session_index, 10 | page_view_id, 11 | to_timestamp(page_view_start) as page_view_start, 12 | to_timestamp(page_view_end) as page_view_end, 13 | time_engaged_in_s, 14 | horizontal_percentage_scrolled, 15 | vertical_percentage_scrolled, 16 | page_url, 17 | marketing_medium, 18 | marketing_source, 19 | marketing_term, 20 | marketing_content, 21 | marketing_campaign, 22 | test_add_col 23 | 24 | from {{ ref('snowplow_page_views_expected') }} 25 | -------------------------------------------------------------------------------- /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 14 | 15 | - name: Set up Python 3.9 16 | uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c # actions/setup-python@v4 17 | with: 18 | python-version: 3.9 19 | 20 | - name: Install dependencies 21 | shell: bash 22 | run: | 23 | sudo apt-get install libsasl2-dev 24 | python -m pip install --upgrade pip 25 | python -m pip install -r 
dev-requirements.txt 26 | 27 | - name: Run unit tests 28 | shell: bash 29 | run: DBT_PROFILES_DIR=$PWD pytest tests/unit 30 | -------------------------------------------------------------------------------- /tests/unit/test_macros.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | import pytest 4 | from dbt.clients.jinja import MacroGenerator 5 | from pyspark.sql import SparkSession 6 | 7 | 8 | @pytest.fixture 9 | def simple_table(spark_session: SparkSession) -> str: 10 | """Create and delete a simple table used for testing.""" 11 | table_name = f"default.table_{uuid.uuid4()}".replace("-", "_") 12 | spark_session.sql(f"CREATE TABLE {table_name} (id int) USING parquet") 13 | yield table_name 14 | spark_session.sql(f"DROP TABLE IF EXISTS {table_name}") 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "macro_generator", ["macro.spark_utils.get_tables"], indirect=True 19 | ) 20 | def test_create_table( 21 | macro_generator: MacroGenerator, simple_table: str 22 | ) -> None: 23 | """The `get_tables` macro should return the created table.""" 24 | tables = macro_generator() 25 | assert simple_table in tables 26 | 27 | -------------------------------------------------------------------------------- /integration_tests/ci/sample.profiles.yml: -------------------------------------------------------------------------------- 1 | 2 | # HEY! This file is used in the spark_utils integrations tests with CircleCI. 3 | # You should __NEVER__ check credentials into version control. Thanks for reading :) 4 | 5 | config: 6 | send_anonymous_usage_stats: False 7 | use_colors: True 8 | 9 | integration_tests: 10 | target: spark 11 | outputs: 12 | spark: 13 | type: spark 14 | method: thrift 15 | schema: spark_utils_dbt_utils_integration_tests 16 | host: "{{ env_var('SPARK_TEST_HOST') }}" 17 | port: "{{ env_var('SPARK_TEST_PORT') | as_number }}" 18 | user: "{{ env_var('SPARK_TEST_USER') }}" 19 | connect_retries: 5 20 | connect_timeout: 60 21 | threads: 5 22 | 23 | databricks-utils: &databricks-odbc 24 | type: spark 25 | host: "{{ env_var('DBT_DATABRICKS_HOST_NAME') }}" 26 | endpoint: "{{ env_var('DBT_DATABRICKS_ENDPOINT') }}" 27 | token: "{{ env_var('DBT_DATABRICKS_TOKEN') }}" 28 | method: odbc 29 | driver: "{{ env_var('ODBC_DRIVER') }}" 30 | port: 443 31 | threads: 5 32 | schema: spark_utils_dbt_utils_integration_tests 33 | 34 | databricks-snowplow: 35 | <<: *databricks-odbc 36 | schema: spark_utils_snowplow_integration_tests 37 | -------------------------------------------------------------------------------- /integration_tests/snowplow/models/base_event.sql: -------------------------------------------------------------------------------- 1 | {%- set cols = adapter.get_columns_in_relation(ref('sp_event')) -%} 2 | {%- set col_list = [] -%} 3 | 4 | {% set type_overrides = { 5 | "br_cookies": "boolean", 6 | "br_features_director": "boolean", 7 | "br_features_flash": "boolean", 8 | "br_features_gears": "boolean", 9 | "br_features_java": "boolean", 10 | "br_features_pdf": "boolean", 11 | "br_features_quicktime": "boolean", 12 | "br_features_realplayer": "boolean", 13 | "br_features_silverlight": "boolean", 14 | "br_features_windowsmedia": "boolean", 15 | "collector_tstamp": "timestamp", 16 | "derived_tstamp": "timestamp", 17 | "dvce_ismobile": "boolean" 18 | } %} 19 | 20 | {%- for col in cols -%} 21 | {%- set col_statement -%} 22 | {%- if col.column in type_overrides.keys() %} 23 | cast({{col.column}} as {{type_overrides[col.column]}}) as {{col.column}} 24 | {% else 
%} 25 | {{col.column}} 26 | {% endif -%} 27 | {%- endset -%} 28 | {%- do col_list.append(col_statement) -%} 29 | {%- endfor -%} 30 | 31 | {%- set col_list_csv = col_list|join(',') -%} 32 | 33 | select {{col_list_csv}} from {{ ref('sp_event') }} 34 | 35 | {% if var('update', False) %} 36 | 37 | union all 38 | 39 | select {{col_list_csv}} from {{ ref('sp_event_update') }} 40 | 41 | {% endif %} 42 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'spark_utils_dbt_utils_integration_tests' 2 | version: '1.0' 3 | config-version: 2 4 | 5 | profile: 'integration_tests' 6 | 7 | analysis-paths: ["analysis"] 8 | test-paths: ["tests"] 9 | macro-paths: ["macros"] 10 | clean-targets: # directories to be removed by `dbt clean` 11 | - "target" 12 | - "dbt_modules" 13 | dispatch: 14 | - macro_namespace: dbt_utils 15 | search_order: 16 | - spark_utils 17 | - dbt_utils_integration_tests 18 | - dbt_utils 19 | 20 | seeds: 21 | dbt_utils_integration_tests: 22 | +file_format: delta 23 | 24 | models: 25 | dbt_utils_integration_tests: 26 | +file_format: delta 27 | 28 | sql: 29 | # macro doesn't work for this integration test (schema pattern) 30 | test_get_relations_by_pattern: 31 | +enabled: false 32 | # integration test doesn't work 33 | test_groupby: 34 | +enabled: false 35 | # integration test doesn't work 36 | test_pivot_apostrophe: 37 | +enabled: false 38 | generic_tests: 39 | # default version of this integration test uses an explicit cast to 'datetime' 40 | # which SparkSQL does not support. override with our own version 41 | test_recency: 42 | +enabled: false 43 | cross_db_utils: 44 | # integration test doesn't work 45 | test_any_value: 46 | +enabled: false 47 | 48 | tests: 49 | dbt_utils_integration_tests: 50 | cross_db_utils: 51 | # expect exactly two failures 52 | # (both use "order by", which isn't supported in SparkSQL) 53 | assert_equal_test_listagg_actual__expected: 54 | +error_if: ">2" 55 | flags: 56 | require_generic_test_arguments_property: true 57 | seed-paths: ["data"] 58 | model-paths: ["models"] -------------------------------------------------------------------------------- /macros/dbt_utils/sql/get_relations_by_prefix.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__get_relations_by_pattern(schema_pattern, table_pattern, exclude='', database=target.database) %} 2 | 3 | {%- call statement('get_tables', fetch_result=True) %} 4 | 5 | show table extended in {{ schema_pattern }} like '{{ table_pattern }}' 6 | 7 | {%- endcall -%} 8 | 9 | {%- set table_list = load_result('get_tables') -%} 10 | 11 | {%- if table_list and table_list['table'] -%} 12 | {%- set tbl_relations = [] -%} 13 | {%- for row in table_list['table'] -%} 14 | {%- set tbl_relation = api.Relation.create( 15 | database=None, 16 | schema=row[0], 17 | identifier=row[1], 18 | type=('view' if 'Type: VIEW' in row[3] else 'table') 19 | ) -%} 20 | {%- do tbl_relations.append(tbl_relation) -%} 21 | {%- endfor -%} 22 | 23 | {{ return(tbl_relations) }} 24 | {%- else -%} 25 | {{ return([]) }} 26 | {%- endif -%} 27 | 28 | {% endmacro %} 29 | 30 | {% macro spark__get_relations_by_prefix(schema_pattern, table_pattern, exclude='', database=target.database) %} 31 | {% set table_pattern = table_pattern ~ '*' %} 32 | {{ return(spark_utils.spark__get_relations_by_pattern(schema_pattern, table_pattern, exclude='', 
database=target.database)) }} 33 | {% endmacro %} 34 | 35 | {% macro spark__get_tables_by_pattern(schema_pattern, table_pattern, exclude='', database=target.database) %} 36 | {{ return(spark_utils.spark__get_relations_by_pattern(schema_pattern, table_pattern, exclude='', database=target.database)) }} 37 | {% endmacro %} 38 | 39 | {% macro spark__get_tables_by_prefix(schema_pattern, table_pattern, exclude='', database=target.database) %} 40 | {{ return(spark_utils.spark__get_relations_by_prefix(schema_pattern, table_pattern, exclude='', database=target.database)) }} 41 | {% endmacro %} 42 | -------------------------------------------------------------------------------- /integration_tests/snowplow/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'spark_utils_snowplow_integration_tests' 2 | version: '1.0' 3 | config-version: 2 4 | 5 | profile: 'integration_tests' 6 | 7 | analysis-paths: ["analysis"] 8 | test-paths: ["tests"] 9 | macro-paths: ["macros"] 10 | clean-targets: # directories to be removed by `dbt clean` 11 | - "target" 12 | - "dbt_modules" 13 | 14 | dispatch: 15 | - macro_namespace: dbt_utils 16 | search_order: ['spark_utils', 'dbt_utils'] 17 | - macro_namespace: snowplow 18 | search_order: ['spark_utils', 'snowplow'] 19 | 20 | models: 21 | snowplow_integration_tests: 22 | pre: 23 | default: 24 | base_event: 25 | +enabled: false 26 | post: 27 | page_views: 28 | default: 29 | page_views_expected: 30 | +enabled: false 31 | sessions: 32 | default: 33 | sessions_expected: 34 | +enabled: false 35 | 36 | +incremental_strategy: merge 37 | +file_format: delta 38 | vars: 39 | 'snowplow:timezone': 'America/New_York' 40 | 'snowplow:events': '{{ ref("base_event") }}' 41 | 'snowplow:context:web_page': '{{ ref("base_web_page") }}' 42 | 'snowplow:context:performance_timing': false 43 | 'snowplow:context:useragent': false 44 | 'snowplow:pass_through_columns': ['test_add_col'] 45 | seeds: 46 | snowplow_integration_tests: 47 | event: 48 | +column_types: 49 | br_cookies: string 50 | br_features_director: string 51 | br_features_flash: string 52 | br_features_gears: string 53 | br_features_java: string 54 | br_features_pdf: string 55 | br_features_quicktime: string 56 | br_features_realplayer: string 57 | br_features_silverlight: string 58 | br_features_windowsmedia: string 59 | collector_tstamp: string 60 | derived_tstamp: string 61 | dvce_ismobile: string 62 | expected: 63 | snowplow_page_views_expected: 64 | +column_types: 65 | page_view_start: string 66 | page_view_end: string 67 | snowplow_sessions_expected: 68 | +column_types: 69 | session_start: string 70 | session_end: string 71 | +quote_columns: false 72 | flags: 73 | require_generic_test_arguments_property: true 74 | seed-paths: ["data"] 75 | model-paths: ["models"] -------------------------------------------------------------------------------- /tests/functional/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from dbt.tests.util import run_dbt 4 | 5 | from dbt.tests.adapter.utils.base_utils import BaseUtils 6 | from dbt.tests.adapter.utils.test_concat import BaseConcat 7 | from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd 8 | from dbt.tests.adapter.utils.test_datediff import BaseDateDiff 9 | from dbt.tests.adapter.utils.test_split_part import BaseSplitPart 10 | 11 | from dbt.tests.adapter.utils.data_types.base_data_type_macro import BaseDataTypeMacro 12 | from 
dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric 13 | 14 | 15 | class BaseSparkUtilsBackCompat: 16 | # install this repo as a package 17 | @pytest.fixture(scope="class") 18 | def packages(self): 19 | return { 20 | "packages": [ 21 | {"local": os.getcwd()}, 22 | {"git": "https://github.com/dbt-labs/dbt-utils"} 23 | ]} 24 | 25 | @pytest.fixture(scope="class") 26 | def project_config_update(self): 27 | return { 28 | "dispatch": [{ 29 | "macro_namespace": "dbt_utils", 30 | "search_order": ["spark_utils", "dbt_utils"] 31 | }] 32 | } 33 | 34 | # call the macros from the 'dbt_utils' namespace 35 | # instead of the unspecified / global namespace 36 | def macro_namespace(self): 37 | return "dbt_utils" 38 | 39 | 40 | class BaseSparkUtilsBackCompatUtil(BaseSparkUtilsBackCompat, BaseUtils): 41 | # actual test sequence needs to run 'deps' first 42 | def test_build_assert_equal(self, project): 43 | run_dbt(['deps']) 44 | super().test_build_assert_equal(project) 45 | 46 | 47 | class BaseSparkUtilsBackCompatDataType(BaseSparkUtilsBackCompat, BaseDataTypeMacro): 48 | # actual test sequence needs to run 'deps' first 49 | def test_check_types_assert_match(self, project): 50 | run_dbt(['deps']) 51 | super().test_check_types_assert_match(project) 52 | 53 | 54 | class TestConcat(BaseSparkUtilsBackCompatUtil, BaseConcat): 55 | pass 56 | 57 | 58 | class TestDateAdd(BaseSparkUtilsBackCompatUtil, BaseDateAdd): 59 | pass 60 | 61 | 62 | class TestDateDiff(BaseSparkUtilsBackCompatUtil, BaseDateDiff): 63 | pass 64 | 65 | 66 | class TestSplitPart(BaseSparkUtilsBackCompatUtil, BaseSplitPart): 67 | pass 68 | 69 | 70 | class TestTypeNumeric(BaseSparkUtilsBackCompatDataType, BaseTypeNumeric): 71 | def numeric_fixture_type(self): 72 | return "decimal(28,6)" 73 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | jobs: 4 | 5 | integration-dbt-utils-databricks: &databricks-odbc 6 | environment: 7 | DBT_INVOCATION_ENV: circle 8 | ODBC_DRIVER: Simba # TODO: move env var to Docker image 9 | docker: 10 | # image based on `fishtownanalytics/test-container` w/ Simba ODBC Spark driver installed 11 | - image: 828731156495.dkr.ecr.us-east-1.amazonaws.com/dbt-spark-odbc-test-container:latest 12 | aws_auth: 13 | aws_access_key_id: $AWS_ACCESS_KEY_ID_STAGING 14 | aws_secret_access_key: $AWS_SECRET_ACCESS_KEY_STAGING 15 | 16 | steps: 17 | - checkout 18 | 19 | - run: &pull-submodules 20 | name: "Pull Submodules" 21 | command: | 22 | git submodule init 23 | git submodule sync --recursive 24 | git submodule foreach --recursive git fetch 25 | git submodule update --init --recursive 26 | 27 | - run: &setup-dbt 28 | name: "Setup dbt" 29 | command: | 30 | python3.8 -m venv venv 31 | . venv/bin/activate 32 | pip install --upgrade pip setuptools 33 | pip install -r dev-requirements.txt 34 | mkdir -p ~/.dbt 35 | cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml 36 | 37 | - run: 38 | name: "Run Functional Tests" 39 | command: | 40 | . venv/bin/activate 41 | python3 -m pytest tests/functional --profile databricks_sql_endpoint 42 | 43 | - run: 44 | name: "Run Tests - dbt-utils" 45 | 46 | command: | 47 | . 
venv/bin/activate 48 | cd integration_tests/dbt_utils 49 | dbt deps --target databricks-utils 50 | dbt seed --target databricks-utils --full-refresh 51 | dbt run --target databricks-utils --full-refresh 52 | dbt test --target databricks-utils 53 | 54 | - store_artifacts: 55 | path: ./logs 56 | 57 | integration-snowplow-databricks: 58 | <<: *databricks-odbc 59 | steps: 60 | - checkout 61 | - run: *pull-submodules 62 | - run: *setup-dbt 63 | 64 | - run: 65 | name: "Run Tests - Snowplow" 66 | command: | 67 | . venv/bin/activate 68 | cd integration_tests/snowplow 69 | dbt deps --target databricks-snowplow 70 | dbt seed --target databricks-snowplow --full-refresh 71 | dbt run --target databricks-snowplow --full-refresh --vars 'update: false' 72 | dbt run --target databricks-snowplow --vars 'update: true' 73 | dbt test --target databricks-snowplow 74 | 75 | - store_artifacts: 76 | path: ./logs 77 | 78 | workflows: 79 | version: 2 80 | test-shims: 81 | jobs: 82 | - integration-dbt-utils-databricks: 83 | context: aws-credentials 84 | - integration-snowplow-databricks: 85 | context: aws-credentials 86 | -------------------------------------------------------------------------------- /tests/functional/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | 4 | pytest_plugins = ["dbt.tests.fixtures.project"] 5 | 6 | 7 | def pytest_addoption(parser): 8 | parser.addoption("--profile", action="store", default="apache_spark", type=str) 9 | 10 | 11 | # Using @pytest.mark.skip_profile('apache_spark') uses the 'skip_by_profile_type' 12 | # autouse fixture below 13 | def pytest_configure(config): 14 | config.addinivalue_line( 15 | "markers", 16 | "skip_profile(profile): skip test for the given profile", 17 | ) 18 | 19 | 20 | @pytest.fixture(scope="session") 21 | def dbt_profile_target(request): 22 | profile_type = request.config.getoption("--profile") 23 | if profile_type == "databricks_cluster": 24 | target = databricks_cluster_target() 25 | elif profile_type == "databricks_sql_endpoint": 26 | target = databricks_sql_endpoint_target() 27 | elif profile_type == "apache_spark": 28 | target = apache_spark_target() 29 | elif profile_type == "databricks_http_cluster": 30 | target = databricks_http_cluster_target() 31 | elif profile_type == "spark_session": 32 | target = spark_session_target() 33 | else: 34 | raise ValueError(f"Invalid profile type '{profile_type}'") 35 | return target 36 | 37 | 38 | def apache_spark_target(): 39 | return { 40 | "type": "spark", 41 | "host": "localhost", 42 | "user": "dbt", 43 | "method": "thrift", 44 | "port": 10000, 45 | "connect_retries": 3, 46 | "connect_timeout": 5, 47 | "retry_all": True, 48 | } 49 | 50 | 51 | def databricks_cluster_target(): 52 | return { 53 | "type": "spark", 54 | "method": "odbc", 55 | "host": os.getenv("DBT_DATABRICKS_HOST_NAME"), 56 | "cluster": os.getenv("DBT_DATABRICKS_CLUSTER_NAME"), 57 | "token": os.getenv("DBT_DATABRICKS_TOKEN"), 58 | "driver": os.getenv("ODBC_DRIVER"), 59 | "port": 443, 60 | "connect_retries": 3, 61 | "connect_timeout": 5, 62 | "retry_all": True, 63 | } 64 | 65 | 66 | def databricks_sql_endpoint_target(): 67 | return { 68 | "type": "spark", 69 | "method": "odbc", 70 | "host": os.getenv("DBT_DATABRICKS_HOST_NAME"), 71 | "endpoint": os.getenv("DBT_DATABRICKS_ENDPOINT"), 72 | "token": os.getenv("DBT_DATABRICKS_TOKEN"), 73 | "driver": os.getenv("ODBC_DRIVER"), 74 | "port": 443, 75 | "connect_retries": 3, 76 | "connect_timeout": 5, 77 | "retry_all": True, 78 | } 
79 | 80 | 81 | def databricks_http_cluster_target(): 82 | return { 83 | "type": "spark", 84 | "host": os.getenv('DBT_DATABRICKS_HOST_NAME'), 85 | "cluster": os.getenv('DBT_DATABRICKS_CLUSTER_NAME'), 86 | "token": os.getenv('DBT_DATABRICKS_TOKEN'), 87 | "method": "http", 88 | "port": 443, 89 | # more retries + longer timout to handle unavailability while cluster is restarting 90 | # return failures quickly in dev, retry all failures in CI (up to 5 min) 91 | "connect_retries": 5, 92 | "connect_timeout": 60, 93 | "retry_all": bool(os.getenv('DBT_DATABRICKS_RETRY_ALL', False)), 94 | } 95 | 96 | 97 | def spark_session_target(): 98 | return { 99 | "type": "spark", 100 | "host": "localhost", 101 | "method": "session", 102 | } 103 | 104 | 105 | @pytest.fixture(autouse=True) 106 | def skip_by_profile_type(request): 107 | profile_type = request.config.getoption("--profile") 108 | if request.node.get_closest_marker("skip_profile"): 109 | for skip_profile_type in request.node.get_closest_marker("skip_profile").args: 110 | if skip_profile_type == profile_type: 111 | pytest.skip("skipped on '{profile_type}' profile") 112 | -------------------------------------------------------------------------------- /macros/maintenance_operation.sql: -------------------------------------------------------------------------------- 1 | {% macro get_tables(table_regex_pattern='.*') %} 2 | 3 | {% set tables = [] %} 4 | {% for database in spark__list_schemas('not_used') %} 5 | {% for table in spark__list_relations_without_caching(database[0]) %} 6 | {% set db_tablename = database[0] ~ "." ~ table[1] %} 7 | {% set is_match = modules.re.match(table_regex_pattern, db_tablename) %} 8 | {% if is_match %} 9 | {% call statement('table_detail', fetch_result=True) -%} 10 | describe extended {{ db_tablename }} 11 | {% endcall %} 12 | 13 | {% set table_type = load_result('table_detail').table|reverse|selectattr(0, 'in', ('type', 'TYPE', 'Type'))|first %} 14 | {% if table_type[1]|lower != 'view' %} 15 | {{ tables.append(db_tablename) }} 16 | {% endif %} 17 | {% endif %} 18 | {% endfor %} 19 | {% endfor %} 20 | {{ return(tables) }} 21 | 22 | {% endmacro %} 23 | 24 | {% macro get_delta_tables(table_regex_pattern='.*') %} 25 | 26 | {% set delta_tables = [] %} 27 | {% for db_tablename in get_tables(table_regex_pattern) %} 28 | {% call statement('table_detail', fetch_result=True) -%} 29 | describe extended {{ db_tablename }} 30 | {% endcall %} 31 | 32 | {% set table_type = load_result('table_detail').table|reverse|selectattr(0, 'in', ('provider', 'PROVIDER', 'Provider'))|first %} 33 | {% if table_type[1]|lower == 'delta' %} 34 | {{ delta_tables.append(db_tablename) }} 35 | {% endif %} 36 | {% endfor %} 37 | {{ return(delta_tables) }} 38 | 39 | {% endmacro %} 40 | 41 | {% macro get_statistic_columns(table) %} 42 | 43 | {% call statement('input_columns', fetch_result=True) %} 44 | SHOW COLUMNS IN {{ table }} 45 | {% endcall %} 46 | {% set input_columns = load_result('input_columns').table %} 47 | 48 | {% set output_columns = [] %} 49 | {% for column in input_columns %} 50 | {% call statement('column_information', fetch_result=True) %} 51 | DESCRIBE TABLE {{ table }} `{{ column[0] }}` 52 | {% endcall %} 53 | {% if not load_result('column_information').table[1][1].startswith('struct') and not load_result('column_information').table[1][1].startswith('array') %} 54 | {{ output_columns.append('`' ~ column[0] ~ '`') }} 55 | {% endif %} 56 | {% endfor %} 57 | {{ return(output_columns) }} 58 | 59 | {% endmacro %} 60 | 61 | {% macro 
spark_optimize_delta_tables(table_regex_pattern='.*') %} 62 | 63 | {% for table in get_delta_tables(table_regex_pattern) %} 64 | {% set start=modules.datetime.datetime.now() %} 65 | {% set message_prefix=loop.index ~ " of " ~ loop.length %} 66 | {{ dbt_utils.log_info(message_prefix ~ " Optimizing " ~ table) }} 67 | {% do run_query("optimize " ~ table) %} 68 | {% set end=modules.datetime.datetime.now() %} 69 | {% set total_seconds = (end - start).total_seconds() | round(2) %} 70 | {{ dbt_utils.log_info(message_prefix ~ " Finished " ~ table ~ " in " ~ total_seconds ~ "s") }} 71 | {% endfor %} 72 | 73 | {% endmacro %} 74 | 75 | {% macro spark_vacuum_delta_tables(table_regex_pattern='.*') %} 76 | 77 | {% for table in get_delta_tables(table_regex_pattern) %} 78 | {% set start=modules.datetime.datetime.now() %} 79 | {% set message_prefix=loop.index ~ " of " ~ loop.length %} 80 | {{ dbt_utils.log_info(message_prefix ~ " Vacuuming " ~ table) }} 81 | {% do run_query("vacuum " ~ table) %} 82 | {% set end=modules.datetime.datetime.now() %} 83 | {% set total_seconds = (end - start).total_seconds() | round(2) %} 84 | {{ dbt_utils.log_info(message_prefix ~ " Finished " ~ table ~ " in " ~ total_seconds ~ "s") }} 85 | {% endfor %} 86 | 87 | {% endmacro %} 88 | 89 | {% macro spark_analyze_tables(table_regex_pattern='.*') %} 90 | 91 | {% for table in get_tables(table_regex_pattern) %} 92 | {% set start=modules.datetime.datetime.now() %} 93 | {% set columns = get_statistic_columns(table) | join(',') %} 94 | {% set message_prefix=loop.index ~ " of " ~ loop.length %} 95 | {{ dbt_utils.log_info(message_prefix ~ " Analyzing " ~ table) }} 96 | {% if columns != '' %} 97 | {% do run_query("analyze table " ~ table ~ " compute statistics for columns " ~ columns) %} 98 | {% endif %} 99 | {% set end=modules.datetime.datetime.now() %} 100 | {% set total_seconds = (end - start).total_seconds() | round(2) %} 101 | {{ dbt_utils.log_info(message_prefix ~ " Finished " ~ table ~ " in " ~ total_seconds ~ "s") }} 102 | {% endfor %} 103 | 104 | {% endmacro %} 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This [dbt](https://github.com/dbt-labs/dbt) package contains macros 2 | that: 3 | - can be (re)used across dbt projects running on Spark 4 | - define Spark-specific implementations of [dispatched macros](https://docs.getdbt.com/reference/dbt-jinja-functions/dispatch) from other packages 5 | 6 | ## Installation Instructions 7 | 8 | Check [dbt Hub](https://hub.getdbt.com) for the latest installation 9 | instructions, or [read the docs](https://docs.getdbt.com/docs/package-management) 10 | for more information on installing packages. 11 | 12 | ---- 13 | 14 | ## Compatibility 15 | 16 | This package provides "shims" for: 17 | - [dbt_utils](https://github.com/dbt-labs/dbt-utils), except for: 18 | - `dbt_utils.get_relations_by_pattern` 19 | - `dbt_utils.groupby` 20 | - `dbt_utils.recency` 21 | - `dbt_utils.any_value` 22 | - `dbt_utils.listagg` 23 | - `dbt_utils.pivot` with apostrophe(s) in the `values` 24 | - [snowplow](https://github.com/dbt-labs/snowplow) (tested on Databricks only) 25 | 26 | In order to use these "shims," you should set a `dispatch` config in your root project (on dbt v0.20.0 and newer). 
For example, with this project setting, dbt will first search for macro implementations inside the `spark_utils` package when resolving macros from the `dbt_utils` namespace:
27 | ```
28 | dispatch:
29 |   - macro_namespace: dbt_utils
30 |     search_order: ['spark_utils', 'dbt_utils']
31 | ```
32 |
33 | ### Note to maintainers of other packages
34 |
35 | The spark-utils package may be able to provide compatibility for your package, especially if your package leverages dbt-utils macros for cross-database functionality. This package _does not_ need to be specified as a dependency of your package in `packages.yml`. Instead, you should encourage anyone using your package on Apache Spark / Databricks to:
36 | - Install `spark_utils` alongside your package
37 | - Add a `dispatch` config in their root project, like the one above
38 |
39 | ----
40 |
41 | ## Useful macros: maintenance
42 |
43 | _Caveat: These are not tested in CI, nor guaranteed to work on all platforms._
44 |
45 | Each of these macros accepts a regex pattern, finds tables whose names match the pattern, and loops over those tables to perform a maintenance operation (see the usage sketch at the end of this README):
46 |
47 | - `spark_optimize_delta_tables`: Runs `optimize` for all matched Delta tables
48 | - `spark_vacuum_delta_tables`: Runs `vacuum` for all matched Delta tables
49 | - `spark_analyze_tables`: Computes statistics for all matched tables
50 |
51 | ----
52 |
53 | ### Contributing
54 |
55 | We welcome contributions to this repo! To contribute a new feature or a fix,
56 | please open a Pull Request with 1) your changes and 2) updated documentation for
57 | the `README.md` file.
58 |
59 | ## Testing
60 |
61 | The macros are tested with [`pytest`](https://docs.pytest.org) and
62 | [`pytest-dbt-core`](https://pypi.org/project/pytest-dbt-core/). For example,
63 | the [`get_tables` macro is tested](./tests/unit/test_macros.py) by:
64 |
65 | 1. Creating a test table (test setup):
66 |    ``` python
67 |    spark_session.sql(f"CREATE TABLE {table_name} (id int) USING parquet")
68 |    ```
69 | 2. Calling the macro generator:
70 |    ``` python
71 |    tables = macro_generator()
72 |    ```
73 | 3. Asserting the test condition:
74 |    ``` python
75 |    assert simple_table in tables
76 |    ```
77 | 4. Deleting the test table (test cleanup):
78 |    ``` python
79 |    spark_session.sql(f"DROP TABLE IF EXISTS {table_name}")
80 |    ```
81 |
82 | A macro is fetched using the
83 | [`macro_generator`](https://pytest-dbt-core.readthedocs.io/en/latest/dbt_spark.html#usage)
84 | fixture and by providing the macro name through
85 | [indirect parameterization](https://docs.pytest.org/en/7.1.x/example/parametrize.html?highlight=indirect#indirect-parametrization):
86 |
87 | ``` python
88 | @pytest.mark.parametrize(
89 |     "macro_generator", ["macro.spark_utils.get_tables"], indirect=True
90 | )
91 | def test_create_table(macro_generator: MacroGenerator) -> None:
92 | ```
93 |
94 | ----
95 |
96 | ### Getting started with dbt + Spark
97 |
98 | - [What is dbt](https://docs.getdbt.com/docs/introduction)?
99 | - [Installation](https://github.com/dbt-labs/dbt-spark)
100 | - Join the #spark channel in [dbt Slack](http://slack.getdbt.com/)
101 |
102 |
103 | ## Code of Conduct
104 |
105 | Everyone interacting in the dbt project's codebases, issue trackers, chat rooms,
106 | and mailing lists is expected to follow the
107 | [PyPA Code of Conduct](https://www.pypa.io/en/latest/code-of-conduct/).
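----

### Appendix: maintenance macro usage (sketch)

The snippet below is a minimal, illustrative sketch of invoking the maintenance macros described earlier in this README via `dbt run-operation`. It assumes your Spark or Databricks target is already configured in `profiles.yml`; the `analytics` schema name used in the regex is a hypothetical placeholder, not something defined by this package.

``` shell
# With no --args, the default pattern '.*' matches every table the macro can see:
dbt run-operation spark_optimize_delta_tables

# Restrict the operation with a regex over database-qualified table names
# ("analytics" is a hypothetical schema name -- substitute your own pattern):
dbt run-operation spark_vacuum_delta_tables --args "{table_regex_pattern: 'analytics\..*'}"
dbt run-operation spark_analyze_tables --args "{table_regex_pattern: 'analytics\..*'}"
```

Each macro logs per-table progress with `dbt_utils.log_info`, so you can follow the operation in dbt's console output.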
108 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------