├── .github ├── CODEOWNERS └── workflows │ └── workflow.yml ├── integration_tests ├── dbt_project.yml ├── dbt_utils │ ├── models │ │ └── test_recency.sql │ ├── packages.yml │ ├── Makefile │ └── dbt_project.yml ├── snowplow │ ├── packages.yml │ ├── Makefile │ ├── models │ │ ├── sessions_expected.sql │ │ ├── page_views_expected.sql │ │ └── base_event.sql │ └── dbt_project.yml └── ci │ └── sample.profiles.yml ├── .gitignore ├── macros ├── dbt_utils │ ├── cross_db_utils │ │ ├── array_concat.sql │ │ ├── array_construct.sql │ │ ├── cast_array_to_string.sql │ │ ├── deprecated │ │ │ ├── concat.sql │ │ │ ├── assert_not_null.sql │ │ │ ├── split_part.sql │ │ │ ├── datatypes.sql │ │ │ ├── datediff.sql │ │ │ └── dateadd.sql │ │ ├── array_append.sql │ │ └── current_timestamp.sql │ └── sql │ │ └── get_relations_by_prefix.sql ├── snowplow │ └── convert_timezone.sql └── maintenance_operation.sql ├── dbt_project.yml ├── .gitmodules ├── profiles.yml ├── dev-requirements.txt ├── pytest.ini ├── CHANGELOG.md ├── tests ├── unit │ └── test_macros.py └── functional │ ├── test_utils.py │ └── conftest.py ├── .circleci └── config.yml ├── README.md └── LICENSE /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @dbt-labs/dx 2 | -------------------------------------------------------------------------------- /integration_tests/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'spark_utils_integration_tests' 2 | version: '0.1.0' 3 | config-version: 2 4 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/models/test_recency.sql: -------------------------------------------------------------------------------- 1 | select 2 | {{ dbt_utils.date_trunc('day', dbt_utils.current_timestamp()) }} as today 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | /**/target/ 3 | /**/dbt_modules/ 4 | /**/dbt_packages/ 5 | /**/logs/ 6 | /**/env/ 7 | /**/__pycache__/ 8 | test.env 9 | dbt_internal_packages/ -------------------------------------------------------------------------------- /integration_tests/snowplow/packages.yml: -------------------------------------------------------------------------------- 1 | 2 | packages: 3 | - local: ../../ 4 | - local: ../../snowplow 5 | - local: ../../snowplow/integration_tests 6 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/packages.yml: -------------------------------------------------------------------------------- 1 | 2 | packages: 3 | - local: ../../ 4 | - local: ../../dbt-utils 5 | - local: ../../dbt-utils/integration_tests 6 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/array_concat.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__array_concat(array_1, array_2) -%} 2 | concat({{ array_1 }}, {{ array_2 }}) 3 | {%- endmacro %} -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/array_construct.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__array_construct(inputs, data_type) -%} 2 | array( {{ inputs|join(' , ') }} ) 3 | {%- endmacro %} 
-------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/cast_array_to_string.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__cast_array_to_string(array) %} 2 | '['||concat_ws(',', {{ array }})||']' 3 | {% endmacro %} -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/deprecated/concat.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__concat(fields) -%} 2 | {{ return(adapter.dispatch('concat', 'dbt')(fields)) }} 3 | {%- endmacro %} 4 | -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'spark_utils' 2 | profile: 'sparkutils' 3 | version: '0.3.0' 4 | config-version: 2 5 | require-dbt-version: [">=1.2.0", "<3.0.0"] 6 | macro-paths: ["macros"] 7 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "dbt-utils"] 2 | path = dbt-utils 3 | url = https://github.com/dbt-labs/dbt-utils 4 | [submodule "snowplow"] 5 | path = snowplow 6 | url = https://github.com/dbt-labs/snowplow 7 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/array_append.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__array_append(array, new_element) -%} 2 | {{ dbt_utils.array_concat(array, dbt_utils.array_construct([new_element])) }} 3 | {%- endmacro %} -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/deprecated/assert_not_null.sql: -------------------------------------------------------------------------------- 1 | {% macro assert_not_null(function, arg) -%} 2 | {{ return(adapter.dispatch('assert_not_null', 'dbt')(function, arg)) }} 3 | {%- endmacro %} 4 | -------------------------------------------------------------------------------- /macros/snowplow/convert_timezone.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__convert_timezone(in_tz, out_tz, in_timestamp) %} 2 | from_utc_timestamp(to_utc_timestamp({{in_timestamp}}, {{in_tz}}), {{out_tz}}) 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /profiles.yml: -------------------------------------------------------------------------------- 1 | sparkutils: 2 | target: test 3 | outputs: 4 | test: 5 | type: spark 6 | method: session 7 | schema: test 8 | host: NA # not used, but required by `dbt-spark` 9 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/current_timestamp.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__current_timestamp() %} 2 | current_timestamp() 3 | {% endmacro %} 4 | 5 | 6 | {% macro spark__current_timestamp_in_utc() %} 7 | unix_timestamp() 8 | {% endmacro %} 9 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/deprecated/split_part.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__split_part(string_text, delimiter_text, 
part_number) %} 2 | {{ return(adapter.dispatch('split_part', 'dbt')(string_text, delimiter_text, part_number)) }} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/deprecated/datatypes.sql: -------------------------------------------------------------------------------- 1 | {# numeric ------------------------------------------------ #} 2 | 3 | {% macro spark__type_numeric() %} 4 | {{ return(adapter.dispatch('type_numeric', 'dbt')()) }} 5 | {% endmacro %} 6 | -------------------------------------------------------------------------------- /integration_tests/snowplow/Makefile: -------------------------------------------------------------------------------- 1 | 2 | test-databricks: 3 | dbt deps 4 | dbt seed --target databricks --full-refresh 5 | dbt run --target databricks --full-refresh --vars 'update: false' 6 | dbt run --target databricks --vars 'update: true' 7 | dbt test --target databricks 8 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pyodbc==4.0.32 3 | git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core 4 | git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter 5 | git+https://github.com/dbt-labs/dbt-spark.git#egg=dbt-spark[ODBC,session] 6 | pytest-spark~=0.6.0 7 | pytest-dbt-core~=0.1.0 8 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning 4 | ignore:unclosed file .*:ResourceWarning 5 | env_files = 6 | test.env 7 | testpaths = 8 | tests/functional 9 | spark_options = 10 | spark.app.name: spark-utils 11 | spark.executor.instances: 1 12 | spark.sql.catalogImplementation: in-memory 13 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/Makefile: -------------------------------------------------------------------------------- 1 | 2 | test-spark: 3 | dbt deps 4 | dbt seed --target spark --full-refresh --no-version-check 5 | dbt run --target spark --full-refresh --no-version-check 6 | dbt test --target spark --no-version-check 7 | 8 | test-databricks: 9 | dbt deps 10 | dbt seed --target databricks --full-refresh 11 | dbt run --target databricks --full-refresh 12 | dbt test --target databricks 13 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/deprecated/datediff.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__datediff(first_date, second_date, datepart) %} 2 | -- dispatch here gets very very confusing 3 | -- we just need to hint to dbt that this is a required macro for resolving dbt.spark__datediff() 4 | -- {{ assert_not_null() }} 5 | {{ return(adapter.dispatch('datediff', 'dbt')(first_date, second_date, datepart)) }} 6 | {% endmacro %} 7 | -------------------------------------------------------------------------------- /macros/dbt_utils/cross_db_utils/deprecated/dateadd.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__dateadd(datepart, interval, from_date_or_timestamp) %} 2 | -- dispatch here gets very very confusing 
3 | -- we just need to hint to dbt that this is a required macro for resolving dbt.spark__datediff() 4 | -- {{ assert_not_null() }} 5 | {{ return(adapter.dispatch('dateadd', 'dbt')(datepart, interval, from_date_or_timestamp)) }} 6 | {% endmacro %} 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # spark-utils v0.3.0 2 | This release supports any version (minor and patch) of v1, which means far less need for compatibility releases in the future. 3 | 4 | ### Features 5 | - Add macros for common maintenance operations ([#18](https://github.com/dbt-labs/spark-utils/pull/18)) 6 | 7 | ### Contributors 8 | - [@JCZuurmond](https://github.com/JCZuurmond) ([#18](https://github.com/dbt-labs/dbt-external-tables/pull/18)) 9 | - [@NielsZeilemaker](https://github.com/NielsZeilemaker) ([#18](https://github.com/dbt-labs/dbt-external-tables/pull/18)) 10 | 11 | # spark-utils v0.2.4 12 | 🚨 This is a compatibility release in preparation for `dbt-core` v1.0.0 (🎉) 13 | -------------------------------------------------------------------------------- /integration_tests/snowplow/models/sessions_expected.sql: -------------------------------------------------------------------------------- 1 | {{config(enabled=snowplow.is_adapter('default'))}} 2 | 3 | select 4 | user_custom_id, 5 | inferred_user_id, 6 | user_snowplow_domain_id, 7 | user_snowplow_crossdomain_id, 8 | app_id, 9 | first_page_url, 10 | marketing_medium, 11 | marketing_source, 12 | marketing_term, 13 | marketing_campaign, 14 | marketing_content, 15 | referer_url, 16 | to_timestamp(session_start) as session_start, 17 | to_timestamp(session_end) as session_end, 18 | session_id, 19 | time_engaged_in_s, 20 | session_index, 21 | first_test_add_col, 22 | last_test_add_col 23 | 24 | from {{ ref('snowplow_sessions_expected') }} 25 | -------------------------------------------------------------------------------- /integration_tests/snowplow/models/page_views_expected.sql: -------------------------------------------------------------------------------- 1 | {{config(enabled=snowplow.is_adapter('default'))}} 2 | 3 | select 4 | 5 | user_custom_id, 6 | user_snowplow_domain_id, 7 | user_snowplow_crossdomain_id, 8 | session_id, 9 | session_index, 10 | page_view_id, 11 | to_timestamp(page_view_start) as page_view_start, 12 | to_timestamp(page_view_end) as page_view_end, 13 | time_engaged_in_s, 14 | horizontal_percentage_scrolled, 15 | vertical_percentage_scrolled, 16 | page_url, 17 | marketing_medium, 18 | marketing_source, 19 | marketing_term, 20 | marketing_content, 21 | marketing_campaign, 22 | test_add_col 23 | 24 | from {{ ref('snowplow_page_views_expected') }} 25 | -------------------------------------------------------------------------------- /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3 14 | 15 | - name: Set up Python 3.9 16 | uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c # actions/setup-python@v4 17 | with: 18 | python-version: 3.9 19 | 20 | - name: Install dependencies 21 | shell: bash 22 | run: | 23 | sudo apt-get install libsasl2-dev 24 | python -m pip install --upgrade pip 25 | python -m pip install -r 
dev-requirements.txt 26 | 27 | - name: Run unit tests 28 | shell: bash 29 | run: DBT_PROFILES_DIR=$PWD pytest tests/unit 30 | -------------------------------------------------------------------------------- /tests/unit/test_macros.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | import pytest 4 | from dbt.clients.jinja import MacroGenerator 5 | from pyspark.sql import SparkSession 6 | 7 | 8 | @pytest.fixture 9 | def simple_table(spark_session: SparkSession) -> str: 10 | """Create and delete a simple table used for testing.""" 11 | table_name = f"default.table_{uuid.uuid4()}".replace("-", "_") 12 | spark_session.sql(f"CREATE TABLE {table_name} (id int) USING parquet") 13 | yield table_name 14 | spark_session.sql(f"DROP TABLE IF EXISTS {table_name}") 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "macro_generator", ["macro.spark_utils.get_tables"], indirect=True 19 | ) 20 | def test_create_table( 21 | macro_generator: MacroGenerator, simple_table: str 22 | ) -> None: 23 | """The `get_tables` macro should return the created table.""" 24 | tables = macro_generator() 25 | assert simple_table in tables 26 | 27 | -------------------------------------------------------------------------------- /integration_tests/ci/sample.profiles.yml: -------------------------------------------------------------------------------- 1 | 2 | # HEY! This file is used in the spark_utils integrations tests with CircleCI. 3 | # You should __NEVER__ check credentials into version control. Thanks for reading :) 4 | 5 | config: 6 | send_anonymous_usage_stats: False 7 | use_colors: True 8 | 9 | integration_tests: 10 | target: spark 11 | outputs: 12 | spark: 13 | type: spark 14 | method: thrift 15 | schema: spark_utils_dbt_utils_integration_tests 16 | host: "{{ env_var('SPARK_TEST_HOST') }}" 17 | port: "{{ env_var('SPARK_TEST_PORT') | as_number }}" 18 | user: "{{ env_var('SPARK_TEST_USER') }}" 19 | connect_retries: 5 20 | connect_timeout: 60 21 | threads: 5 22 | 23 | databricks-utils: &databricks-odbc 24 | type: spark 25 | host: "{{ env_var('DBT_DATABRICKS_HOST_NAME') }}" 26 | endpoint: "{{ env_var('DBT_DATABRICKS_ENDPOINT') }}" 27 | token: "{{ env_var('DBT_DATABRICKS_TOKEN') }}" 28 | method: odbc 29 | driver: "{{ env_var('ODBC_DRIVER') }}" 30 | port: 443 31 | threads: 5 32 | schema: spark_utils_dbt_utils_integration_tests 33 | 34 | databricks-snowplow: 35 | <<: *databricks-odbc 36 | schema: spark_utils_snowplow_integration_tests 37 | -------------------------------------------------------------------------------- /integration_tests/snowplow/models/base_event.sql: -------------------------------------------------------------------------------- 1 | {%- set cols = adapter.get_columns_in_relation(ref('sp_event')) -%} 2 | {%- set col_list = [] -%} 3 | 4 | {% set type_overrides = { 5 | "br_cookies": "boolean", 6 | "br_features_director": "boolean", 7 | "br_features_flash": "boolean", 8 | "br_features_gears": "boolean", 9 | "br_features_java": "boolean", 10 | "br_features_pdf": "boolean", 11 | "br_features_quicktime": "boolean", 12 | "br_features_realplayer": "boolean", 13 | "br_features_silverlight": "boolean", 14 | "br_features_windowsmedia": "boolean", 15 | "collector_tstamp": "timestamp", 16 | "derived_tstamp": "timestamp", 17 | "dvce_ismobile": "boolean" 18 | } %} 19 | 20 | {%- for col in cols -%} 21 | {%- set col_statement -%} 22 | {%- if col.column in type_overrides.keys() %} 23 | cast({{col.column}} as {{type_overrides[col.column]}}) as {{col.column}} 24 | {% else 
%} 25 | {{col.column}} 26 | {% endif -%} 27 | {%- endset -%} 28 | {%- do col_list.append(col_statement) -%} 29 | {%- endfor -%} 30 | 31 | {%- set col_list_csv = col_list|join(',') -%} 32 | 33 | select {{col_list_csv}} from {{ ref('sp_event') }} 34 | 35 | {% if var('update', False) %} 36 | 37 | union all 38 | 39 | select {{col_list_csv}} from {{ ref('sp_event_update') }} 40 | 41 | {% endif %} 42 | -------------------------------------------------------------------------------- /integration_tests/dbt_utils/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'spark_utils_dbt_utils_integration_tests' 2 | version: '1.0' 3 | config-version: 2 4 | 5 | profile: 'integration_tests' 6 | 7 | analysis-paths: ["analysis"] 8 | test-paths: ["tests"] 9 | macro-paths: ["macros"] 10 | clean-targets: # directories to be removed by `dbt clean` 11 | - "target" 12 | - "dbt_modules" 13 | dispatch: 14 | - macro_namespace: dbt_utils 15 | search_order: 16 | - spark_utils 17 | - dbt_utils_integration_tests 18 | - dbt_utils 19 | 20 | seeds: 21 | dbt_utils_integration_tests: 22 | +file_format: delta 23 | 24 | models: 25 | dbt_utils_integration_tests: 26 | +file_format: delta 27 | 28 | sql: 29 | # macro doesn't work for this integration test (schema pattern) 30 | test_get_relations_by_pattern: 31 | +enabled: false 32 | # integration test doesn't work 33 | test_groupby: 34 | +enabled: false 35 | # integration test doesn't work 36 | test_pivot_apostrophe: 37 | +enabled: false 38 | generic_tests: 39 | # default version of this integration test uses an explicit cast to 'datetime' 40 | # which SparkSQL does not support. override with our own version 41 | test_recency: 42 | +enabled: false 43 | cross_db_utils: 44 | # integration test doesn't work 45 | test_any_value: 46 | +enabled: false 47 | 48 | tests: 49 | dbt_utils_integration_tests: 50 | cross_db_utils: 51 | # expect exactly two failures 52 | # (both use "order by", which isn't supported in SparkSQL) 53 | assert_equal_test_listagg_actual__expected: 54 | +error_if: ">2" 55 | flags: 56 | require_generic_test_arguments_property: true 57 | seed-paths: ["data"] 58 | model-paths: ["models"] -------------------------------------------------------------------------------- /macros/dbt_utils/sql/get_relations_by_prefix.sql: -------------------------------------------------------------------------------- 1 | {% macro spark__get_relations_by_pattern(schema_pattern, table_pattern, exclude='', database=target.database) %} 2 | 3 | {%- call statement('get_tables', fetch_result=True) %} 4 | 5 | show table extended in {{ schema_pattern }} like '{{ table_pattern }}' 6 | 7 | {%- endcall -%} 8 | 9 | {%- set table_list = load_result('get_tables') -%} 10 | 11 | {%- if table_list and table_list['table'] -%} 12 | {%- set tbl_relations = [] -%} 13 | {%- for row in table_list['table'] -%} 14 | {%- set tbl_relation = api.Relation.create( 15 | database=None, 16 | schema=row[0], 17 | identifier=row[1], 18 | type=('view' if 'Type: VIEW' in row[3] else 'table') 19 | ) -%} 20 | {%- do tbl_relations.append(tbl_relation) -%} 21 | {%- endfor -%} 22 | 23 | {{ return(tbl_relations) }} 24 | {%- else -%} 25 | {{ return([]) }} 26 | {%- endif -%} 27 | 28 | {% endmacro %} 29 | 30 | {% macro spark__get_relations_by_prefix(schema_pattern, table_pattern, exclude='', database=target.database) %} 31 | {% set table_pattern = table_pattern ~ '*' %} 32 | {{ return(spark_utils.spark__get_relations_by_pattern(schema_pattern, table_pattern, exclude='', 
database=target.database)) }} 33 | {% endmacro %} 34 | 35 | {% macro spark__get_tables_by_pattern(schema_pattern, table_pattern, exclude='', database=target.database) %} 36 | {{ return(spark_utils.spark__get_relations_by_pattern(schema_pattern, table_pattern, exclude='', database=target.database)) }} 37 | {% endmacro %} 38 | 39 | {% macro spark__get_tables_by_prefix(schema_pattern, table_pattern, exclude='', database=target.database) %} 40 | {{ return(spark_utils.spark__get_relations_by_prefix(schema_pattern, table_pattern, exclude='', database=target.database)) }} 41 | {% endmacro %} 42 | -------------------------------------------------------------------------------- /integration_tests/snowplow/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'spark_utils_snowplow_integration_tests' 2 | version: '1.0' 3 | config-version: 2 4 | 5 | profile: 'integration_tests' 6 | 7 | analysis-paths: ["analysis"] 8 | test-paths: ["tests"] 9 | macro-paths: ["macros"] 10 | clean-targets: # directories to be removed by `dbt clean` 11 | - "target" 12 | - "dbt_modules" 13 | 14 | dispatch: 15 | - macro_namespace: dbt_utils 16 | search_order: ['spark_utils', 'dbt_utils'] 17 | - macro_namespace: snowplow 18 | search_order: ['spark_utils', 'snowplow'] 19 | 20 | models: 21 | snowplow_integration_tests: 22 | pre: 23 | default: 24 | base_event: 25 | +enabled: false 26 | post: 27 | page_views: 28 | default: 29 | page_views_expected: 30 | +enabled: false 31 | sessions: 32 | default: 33 | sessions_expected: 34 | +enabled: false 35 | 36 | +incremental_strategy: merge 37 | +file_format: delta 38 | vars: 39 | 'snowplow:timezone': 'America/New_York' 40 | 'snowplow:events': '{{ ref("base_event") }}' 41 | 'snowplow:context:web_page': '{{ ref("base_web_page") }}' 42 | 'snowplow:context:performance_timing': false 43 | 'snowplow:context:useragent': false 44 | 'snowplow:pass_through_columns': ['test_add_col'] 45 | seeds: 46 | snowplow_integration_tests: 47 | event: 48 | +column_types: 49 | br_cookies: string 50 | br_features_director: string 51 | br_features_flash: string 52 | br_features_gears: string 53 | br_features_java: string 54 | br_features_pdf: string 55 | br_features_quicktime: string 56 | br_features_realplayer: string 57 | br_features_silverlight: string 58 | br_features_windowsmedia: string 59 | collector_tstamp: string 60 | derived_tstamp: string 61 | dvce_ismobile: string 62 | expected: 63 | snowplow_page_views_expected: 64 | +column_types: 65 | page_view_start: string 66 | page_view_end: string 67 | snowplow_sessions_expected: 68 | +column_types: 69 | session_start: string 70 | session_end: string 71 | +quote_columns: false 72 | flags: 73 | require_generic_test_arguments_property: true 74 | seed-paths: ["data"] 75 | model-paths: ["models"] -------------------------------------------------------------------------------- /tests/functional/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from dbt.tests.util import run_dbt 4 | 5 | from dbt.tests.adapter.utils.base_utils import BaseUtils 6 | from dbt.tests.adapter.utils.test_concat import BaseConcat 7 | from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd 8 | from dbt.tests.adapter.utils.test_datediff import BaseDateDiff 9 | from dbt.tests.adapter.utils.test_split_part import BaseSplitPart 10 | 11 | from dbt.tests.adapter.utils.data_types.base_data_type_macro import BaseDataTypeMacro 12 | from 
dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric 13 | 14 | 15 | class BaseSparkUtilsBackCompat: 16 | # install this repo as a package 17 | @pytest.fixture(scope="class") 18 | def packages(self): 19 | return { 20 | "packages": [ 21 | {"local": os.getcwd()}, 22 | {"git": "https://github.com/dbt-labs/dbt-utils"} 23 | ]} 24 | 25 | @pytest.fixture(scope="class") 26 | def project_config_update(self): 27 | return { 28 | "dispatch": [{ 29 | "macro_namespace": "dbt_utils", 30 | "search_order": ["spark_utils", "dbt_utils"] 31 | }] 32 | } 33 | 34 | # call the macros from the 'dbt_utils' namespace 35 | # instead of the unspecified / global namespace 36 | def macro_namespace(self): 37 | return "dbt_utils" 38 | 39 | 40 | class BaseSparkUtilsBackCompatUtil(BaseSparkUtilsBackCompat, BaseUtils): 41 | # actual test sequence needs to run 'deps' first 42 | def test_build_assert_equal(self, project): 43 | run_dbt(['deps']) 44 | super().test_build_assert_equal(project) 45 | 46 | 47 | class BaseSparkUtilsBackCompatDataType(BaseSparkUtilsBackCompat, BaseDataTypeMacro): 48 | # actual test sequence needs to run 'deps' first 49 | def test_check_types_assert_match(self, project): 50 | run_dbt(['deps']) 51 | super().test_check_types_assert_match(project) 52 | 53 | 54 | class TestConcat(BaseSparkUtilsBackCompatUtil, BaseConcat): 55 | pass 56 | 57 | 58 | class TestDateAdd(BaseSparkUtilsBackCompatUtil, BaseDateAdd): 59 | pass 60 | 61 | 62 | class TestDateDiff(BaseSparkUtilsBackCompatUtil, BaseDateDiff): 63 | pass 64 | 65 | 66 | class TestSplitPart(BaseSparkUtilsBackCompatUtil, BaseSplitPart): 67 | pass 68 | 69 | 70 | class TestTypeNumeric(BaseSparkUtilsBackCompatDataType, BaseTypeNumeric): 71 | def numeric_fixture_type(self): 72 | return "decimal(28,6)" 73 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | jobs: 4 | 5 | integration-dbt-utils-databricks: &databricks-odbc 6 | environment: 7 | DBT_INVOCATION_ENV: circle 8 | ODBC_DRIVER: Simba # TODO: move env var to Docker image 9 | docker: 10 | # image based on `fishtownanalytics/test-container` w/ Simba ODBC Spark driver installed 11 | - image: 828731156495.dkr.ecr.us-east-1.amazonaws.com/dbt-spark-odbc-test-container:latest 12 | aws_auth: 13 | aws_access_key_id: $AWS_ACCESS_KEY_ID_STAGING 14 | aws_secret_access_key: $AWS_SECRET_ACCESS_KEY_STAGING 15 | 16 | steps: 17 | - checkout 18 | 19 | - run: &pull-submodules 20 | name: "Pull Submodules" 21 | command: | 22 | git submodule init 23 | git submodule sync --recursive 24 | git submodule foreach --recursive git fetch 25 | git submodule update --init --recursive 26 | 27 | - run: &setup-dbt 28 | name: "Setup dbt" 29 | command: | 30 | python3.8 -m venv venv 31 | . venv/bin/activate 32 | pip install --upgrade pip setuptools 33 | pip install -r dev-requirements.txt 34 | mkdir -p ~/.dbt 35 | cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml 36 | 37 | - run: 38 | name: "Run Functional Tests" 39 | command: | 40 | . venv/bin/activate 41 | python3 -m pytest tests/functional --profile databricks_sql_endpoint 42 | 43 | - run: 44 | name: "Run Tests - dbt-utils" 45 | 46 | command: | 47 | . 
venv/bin/activate 48 | cd integration_tests/dbt_utils 49 | dbt deps --target databricks-utils 50 | dbt seed --target databricks-utils --full-refresh 51 | dbt run --target databricks-utils --full-refresh 52 | dbt test --target databricks-utils 53 | 54 | - store_artifacts: 55 | path: ./logs 56 | 57 | integration-snowplow-databricks: 58 | <<: *databricks-odbc 59 | steps: 60 | - checkout 61 | - run: *pull-submodules 62 | - run: *setup-dbt 63 | 64 | - run: 65 | name: "Run Tests - Snowplow" 66 | command: | 67 | . venv/bin/activate 68 | cd integration_tests/snowplow 69 | dbt deps --target databricks-snowplow 70 | dbt seed --target databricks-snowplow --full-refresh 71 | dbt run --target databricks-snowplow --full-refresh --vars 'update: false' 72 | dbt run --target databricks-snowplow --vars 'update: true' 73 | dbt test --target databricks-snowplow 74 | 75 | - store_artifacts: 76 | path: ./logs 77 | 78 | workflows: 79 | version: 2 80 | test-shims: 81 | jobs: 82 | - integration-dbt-utils-databricks: 83 | context: aws-credentials 84 | - integration-snowplow-databricks: 85 | context: aws-credentials 86 | -------------------------------------------------------------------------------- /tests/functional/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | 4 | pytest_plugins = ["dbt.tests.fixtures.project"] 5 | 6 | 7 | def pytest_addoption(parser): 8 | parser.addoption("--profile", action="store", default="apache_spark", type=str) 9 | 10 | 11 | # Using @pytest.mark.skip_profile('apache_spark') uses the 'skip_by_profile_type' 12 | # autouse fixture below 13 | def pytest_configure(config): 14 | config.addinivalue_line( 15 | "markers", 16 | "skip_profile(profile): skip test for the given profile", 17 | ) 18 | 19 | 20 | @pytest.fixture(scope="session") 21 | def dbt_profile_target(request): 22 | profile_type = request.config.getoption("--profile") 23 | if profile_type == "databricks_cluster": 24 | target = databricks_cluster_target() 25 | elif profile_type == "databricks_sql_endpoint": 26 | target = databricks_sql_endpoint_target() 27 | elif profile_type == "apache_spark": 28 | target = apache_spark_target() 29 | elif profile_type == "databricks_http_cluster": 30 | target = databricks_http_cluster_target() 31 | elif profile_type == "spark_session": 32 | target = spark_session_target() 33 | else: 34 | raise ValueError(f"Invalid profile type '{profile_type}'") 35 | return target 36 | 37 | 38 | def apache_spark_target(): 39 | return { 40 | "type": "spark", 41 | "host": "localhost", 42 | "user": "dbt", 43 | "method": "thrift", 44 | "port": 10000, 45 | "connect_retries": 3, 46 | "connect_timeout": 5, 47 | "retry_all": True, 48 | } 49 | 50 | 51 | def databricks_cluster_target(): 52 | return { 53 | "type": "spark", 54 | "method": "odbc", 55 | "host": os.getenv("DBT_DATABRICKS_HOST_NAME"), 56 | "cluster": os.getenv("DBT_DATABRICKS_CLUSTER_NAME"), 57 | "token": os.getenv("DBT_DATABRICKS_TOKEN"), 58 | "driver": os.getenv("ODBC_DRIVER"), 59 | "port": 443, 60 | "connect_retries": 3, 61 | "connect_timeout": 5, 62 | "retry_all": True, 63 | } 64 | 65 | 66 | def databricks_sql_endpoint_target(): 67 | return { 68 | "type": "spark", 69 | "method": "odbc", 70 | "host": os.getenv("DBT_DATABRICKS_HOST_NAME"), 71 | "endpoint": os.getenv("DBT_DATABRICKS_ENDPOINT"), 72 | "token": os.getenv("DBT_DATABRICKS_TOKEN"), 73 | "driver": os.getenv("ODBC_DRIVER"), 74 | "port": 443, 75 | "connect_retries": 3, 76 | "connect_timeout": 5, 77 | "retry_all": True, 78 | } 
79 | 80 | 81 | def databricks_http_cluster_target(): 82 | return { 83 | "type": "spark", 84 | "host": os.getenv('DBT_DATABRICKS_HOST_NAME'), 85 | "cluster": os.getenv('DBT_DATABRICKS_CLUSTER_NAME'), 86 | "token": os.getenv('DBT_DATABRICKS_TOKEN'), 87 | "method": "http", 88 | "port": 443, 89 | # more retries + longer timout to handle unavailability while cluster is restarting 90 | # return failures quickly in dev, retry all failures in CI (up to 5 min) 91 | "connect_retries": 5, 92 | "connect_timeout": 60, 93 | "retry_all": bool(os.getenv('DBT_DATABRICKS_RETRY_ALL', False)), 94 | } 95 | 96 | 97 | def spark_session_target(): 98 | return { 99 | "type": "spark", 100 | "host": "localhost", 101 | "method": "session", 102 | } 103 | 104 | 105 | @pytest.fixture(autouse=True) 106 | def skip_by_profile_type(request): 107 | profile_type = request.config.getoption("--profile") 108 | if request.node.get_closest_marker("skip_profile"): 109 | for skip_profile_type in request.node.get_closest_marker("skip_profile").args: 110 | if skip_profile_type == profile_type: 111 | pytest.skip("skipped on '{profile_type}' profile") 112 | -------------------------------------------------------------------------------- /macros/maintenance_operation.sql: -------------------------------------------------------------------------------- 1 | {% macro get_tables(table_regex_pattern='.*') %} 2 | 3 | {% set tables = [] %} 4 | {% for database in spark__list_schemas('not_used') %} 5 | {% for table in spark__list_relations_without_caching(database[0]) %} 6 | {% set db_tablename = database[0] ~ "." ~ table[1] %} 7 | {% set is_match = modules.re.match(table_regex_pattern, db_tablename) %} 8 | {% if is_match %} 9 | {% call statement('table_detail', fetch_result=True) -%} 10 | describe extended {{ db_tablename }} 11 | {% endcall %} 12 | 13 | {% set table_type = load_result('table_detail').table|reverse|selectattr(0, 'in', ('type', 'TYPE', 'Type'))|first %} 14 | {% if table_type[1]|lower != 'view' %} 15 | {{ tables.append(db_tablename) }} 16 | {% endif %} 17 | {% endif %} 18 | {% endfor %} 19 | {% endfor %} 20 | {{ return(tables) }} 21 | 22 | {% endmacro %} 23 | 24 | {% macro get_delta_tables(table_regex_pattern='.*') %} 25 | 26 | {% set delta_tables = [] %} 27 | {% for db_tablename in get_tables(table_regex_pattern) %} 28 | {% call statement('table_detail', fetch_result=True) -%} 29 | describe extended {{ db_tablename }} 30 | {% endcall %} 31 | 32 | {% set table_type = load_result('table_detail').table|reverse|selectattr(0, 'in', ('provider', 'PROVIDER', 'Provider'))|first %} 33 | {% if table_type[1]|lower == 'delta' %} 34 | {{ delta_tables.append(db_tablename) }} 35 | {% endif %} 36 | {% endfor %} 37 | {{ return(delta_tables) }} 38 | 39 | {% endmacro %} 40 | 41 | {% macro get_statistic_columns(table) %} 42 | 43 | {% call statement('input_columns', fetch_result=True) %} 44 | SHOW COLUMNS IN {{ table }} 45 | {% endcall %} 46 | {% set input_columns = load_result('input_columns').table %} 47 | 48 | {% set output_columns = [] %} 49 | {% for column in input_columns %} 50 | {% call statement('column_information', fetch_result=True) %} 51 | DESCRIBE TABLE {{ table }} `{{ column[0] }}` 52 | {% endcall %} 53 | {% if not load_result('column_information').table[1][1].startswith('struct') and not load_result('column_information').table[1][1].startswith('array') %} 54 | {{ output_columns.append('`' ~ column[0] ~ '`') }} 55 | {% endif %} 56 | {% endfor %} 57 | {{ return(output_columns) }} 58 | 59 | {% endmacro %} 60 | 61 | {% macro 
spark_optimize_delta_tables(table_regex_pattern='.*') %} 62 | 63 | {% for table in get_delta_tables(table_regex_pattern) %} 64 | {% set start=modules.datetime.datetime.now() %} 65 | {% set message_prefix=loop.index ~ " of " ~ loop.length %} 66 | {{ dbt_utils.log_info(message_prefix ~ " Optimizing " ~ table) }} 67 | {% do run_query("optimize " ~ table) %} 68 | {% set end=modules.datetime.datetime.now() %} 69 | {% set total_seconds = (end - start).total_seconds() | round(2) %} 70 | {{ dbt_utils.log_info(message_prefix ~ " Finished " ~ table ~ " in " ~ total_seconds ~ "s") }} 71 | {% endfor %} 72 | 73 | {% endmacro %} 74 | 75 | {% macro spark_vacuum_delta_tables(table_regex_pattern='.*') %} 76 | 77 | {% for table in get_delta_tables(table_regex_pattern) %} 78 | {% set start=modules.datetime.datetime.now() %} 79 | {% set message_prefix=loop.index ~ " of " ~ loop.length %} 80 | {{ dbt_utils.log_info(message_prefix ~ " Vacuuming " ~ table) }} 81 | {% do run_query("vacuum " ~ table) %} 82 | {% set end=modules.datetime.datetime.now() %} 83 | {% set total_seconds = (end - start).total_seconds() | round(2) %} 84 | {{ dbt_utils.log_info(message_prefix ~ " Finished " ~ table ~ " in " ~ total_seconds ~ "s") }} 85 | {% endfor %} 86 | 87 | {% endmacro %} 88 | 89 | {% macro spark_analyze_tables(table_regex_pattern='.*') %} 90 | 91 | {% for table in get_tables(table_regex_pattern) %} 92 | {% set start=modules.datetime.datetime.now() %} 93 | {% set columns = get_statistic_columns(table) | join(',') %} 94 | {% set message_prefix=loop.index ~ " of " ~ loop.length %} 95 | {{ dbt_utils.log_info(message_prefix ~ " Analyzing " ~ table) }} 96 | {% if columns != '' %} 97 | {% do run_query("analyze table " ~ table ~ " compute statistics for columns " ~ columns) %} 98 | {% endif %} 99 | {% set end=modules.datetime.datetime.now() %} 100 | {% set total_seconds = (end - start).total_seconds() | round(2) %} 101 | {{ dbt_utils.log_info(message_prefix ~ " Finished " ~ table ~ " in " ~ total_seconds ~ "s") }} 102 | {% endfor %} 103 | 104 | {% endmacro %} 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This [dbt](https://github.com/dbt-labs/dbt) package contains macros 2 | that: 3 | - can be (re)used across dbt projects running on Spark 4 | - define Spark-specific implementations of [dispatched macros](https://docs.getdbt.com/reference/dbt-jinja-functions/dispatch) from other packages 5 | 6 | ## Installation Instructions 7 | 8 | Check [dbt Hub](https://hub.getdbt.com) for the latest installation 9 | instructions, or [read the docs](https://docs.getdbt.com/docs/package-management) 10 | for more information on installing packages. 11 | 12 | ---- 13 | 14 | ## Compatibility 15 | 16 | This package provides "shims" for: 17 | - [dbt_utils](https://github.com/dbt-labs/dbt-utils), except for: 18 | - `dbt_utils.get_relations_by_pattern` 19 | - `dbt_utils.groupby` 20 | - `dbt_utils.recency` 21 | - `dbt_utils.any_value` 22 | - `dbt_utils.listagg` 23 | - `dbt_utils.pivot` with apostrophe(s) in the `values` 24 | - [snowplow](https://github.com/dbt-labs/snowplow) (tested on Databricks only) 25 | 26 | In order to use these "shims," you should set a `dispatch` config in your root project (on dbt v0.20.0 and newer). 
For example, with this project setting, dbt will first search for macro implementations inside the `spark_utils` package when resolving macros from the `dbt_utils` namespace:
27 | ```
28 | dispatch:
29 |   - macro_namespace: dbt_utils
30 |     search_order: ['spark_utils', 'dbt_utils']
31 | ```
32 |
33 | ### Note to maintainers of other packages
34 |
35 | The spark-utils package may be able to provide compatibility for your package, especially if your package leverages dbt-utils macros for cross-database functionality. This package _does not_ need to be specified as a dependency of your package in `packages.yml`. Instead, you should encourage anyone using your package on Apache Spark / Databricks to:
36 | - Install `spark_utils` alongside your package
37 | - Add a `dispatch` config in their root project, like the one above
38 |
39 | ----
40 |
41 | ## Useful macros: maintenance
42 |
43 | _Caveat: These are not tested in CI, nor guaranteed to work on all platforms._
44 |
45 | Each of these macros accepts a regex pattern, finds tables whose names match the pattern, and loops over those tables to perform a maintenance operation (see the usage sketch at the end of this README):
46 |
47 | - `spark_optimize_delta_tables`: Runs `optimize` for all matched Delta tables
48 | - `spark_vacuum_delta_tables`: Runs `vacuum` for all matched Delta tables
49 | - `spark_analyze_tables`: Computes statistics for all matched tables
50 |
51 | ----
52 |
53 | ### Contributing
54 |
55 | We welcome contributions to this repo! To contribute a new feature or a fix,
56 | please open a Pull Request with 1) your changes and 2) updated documentation for
57 | the `README.md` file.
58 |
59 | ## Testing
60 |
61 | The macros are tested with [`pytest`](https://docs.pytest.org) and
62 | [`pytest-dbt-core`](https://pypi.org/project/pytest-dbt-core/). For example,
63 | the [`get_tables` macro is tested](./tests/unit/test_macros.py) by:
64 |
65 | 1. Creating a test table (test setup):
66 |    ``` python
67 |    spark_session.sql(f"CREATE TABLE {table_name} (id int) USING parquet")
68 |    ```
69 | 2. Calling the macro generator:
70 |    ``` python
71 |    tables = macro_generator()
72 |    ```
73 | 3. Asserting the test condition:
74 |    ``` python
75 |    assert simple_table in tables
76 |    ```
77 | 4. Deleting the test table (test cleanup):
78 |    ``` python
79 |    spark_session.sql(f"DROP TABLE IF EXISTS {table_name}")
80 |    ```
81 |
82 | A macro is fetched using the
83 | [`macro_generator`](https://pytest-dbt-core.readthedocs.io/en/latest/dbt_spark.html#usage)
84 | fixture and by providing the macro name through
85 | [indirect parameterization](https://docs.pytest.org/en/7.1.x/example/parametrize.html?highlight=indirect#indirect-parametrization):
86 |
87 | ``` python
88 | @pytest.mark.parametrize(
89 |     "macro_generator", ["macro.spark_utils.get_tables"], indirect=True
90 | )
91 | def test_create_table(macro_generator: MacroGenerator) -> None:
92 | ```
93 |
94 | ----
95 |
96 | ### Getting started with dbt + Spark
97 |
98 | - [What is dbt](https://docs.getdbt.com/docs/introduction)?
99 | - [Installation](https://github.com/dbt-labs/dbt-spark)
100 | - Join the #spark channel in [dbt Slack](http://slack.getdbt.com/)
101 |
102 |
103 | ## Code of Conduct
104 |
105 | Everyone interacting in the dbt project's codebases, issue trackers, chat rooms,
106 | and mailing lists is expected to follow the
107 | [PyPA Code of Conduct](https://www.pypa.io/en/latest/code-of-conduct/).
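----

### Appendix: maintenance macro usage (sketch)

The snippet below is a minimal, illustrative sketch of invoking the maintenance macros described earlier in this README via `dbt run-operation`. It assumes your Spark or Databricks target is already configured in `profiles.yml`; the `analytics` schema name used in the regex is a hypothetical placeholder, not something defined by this package.

``` shell
# With no --args, the default pattern '.*' matches every table the macro can see:
dbt run-operation spark_optimize_delta_tables

# Restrict the operation with a regex over database-qualified table names
# ("analytics" is a hypothetical schema name -- substitute your own pattern):
dbt run-operation spark_vacuum_delta_tables --args "{table_regex_pattern: 'analytics\..*'}"
dbt run-operation spark_analyze_tables --args "{table_regex_pattern: 'analytics\..*'}"
```

Each macro logs per-table progress with `dbt_utils.log_info`, so you can follow the operation in dbt's console output.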
108 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------