├── tests ├── __init__.py ├── unit │ ├── __init__.py │ ├── test_connections.py │ ├── test_excel_adapter.py │ └── utils.py ├── functional │ ├── adapter │ │ ├── test_concurrency.py │ │ ├── test_incremental.py │ │ ├── test_external_rematerialize.py │ │ ├── test_attach.py │ │ ├── test_sources.py │ │ ├── test_sources_xlsx.py │ │ ├── test_basic.py │ │ ├── test_utils.py │ │ ├── test_python_model.py │ │ ├── test_ephemeral.py │ │ └── test_external.py │ └── fsspec │ │ └── test_filesystems.py └── conftest.py ├── dbt ├── adapters │ ├── excel │ │ ├── __version__.py │ │ ├── connections.py │ │ ├── __init__.py │ │ ├── impl.py │ │ └── relation.py │ └── __init__.py ├── include │ ├── excel │ │ ├── __init__.py │ │ ├── dbt_project.yml │ │ └── macros │ │ │ ├── utils │ │ │ └── external_location.sql │ │ │ ├── adapters.sql │ │ │ └── materializations │ │ │ └── external.sql │ └── __init__.py └── __init__.py ├── MANIFEST.in ├── CHANGELOG.md ├── docs └── jaffle_shop_with_dbt_excel │ ├── .gitignore │ ├── profiles.yml │ ├── sources │ └── jaffle_shop.xlsx │ ├── models │ ├── overview.md │ ├── staging │ │ ├── stg_customers.sql │ │ ├── stg_orders.sql │ │ ├── stg_payments.sql │ │ └── schema.yml │ ├── docs.md │ ├── orders.sql │ ├── customers.sql │ └── schema.yml │ └── dbt_project.yml ├── mypy.ini ├── assets └── dbt-excel.png ├── .flake8 ├── pytest.ini ├── dev-requirements.txt ├── tox.ini ├── .gitignore ├── .pre-commit-config.yaml ├── setup.py ├── README.md ├── .github └── workflows │ └── workflow.yml └── LICENSE /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dbt/adapters/excel/__version__.py: -------------------------------------------------------------------------------- 1 | 
version = "1.4.0" 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include dbt/include *.sql *.yml *.md 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 1.4.0rc1 (2023-03-15) 2 | --------------------- 3 | - MVP 4 | -------------------------------------------------------------------------------- /docs/jaffle_shop_with_dbt_excel/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | mypy_path = ./third-party-stubs 3 | namespace_packages = True 4 | -------------------------------------------------------------------------------- /dbt/include/excel/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | PACKAGE_PATH = os.path.dirname(__file__) 4 | -------------------------------------------------------------------------------- /assets/dbt-excel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/godatadriven/dbt-excel/HEAD/assets/dbt-excel.png -------------------------------------------------------------------------------- /dbt/__init__.py: -------------------------------------------------------------------------------- 1 | from pkgutil import extend_path 2 | 3 | __path__ = extend_path(__path__, __name__) # type: ignore 4 | -------------------------------------------------------------------------------- /dbt/adapters/__init__.py: 
-------------------------------------------------------------------------------- 1 | from pkgutil import extend_path 2 | 3 | __path__ = extend_path(__path__, __name__) # type: ignore 4 | -------------------------------------------------------------------------------- /dbt/include/__init__.py: -------------------------------------------------------------------------------- 1 | from pkgutil import extend_path 2 | 3 | __path__ = extend_path(__path__, __name__) # type: ignore 4 | -------------------------------------------------------------------------------- /dbt/include/excel/dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | name: dbt_excel 3 | version: 1.0 4 | config-version: 2 5 | 6 | macro-paths: ["macros"] 7 | -------------------------------------------------------------------------------- /docs/jaffle_shop_with_dbt_excel/profiles.yml: -------------------------------------------------------------------------------- 1 | dbt_excel: 2 | target: dev 3 | outputs: 4 | dev: 5 | type: excel 6 | path: dbt_excel.duckdb 7 | -------------------------------------------------------------------------------- /docs/jaffle_shop_with_dbt_excel/sources/jaffle_shop.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/godatadriven/dbt-excel/HEAD/docs/jaffle_shop_with_dbt_excel/sources/jaffle_shop.xlsx -------------------------------------------------------------------------------- /tests/functional/adapter/test_concurrency.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.concurrency.test_concurrency import TestConcurenncy 2 | 3 | 4 | class TestConcurrencyDuckDB(TestConcurenncy): 5 | pass 6 | -------------------------------------------------------------------------------- /dbt/include/excel/macros/utils/external_location.sql: 
-------------------------------------------------------------------------------- 1 | {%- macro external_location(relation, format) -%} 2 | {{- adapter.external_root() }}/{{ relation.identifier }}.{{ format }} 3 | {%- endmacro -%} 4 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select = 3 | E 4 | W 5 | F 6 | ignore = 7 | W503 # makes Flake8 work like black 8 | W504 9 | E203 # makes Flake8 work like black 10 | E741 11 | E501 12 | exclude = tests 13 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning 4 | ignore:unclosed file .*:ResourceWarning 5 | testpaths = 6 | tests/functional 7 | tests/unit 8 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_incremental.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.incremental.test_incremental_unique_id import ( 2 | BaseIncrementalUniqueKey, 3 | ) 4 | 5 | 6 | class TestBaseIncrementalUniqueKey(BaseIncrementalUniqueKey): 7 | pass 8 | -------------------------------------------------------------------------------- /docs/jaffle_shop_with_dbt_excel/models/overview.md: -------------------------------------------------------------------------------- 1 | {% docs __overview__ %} 2 | 3 | ## Data Documentation for Jaffle Shop 4 | 5 | `jaffle_shop` is a fictional ecommerce store. 6 | 7 | This [dbt](https://www.getdbt.com/) project is for testing out code. 8 | 9 | The source code can be found [here](https://github.com/clrcrl/jaffle_shop). 
10 | 11 | {% enddocs %} 12 | -------------------------------------------------------------------------------- /dbt/adapters/excel/connections.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from dbt.adapters.duckdb.connections import DuckDBConnectionManager 4 | from dbt.adapters.duckdb.connections import DuckDBCredentials 5 | 6 | 7 | @dataclass 8 | class ExcelCredentials(DuckDBCredentials): 9 | @property 10 | def type(self): 11 | return "excel" 12 | 13 | 14 | class ExcelConnectionManager(DuckDBConnectionManager): 15 | TYPE = "excel" 16 | -------------------------------------------------------------------------------- /dbt/adapters/excel/__init__.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.base import AdapterPlugin 2 | from dbt.adapters.excel.connections import ExcelConnectionManager # noqa F401 3 | from dbt.adapters.excel.connections import ExcelCredentials 4 | from dbt.adapters.excel.impl import ExcelAdapter 5 | from dbt.include import excel 6 | 7 | Plugin = AdapterPlugin( 8 | adapter=ExcelAdapter, # type: ignore 9 | credentials=ExcelCredentials, 10 | include_path=excel.PACKAGE_PATH, 11 | dependencies=["duckdb"], 12 | ) 13 | -------------------------------------------------------------------------------- /docs/jaffle_shop_with_dbt_excel/models/staging/stg_customers.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | {#- 4 | Normally we would select from the table here, but we are using seeds to load 5 | our data in this project 6 | #} 7 | select * from {{ source('jaffle_shop', 'raw_customers') }} 8 | 9 | ), 10 | 11 | renamed as ( 12 | 13 | select 14 | id as customer_id, 15 | first_name, 16 | last_name 17 | 18 | from source 19 | 20 | ) 21 | 22 | select * from renamed 23 | -------------------------------------------------------------------------------- 
/docs/jaffle_shop_with_dbt_excel/models/staging/stg_orders.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | {#- 4 | Normally we would select from the table here, but we are using seeds to load 5 | our data in this project 6 | #} 7 | select * from {{ source('jaffle_shop', 'raw_orders') }} 8 | 9 | ), 10 | 11 | renamed as ( 12 | 13 | select 14 | id as order_id, 15 | user_id as customer_id, 16 | order_date, 17 | status 18 | 19 | from source 20 | 21 | ) 22 | 23 | select * from renamed 24 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | 4 | # Import the standard functional fixtures as a plugin 5 | # Note: fixtures with session scope need to be local 6 | pytest_plugins = ["dbt.tests.fixtures.project"] 7 | 8 | # The profile dictionary, used to write out profiles.yml 9 | # dbt will supply a unique schema per test, so we do not specify 'schema' here 10 | @pytest.fixture(scope="class") 11 | def dbt_profile_target(): 12 | return { 13 | "type": "excel", 14 | "threads": 1, 15 | "path": ":memory:", 16 | } 17 | -------------------------------------------------------------------------------- /docs/jaffle_shop_with_dbt_excel/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'jaffle_shop' 2 | 3 | config-version: 2 4 | version: '0.1' 5 | 6 | profile: 'dbt_excel' 7 | 8 | model-paths: ["models"] 9 | seed-paths: ["seeds"] 10 | test-paths: ["tests"] 11 | analysis-paths: ["analysis"] 12 | macro-paths: ["macros"] 13 | 14 | target-path: "target" 15 | clean-targets: 16 | - "target" 17 | - "dbt_modules" 18 | - "logs" 19 | 20 | require-dbt-version: [">=1.0.0", "<2.0.0"] 21 | 22 | models: 23 | jaffle_shop: 24 | materialized: table 25 | staging: 26 | materialized: view 27 | 
-------------------------------------------------------------------------------- /dbt/adapters/excel/impl.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.base.meta import available 2 | from dbt.adapters.duckdb.impl import DuckDBAdapter 3 | from dbt.adapters.excel.connections import ExcelConnectionManager 4 | from dbt.adapters.excel.relation import ExcelRelation 5 | 6 | 7 | class ExcelAdapter(DuckDBAdapter): 8 | ConnectionManager = ExcelConnectionManager 9 | Relation = ExcelRelation # type: ignore 10 | 11 | @available 12 | def output_excel(self, location): 13 | import pandas as pd 14 | 15 | pd.read_parquet(location + ".parquet").to_excel(location) 16 | -------------------------------------------------------------------------------- /docs/jaffle_shop_with_dbt_excel/models/staging/stg_payments.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | {#- 4 | Normally we would select from the table here, but we are using seeds to load 5 | our data in this project 6 | #} 7 | select * from {{ source('jaffle_shop', 'raw_payments') }} 8 | 9 | ), 10 | 11 | renamed as ( 12 | 13 | select 14 | id as payment_id, 15 | order_id, 16 | payment_method, 17 | 18 | -- `amount` is currently stored in cents, so we convert it to dollars 19 | amount / 100 as amount 20 | 21 | from source 22 | 23 | ) 24 | 25 | select * from renamed 26 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | # install latest changes in dbt-core 2 | # git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core 3 | # git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter 4 | 5 | dbt-tests-adapter==1.4.5 6 | 7 | boto3 8 | mypy-boto3-glue 9 | pandas 10 | pyarrow 11 | black==23.3.0 12 | bumpversion 13 | flake8 14 | 
flaky 15 | freezegun==1.2.2 16 | fsspec 17 | ipdb 18 | mypy==1.2.0 19 | openpyxl 20 | pip-tools 21 | pre-commit 22 | pytest 23 | pytest-dotenv 24 | pytest-logbook 25 | pytest-csv 26 | pytest-xdist 27 | pytest-mock 28 | pytz 29 | tox>=3.13 30 | twine 31 | wheel 32 | -------------------------------------------------------------------------------- /docs/jaffle_shop_with_dbt_excel/models/staging/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_customers 5 | columns: 6 | - name: customer_id 7 | tests: 8 | - unique 9 | - not_null 10 | 11 | - name: stg_orders 12 | columns: 13 | - name: order_id 14 | tests: 15 | - unique 16 | - not_null 17 | - name: status 18 | tests: 19 | - accepted_values: 20 | values: ['placed', 'shipped', 'completed', 'return_pending', 'returned'] 21 | 22 | - name: stg_payments 23 | columns: 24 | - name: payment_id 25 | tests: 26 | - unique 27 | - not_null 28 | - name: payment_method 29 | tests: 30 | - accepted_values: 31 | values: ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] 32 | config: 33 | severity: warn 34 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py37,py38,py39,py310,py311 3 | isolated_build = true 4 | skip_missing_interpreters = true 5 | 6 | [testenv:{unit,py37,py38,py39,py310,py311,py}] 7 | description = unit testing 8 | passenv = * 9 | extras = 10 | test 11 | glue 12 | commands = {envpython} -m pytest {posargs} tests/unit 13 | 14 | [testenv:{functional,py37,py38,py39,py310,py311,py}] 15 | description = adapter plugin functional testing 16 | passenv = * 17 | extras = 18 | test 19 | glue 20 | commands = {envpython} -m pytest {posargs} tests/functional/adapter 21 | 22 | [testenv:{fsspec,py37,py38,py39,py310,py311,py}] 23 | description = adapter plugin functional testing 24 | passenv = * 25 | 
extras = 26 | test 27 | glue 28 | commands = {envpython} -m pytest {posargs} tests/functional/fsspec 29 | -------------------------------------------------------------------------------- /dbt/include/excel/macros/adapters.sql: -------------------------------------------------------------------------------- 1 | {% macro write_to_file(relation, location, format, delimiter=',') -%} 2 | {% if format == 'parquet' %} 3 | {% set copy_to %} 4 | copy {{ relation }} to '{{ location }}' (FORMAT 'parquet'); 5 | {% endset %} 6 | 7 | {% elif format == 'csv' %} 8 | {% set copy_to %} 9 | copy {{ relation }} to '{{ location }}' (HEADER 1, DELIMITER '{{ delimiter }}'); 10 | {% endset %} 11 | 12 | {% elif format == 'xlsx' %} 13 | {% set copy_to %} 14 | copy {{ relation }} to '{{ location }}.parquet' (FORMAT 'parquet'); 15 | {% endset %} 16 | 17 | {% else %} 18 | {% do exceptions.raise_compiler_error("%s external format is not supported!" % format) %} 19 | {% endif %} 20 | 21 | {% call statement('write_to_file') -%} 22 | {{ copy_to }} 23 | {%- endcall %} 24 | 25 | {% if format == 'xlsx' %} 26 | {{ adapter.output_excel(location) }} 27 | {% endif %} 28 | {% endmacro %} 29 | -------------------------------------------------------------------------------- /docs/jaffle_shop_with_dbt_excel/models/docs.md: -------------------------------------------------------------------------------- 1 | {% docs orders_status %} 2 | 3 | Orders can be one of the following statuses: 4 | 5 | | status | description | 6 | |----------------|------------------------------------------------------------------------------------------------------------------------| 7 | | placed | The order has been placed but has not yet left the warehouse | 8 | | shipped | The order has ben shipped to the customer and is currently in transit | 9 | | completed | The order has been received by the customer | 10 | | return_pending | The customer has indicated that they would like to return the order, but it has not yet been received at 
the warehouse | 11 | | returned | The order has been returned by the customer and received at the warehouse | 12 | 13 | 14 | {% enddocs %} 15 | -------------------------------------------------------------------------------- /tests/functional/fsspec/test_filesystems.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | 4 | models_file_model_sql = """ 5 | {{ config(materialized='table') }} 6 | select * 7 | from read_csv_auto('github://data/team_ratings.csv') 8 | WHERE conf = 'West' 9 | """ 10 | 11 | 12 | class TestFilesystems: 13 | @pytest.fixture(scope="class") 14 | def profiles_config_update(self): 15 | return { 16 | "test": { 17 | "outputs": { 18 | "dev": { 19 | "type": "excel", 20 | "path": ":memory:", 21 | "filesystems": [ 22 | {"fs": "github", "org": "jwills", "repo": "nba_monte_carlo"} 23 | ], 24 | } 25 | }, 26 | "target": "dev", 27 | } 28 | } 29 | 30 | @pytest.fixture(scope="class") 31 | def models(self): 32 | return { 33 | "file_model.sql": models_file_model_sql, 34 | } 35 | 36 | def test_filesystems(self, project): 37 | results = run_dbt() 38 | assert len(results) == 1 39 | -------------------------------------------------------------------------------- /docs/jaffle_shop_with_dbt_excel/models/orders.sql: -------------------------------------------------------------------------------- 1 | {% set payment_methods = ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] %} 2 | 3 | with orders as ( 4 | 5 | select * from {{ ref('stg_orders') }} 6 | 7 | ), 8 | 9 | payments as ( 10 | 11 | select * from {{ ref('stg_payments') }} 12 | 13 | ), 14 | 15 | order_payments as ( 16 | 17 | select 18 | order_id, 19 | 20 | {% for payment_method in payment_methods -%} 21 | sum(case when payment_method = '{{ payment_method }}' then amount else 0 end) as {{ payment_method }}_amount, 22 | {% endfor -%} 23 | 24 | sum(amount) as total_amount 25 | 26 | from payments 27 | 28 | group by 
order_id 29 | 30 | ), 31 | 32 | final as ( 33 | 34 | select 35 | orders.order_id, 36 | orders.customer_id, 37 | orders.order_date, 38 | orders.status, 39 | 40 | {% for payment_method in payment_methods -%} 41 | 42 | order_payments.{{ payment_method }}_amount, 43 | 44 | {% endfor -%} 45 | 46 | order_payments.total_amount as amount 47 | 48 | from orders 49 | 50 | 51 | left join order_payments 52 | on orders.order_id = order_payments.order_id 53 | 54 | ) 55 | 56 | select * from final 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | logs/ 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | test.env 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | 57 | # Sphinx documentation 58 | docs/_build/ 59 | 60 | # PyBuilder 61 | target/ 62 | 63 | #Ipython Notebook 64 | .ipynb_checkpoints 65 | 66 | #Emacs 67 | *~ 68 | 69 | # Sublime Text 70 | *.sublime-* 71 | 72 | # Vim 73 | *.sw* 74 | 75 | .python-version 76 | 77 | # Vim 78 | *.sw* 79 | .DS_Store 80 | 81 | # Example dbt project 82 | docs/jaffle_shop_with_dbt_excel/dbt_excel.duckdb 83 | docs/jaffle_shop_with_dbt_excel/sources/*/*.csv 84 | docs/jaffle_shop_with_dbt_excel/sources/~*.xlsx 85 | docs/jaffle_shop_with_dbt_excel/.user.yml 86 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # For more on configuring pre-commit hooks (see https://pre-commit.com/) 2 | 3 | # TODO: remove global exclusion of tests when testing overhaul is complete 4 | exclude: "^tests/.*" 5 | 6 | 7 | default_language_version: 8 | python: python3.8 9 | 10 | repos: 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v3.2.0 13 | hooks: 14 | - id: check-yaml 15 | args: [--unsafe] 16 | - id: check-json 17 | exclude: ^.devcontainer/ 18 | - id: end-of-file-fixer 19 | - id: trailing-whitespace 20 | - id: check-case-conflict 21 | - repo: https://github.com/asottile/reorder_python_imports 22 | rev: v3.9.0 23 | hooks: 24 | - id: reorder-python-imports 25 | - repo: https://github.com/psf/black 26 | rev: 22.10.0 27 | hooks: 28 | - id: black 29 | args: 30 | - "--line-length=99" 31 | - "--target-version=py38" 32 | - id: black 33 | alias: black-check 34 | stages: [manual] 
35 | args: 36 | - "--line-length=99" 37 | - "--target-version=py38" 38 | - "--check" 39 | - "--diff" 40 | - repo: https://github.com/pycqa/flake8 41 | rev: 4.0.1 42 | hooks: 43 | - id: flake8 44 | - id: flake8 45 | alias: flake8-check 46 | stages: [manual] 47 | - repo: https://github.com/pre-commit/mirrors-mypy 48 | rev: v0.782 49 | hooks: 50 | - id: mypy 51 | args: [--show-error-codes, --ignore-missing-imports] 52 | files: ^dbt/adapters/.* 53 | language: system 54 | - id: mypy 55 | alias: mypy-check 56 | stages: [manual] 57 | args: [--show-error-codes, --pretty, --ignore-missing-imports] 58 | files: ^dbt/adapters 59 | language: system 60 | -------------------------------------------------------------------------------- /docs/jaffle_shop_with_dbt_excel/models/customers.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | 3 | config( 4 | materialized='external', 5 | location='./customers.xlsx', 6 | format="xlsx" 7 | ) 8 | 9 | }} 10 | 11 | with customers as ( 12 | 13 | select * from {{ ref('stg_customers') }} 14 | 15 | ), 16 | 17 | orders as ( 18 | 19 | select * from {{ ref('stg_orders') }} 20 | 21 | ), 22 | 23 | payments as ( 24 | 25 | select * from {{ ref('stg_payments') }} 26 | 27 | ), 28 | 29 | customer_orders as ( 30 | 31 | select 32 | customer_id, 33 | 34 | min(order_date) as first_order, 35 | max(order_date) as most_recent_order, 36 | count(order_id) as number_of_orders 37 | from orders 38 | 39 | group by customer_id 40 | 41 | ), 42 | 43 | customer_payments as ( 44 | 45 | select 46 | orders.customer_id, 47 | sum(amount) as total_amount 48 | 49 | from payments 50 | 51 | left join orders on 52 | payments.order_id = orders.order_id 53 | 54 | group by orders.customer_id 55 | 56 | ), 57 | 58 | final as ( 59 | 60 | select 61 | customers.customer_id, 62 | customers.first_name, 63 | customers.last_name, 64 | customer_orders.first_order, 65 | customer_orders.most_recent_order, 66 | customer_orders.number_of_orders, 67 | 
customer_payments.total_amount as customer_lifetime_value 68 | 69 | from customers 70 | 71 | left join customer_orders 72 | on customers.customer_id = customer_orders.customer_id 73 | 74 | left join customer_payments 75 | on customers.customer_id = customer_payments.customer_id 76 | 77 | ) 78 | 79 | select * from final 80 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import re 4 | 5 | from setuptools import find_namespace_packages 6 | from setuptools import setup 7 | 8 | this_directory = os.path.abspath(os.path.dirname(__file__)) 9 | with open(os.path.join(this_directory, "README.md")) as f: 10 | long_description = f.read() 11 | 12 | package_name = "dbt-excel" 13 | 14 | 15 | def _dbt_excel_version(): 16 | _version_path = os.path.join(this_directory, "dbt", "adapters", "excel", "__version__.py") 17 | _version_pattern = r"""version\s*=\s*["'](.+)["']""" 18 | with open(_version_path) as f: 19 | match = re.search(_version_pattern, f.read().strip()) 20 | if match is None: 21 | raise ValueError(f"invalid version at {_version_path}") 22 | return match.group(1) 23 | 24 | 25 | package_version = _dbt_excel_version() + "rc2" 26 | description = """The excel adapter plugin for dbt (data build tool)""" 27 | 28 | setup( 29 | name=package_name, 30 | version=package_version, 31 | description=description, 32 | long_description=long_description, 33 | long_description_content_type="text/markdown", 34 | author="Cor Zuurmond,Dumky de Wilde,Juan Perafan,Henk Griffioen", 35 | author_email="Cor.Zuurmond@xebia.com,Dumky.deWilde@xebia.com,Henk.Griffioen@xebia.com, juan.perafan@xebia.com", 36 | url="https://github.com/godatadriven/dbt-excel", 37 | packages=find_namespace_packages(include=["dbt", "dbt.*"]), 38 | include_package_data=True, 39 | install_requires=[ 40 | "dbt-duckdb~=1.4.0", 41 | "pandas>=1.0.0,<3.0.0", 42 | 
"pyarrow>=9.0.0", 43 | "openpyxl>=3.0.0,<4.0.0", 44 | ], 45 | extras_require={ 46 | "glue": ["boto3", "mypy-boto3-glue"], 47 | "test": ["pytest", "dbt-tests-adapter"], 48 | }, 49 | ) 50 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_external_rematerialize.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dbt.tests.util import run_dbt 3 | from dbt.adapters.excel import ExcelConnectionManager 4 | 5 | upstream_model_sql = """ 6 | select range from range(3) 7 | """ 8 | 9 | 10 | downstream_model_sql = """ 11 | select range * 2 from {{ ref('upstream_model') }} 12 | """ 13 | 14 | other_downstream_model_sql = """ 15 | select range * 5 from {{ ref('upstream_model') }} 16 | """ 17 | 18 | # class must begin with 'Test' 19 | class TestRematerializeDownstreamExternalModel: 20 | """ 21 | External models should load in dependencies when they exist. 22 | 23 | We test that after materializing upstream and downstream models, we can 24 | materialize the downstream model by itself, even if we are using an 25 | in-memory database. 
26 | """ 27 | 28 | @pytest.fixture(scope="class") 29 | def dbt_profile_target(self): 30 | return { 31 | "type": "excel", 32 | "path": ":memory:", 33 | } 34 | 35 | @pytest.fixture(scope="class") 36 | def project_config_update(self): 37 | return { 38 | "name": "base", 39 | "models": {"+materialized": "external"}, 40 | "on-run-start": ["{{ register_upstream_external_models() }}"], 41 | } 42 | 43 | @pytest.fixture(scope="class") 44 | def models(self): 45 | return { 46 | "upstream_model.sql": upstream_model_sql, 47 | "downstream_model.sql": downstream_model_sql, 48 | "other_downstream_model.sql": other_downstream_model_sql, 49 | } 50 | 51 | def test_run(self, project): 52 | run_dbt(["run"]) 53 | 54 | # Force close the :memory: connection 55 | ExcelConnectionManager.close_all_connections() 56 | run_dbt(["run", "--select", "downstream_model,other_downstream_model"]) 57 | -------------------------------------------------------------------------------- /dbt/adapters/excel/relation.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | from typing import Any 4 | from typing import Optional 5 | from typing import Type 6 | 7 | import pandas as pd 8 | 9 | from dbt.adapters.base.relation import BaseRelation 10 | from dbt.adapters.base.relation import Self 11 | from dbt.contracts.graph.nodes import SourceDefinition 12 | 13 | 14 | @dataclass(frozen=True, eq=False, repr=False) 15 | class ExcelRelation(BaseRelation): 16 | external_location: Optional[str] = None 17 | 18 | @classmethod 19 | def create_from_source(cls: Type[Self], source: SourceDefinition, **kwargs: Any) -> Self: 20 | 21 | if "external_location" in source.meta: 22 | external_location = source.meta["external_location"] 23 | elif "external_location" in source.source_meta: 24 | external_location = source.source_meta["external_location"] 25 | else: 26 | external_location = None 27 | 28 | if external_location is not None: 29 
| external_location = external_location.format( 30 | schema=source.schema, 31 | name=source.name, 32 | identifier=source.identifier, 33 | ) 34 | if external_location.endswith(".xlsx"): 35 | excel_location = Path(external_location.strip("'")) 36 | csv_location = ( 37 | excel_location.parent / excel_location.stem / source.identifier 38 | ).with_suffix(".csv") 39 | csv_location.parent.mkdir(exist_ok=True) 40 | pd.read_excel(excel_location, sheet_name=source.identifier).to_csv( 41 | csv_location, index=False 42 | ) 43 | external_location = str(csv_location) 44 | if "(" not in external_location and not external_location.startswith("'"): 45 | external_location = f"'{external_location}'" 46 | kwargs["external_location"] = external_location 47 | 48 | return super().create_from_source(source, **kwargs) # type: ignore 49 | 50 | def render(self) -> str: 51 | if self.external_location: 52 | return self.external_location 53 | else: 54 | return super().render() 55 | -------------------------------------------------------------------------------- /tests/unit/test_connections.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | from botocore.credentials import Credentials 4 | from dbt.adapters.duckdb.connections import Attachment 5 | 6 | from dbt.adapters.excel.connections import ExcelCredentials 7 | 8 | 9 | def test_load_basic_settings(): 10 | creds = ExcelCredentials() 11 | creds.settings = { 12 | "s3_access_key_id": "abc", 13 | "s3_secret_access_key": "xyz", 14 | "s3_region": "us-west-2", 15 | } 16 | settings = creds.load_settings() 17 | assert creds.settings == settings 18 | 19 | 20 | @mock.patch("boto3.session.Session") 21 | def test_load_aws_creds(mock_session_class): 22 | mock_session_object = mock.Mock() 23 | mock_client = mock.Mock() 24 | 25 | mock_session_object.get_credentials.return_value = Credentials( 26 | "access_key", "secret_key", "token" 27 | ) 28 | mock_session_object.client.return_value = 
def test_attachments():
    """Each attach config dict should render to the expected ATTACH statement."""
    cases = [
        ({"path": "/tmp/f1234.db"}, "ATTACH '/tmp/f1234.db'"),
        ({"path": "/tmp/g1234.db", "alias": "g"}, "ATTACH '/tmp/g1234.db' AS g"),
        ({"path": "/tmp/h5678.db", "read_only": 1}, "ATTACH '/tmp/h5678.db' (READ_ONLY)"),
        ({"path": "/tmp/i9101.db", "type": "sqlite"}, "ATTACH '/tmp/i9101.db' (TYPE sqlite)"),
        (
            {"path": "/tmp/jklm.db", "alias": "jk", "read_only": 1, "type": "sqlite"},
            "ATTACH '/tmp/jklm.db' AS jk (TYPE sqlite, READ_ONLY)",
        ),
    ]

    creds = ExcelCredentials()
    creds.attach = [config for config, _ in cases]

    for config, expected in cases:
        assert Attachment(**config).to_sql() == expected
"profile": "test", 28 | "project-root": "/tmp/dbt/does-not-exist", 29 | "quoting": { 30 | "identifier": False, 31 | "schema": True, 32 | }, 33 | "config-version": 2, 34 | } 35 | 36 | self.config = config_from_parts_or_dicts(project_cfg, profile_cfg) 37 | self._adapter = None 38 | 39 | @property 40 | def adapter(self): 41 | if self._adapter is None: 42 | self._adapter = ExcelAdapter(self.config) 43 | return self._adapter 44 | 45 | # TODO: Fix this test 46 | # @mock.patch("dbt.adapters.excel.connections.duckdb") 47 | # def test_acquire_connection(self, connector): 48 | # connection = self.adapter.acquire_connection("dummy") 49 | # 50 | # connector.connect.assert_not_called() 51 | # connection.handle 52 | # self.assertEqual(connection.state, "open") 53 | # self.assertNotEqual(connection.handle, None) 54 | # connector.connect.assert_called_once() 55 | 56 | def test_cancel_open_connections_empty(self): 57 | self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) 58 | 59 | def test_cancel_open_connections_main(self): 60 | key = self.adapter.connections.get_thread_identifier() 61 | self.adapter.connections.thread_connections[key] = mock_connection( 62 | "main" 63 | ) 64 | self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) 65 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_attach.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import duckdb 4 | import pytest 5 | import yaml 6 | 7 | from dbt.tests.util import run_dbt 8 | 9 | sources_schema_yml = """version: 2 10 | sources: 11 | - name: attached_source 12 | database: attach_test 13 | schema: analytics 14 | tables: 15 | - name: attached_table 16 | description: "An attached table" 17 | columns: 18 | - name: id 19 | description: "An id" 20 | tests: 21 | - unique 22 | - not_null 23 | """ 24 | 25 | models_source_model_sql = """select * from {{ source('attached_source', 
'attached_table') }} 26 | """ 27 | 28 | models_target_model_sql = """ 29 | {{ config(materialized='table', database='attach_test') }} 30 | SELECT * FROM {{ ref('source_model') }} 31 | """ 32 | 33 | 34 | class TestAttachedDatabase: 35 | @pytest.fixture(scope="class") 36 | def attach_test_db(self): 37 | db = duckdb.connect("/tmp/attach_test.duckdb") 38 | db.execute("CREATE SCHEMA analytics") 39 | db.execute("CREATE TABLE analytics.attached_table AS SELECT 1 as id") 40 | db.close() 41 | yield 42 | os.unlink("/tmp/attach_test.duckdb") 43 | 44 | @pytest.fixture(scope="class") 45 | def profiles_config_update(self, attach_test_db): 46 | return { 47 | "test": { 48 | "outputs": { 49 | "dev": { 50 | "type": "excel", 51 | "path": ":memory:", 52 | "attach": [{"path": "/tmp/attach_test.duckdb"}], 53 | } 54 | }, 55 | "target": "dev", 56 | } 57 | } 58 | 59 | @pytest.fixture(scope="class") 60 | def models(self): 61 | return { 62 | "schema.yml": sources_schema_yml, 63 | "source_model.sql": models_source_model_sql, 64 | "target_model.sql": models_target_model_sql, 65 | } 66 | 67 | def test_attached_databases(self, project): 68 | results = run_dbt() 69 | assert len(results) == 2 70 | 71 | test_results = run_dbt(["test"]) 72 | assert len(test_results) == 2 73 | 74 | # check that the model is created in the attached db 75 | db = duckdb.connect("/tmp/attach_test.duckdb") 76 | ret = db.execute(f"SELECT * FROM target_model").fetchall() 77 | assert ret[0][0] == 1 78 | 79 | # check that everything works on a re-run of dbt 80 | rerun_results = run_dbt() 81 | assert len(rerun_results) == 2 82 | -------------------------------------------------------------------------------- /docs/jaffle_shop_with_dbt_excel/models/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: "jaffle_shop" 5 | meta: 6 | external_location: "./sources/jaffle_shop.xlsx" 7 | tables: 8 | - name: raw_customers 9 | - name: raw_payments 10 | - 
name: raw_orders 11 | 12 | models: 13 | - name: customers 14 | description: This table has basic information about a customer, as well as some derived facts based on a customer's orders 15 | 16 | columns: 17 | - name: customer_id 18 | description: This is a unique identifier for a customer 19 | tests: 20 | - unique 21 | - not_null 22 | 23 | - name: first_name 24 | description: Customer's first name. PII. 25 | 26 | - name: last_name 27 | description: Customer's last name. PII. 28 | 29 | - name: first_order 30 | description: Date (UTC) of a customer's first order 31 | 32 | - name: most_recent_order 33 | description: Date (UTC) of a customer's most recent order 34 | 35 | - name: number_of_orders 36 | description: Count of the number of orders a customer has placed 37 | 38 | - name: total_order_amount 39 | description: Total value (AUD) of a customer's orders 40 | 41 | - name: orders 42 | description: This table has basic information about orders, as well as some derived facts based on payments 43 | 44 | columns: 45 | - name: order_id 46 | tests: 47 | - unique 48 | - not_null 49 | description: This is a unique identifier for an order 50 | 51 | - name: customer_id 52 | description: Foreign key to the customers table 53 | tests: 54 | - not_null 55 | - relationships: 56 | to: ref('customers') 57 | field: customer_id 58 | 59 | - name: order_date 60 | description: Date (UTC) that the order was placed 61 | 62 | - name: status 63 | description: '{{ doc("orders_status") }}' 64 | tests: 65 | - accepted_values: 66 | values: ['placed', 'shipped', 'completed', 'return_pending', 'returned'] 67 | 68 | - name: amount 69 | description: Total amount (AUD) of the order 70 | tests: 71 | - not_null 72 | 73 | - name: credit_card_amount 74 | description: Amount of the order (AUD) paid for by credit card 75 | tests: 76 | - not_null 77 | 78 | - name: coupon_amount 79 | description: Amount of the order (AUD) paid for by coupon 80 | tests: 81 | - not_null 82 | 83 | - name: bank_transfer_amount 
84 | description: Amount of the order (AUD) paid for by bank transfer 85 | tests: 86 | - not_null 87 | 88 | - name: gift_card_amount 89 | description: Amount of the order (AUD) paid for by gift card 90 | tests: 91 | - not_null 92 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_sources.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | import yaml 5 | from dbt.tests.util import run_dbt 6 | 7 | sources_schema_yml = """version: 2 8 | sources: 9 | - name: external_source 10 | meta: 11 | external_location: "/tmp/{name}.csv" 12 | tables: 13 | - name: seeds_source 14 | description: "A source table" 15 | columns: 16 | - name: id 17 | description: "An id" 18 | tests: 19 | - unique 20 | - not_null 21 | - name: seeds_ost 22 | identifier: "seeds_other_source_table" 23 | meta: 24 | external_location: "read_csv_auto('/tmp/{identifier}.csv')" 25 | """ 26 | 27 | models_source_model_sql = """select * from {{ source('external_source', 'seeds_source') }} 28 | """ 29 | 30 | models_multi_source_model_sql = """select * from {{ source('external_source', 'seeds_source') }} 31 | inner join {{ source('external_source', 'seeds_ost') }} USING (id) 32 | """ 33 | 34 | 35 | class TestExternalSources: 36 | @pytest.fixture(scope="class", autouse=True) 37 | def setEnvVars(self): 38 | os.environ["DBT_TEST_SCHEMA_NAME_VARIABLE"] = "test_run_schema" 39 | 40 | yield 41 | 42 | del os.environ["DBT_TEST_SCHEMA_NAME_VARIABLE"] 43 | 44 | @pytest.fixture(scope="class") 45 | def models(self): 46 | return { 47 | "schema.yml": sources_schema_yml, 48 | "source_model.sql": models_source_model_sql, 49 | "multi_source_model.sql": models_multi_source_model_sql, 50 | } 51 | 52 | def run_dbt_with_vars(self, project, cmd, *args, **kwargs): 53 | vars_dict = { 54 | "test_run_schema": project.test_schema, 55 | } 56 | cmd.extend(["--vars", yaml.safe_dump(vars_dict)]) 57 | return 
run_dbt(cmd, *args, **kwargs) 58 | 59 | @pytest.fixture(scope="class") 60 | def seeds_source_file(self): 61 | with open("/tmp/seeds_source.csv", "w") as f: 62 | f.write("id,a,b\n1,2,3\n4,5,6\n7,8,9") 63 | yield 64 | os.unlink("/tmp/seeds_source.csv") 65 | 66 | @pytest.fixture(scope="class") 67 | def ost_file(self): 68 | with open("/tmp/seeds_other_source_table.csv", "w") as f: 69 | f.write("id,c,d\n1,2,3\n4,5,6\n7,8,9") 70 | yield 71 | os.unlink("/tmp/seeds_other_source_table.csv") 72 | 73 | def test_external_sources(self, seeds_source_file, ost_file, project): 74 | results = self.run_dbt_with_vars(project, ["run"]) 75 | assert len(results) == 2 76 | test_results = self.run_dbt_with_vars(project, ["test"]) 77 | assert len(test_results) == 2 78 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_sources_xlsx.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | import pytest 5 | import yaml 6 | from dbt.tests.util import run_dbt 7 | 8 | sources_schema_yml = """version: 2 9 | sources: 10 | - name: external_source 11 | meta: 12 | external_location: "./seeds/{name}.xlsx" 13 | tables: 14 | - name: seeds_source 15 | description: "A source table" 16 | columns: 17 | - name: id 18 | description: "An id" 19 | tests: 20 | - unique 21 | - not_null 22 | - name: seeds_ost 23 | identifier: "seeds_other_source_table" 24 | meta: 25 | external_location: "./seeds/{identifier}.xlsx" 26 | """ 27 | 28 | models_source_model_sql = """select * from {{ source('external_source', 'seeds_source') }} 29 | """ 30 | 31 | models_multi_source_model_sql = """select * from {{ source('external_source', 'seeds_source') }} 32 | inner join {{ source('external_source', 'seeds_ost') }} USING (id) 33 | """ 34 | 35 | 36 | class TestExternalSources: 37 | @pytest.fixture(scope="class", autouse=True) 38 | def setEnvVars(self): 39 | os.environ["DBT_TEST_SCHEMA_NAME_VARIABLE"] 
    def test_external_sources(self, project):
        """Write two xlsx workbooks into the project's seeds dir, then run and
        test models that read them via external_location."""
        # abusing the 'seeds' directory a little bit here, but it works
        seeds_source = pd.DataFrame(
            {"id": [1, 4, 7], "a": [2, 5, 8], "b": [3, 6, 9]}
        )
        # Sheet name must match the source table name so the adapter can
        # extract the right sheet.
        seeds_source.to_excel(
            "seeds/seeds_source.xlsx", sheet_name="seeds_source", index=False
        )
        seeds_other_source_table = pd.DataFrame(
            {"id": [1, 4, 7], "c": [2, 5, 8], "d": [3, 6, 9]}
        )
        seeds_other_source_table.to_excel(
            "seeds/seeds_other_source_table.xlsx",
            sheet_name="seeds_other_source_table",
            index=False,
        )

        # Two models: source_model and multi_source_model.
        results = self.run_dbt_with_vars(project, ["run"])
        assert len(results) == 2

        # Two schema tests: unique and not_null on seeds_source.id.
        test_results = self.run_dbt_with_vars(project, ["test"])
        assert len(test_results) == 2
dbt.tests.adapter.basic.test_incremental import BaseIncremental 11 | from dbt.tests.adapter.basic.test_generic_tests import BaseGenericTests 12 | from dbt.tests.adapter.basic.test_snapshot_check_cols import BaseSnapshotCheckCols 13 | from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp 14 | from dbt.tests.adapter.basic.test_adapter_methods import BaseAdapterMethod 15 | from dbt.tests.adapter.basic.test_validate_connection import BaseValidateConnection 16 | from dbt.tests.adapter.basic.test_docs_generate import ( 17 | BaseDocsGenerate, 18 | BaseDocsGenReferences, 19 | ) 20 | from dbt.tests.adapter.basic.expected_catalog import ( 21 | base_expected_catalog, 22 | no_stats, 23 | expected_references_catalog, 24 | ) 25 | 26 | 27 | class TestSimpleMaterializationsDuckDB(BaseSimpleMaterializations): 28 | pass 29 | 30 | 31 | class TestSingularTestsDuckDB(BaseSingularTests): 32 | pass 33 | 34 | 35 | class TestSingularTestsEphemeralDuckDB(BaseSingularTestsEphemeral): 36 | pass 37 | 38 | 39 | class TestEmptyDuckDB(BaseEmpty): 40 | pass 41 | 42 | 43 | class TestEphemeralDuckDB(BaseEphemeral): 44 | pass 45 | 46 | 47 | class TestIncrementalDuckDB(BaseIncremental): 48 | pass 49 | 50 | 51 | class TestGenericTestsDuckDB(BaseGenericTests): 52 | pass 53 | 54 | 55 | class TestSnapshotCheckColsDuckDB(BaseSnapshotCheckCols): 56 | pass 57 | 58 | 59 | class TestSnapshotTimestampDuckDB(BaseSnapshotTimestamp): 60 | pass 61 | 62 | 63 | class TestBaseAdapterMethodDuckDB(BaseAdapterMethod): 64 | pass 65 | 66 | 67 | class TestValidateConnectionDuckDB(BaseValidateConnection): 68 | pass 69 | 70 | 71 | class TestDocsGenerateDuckDB(BaseDocsGenerate): 72 | @pytest.fixture(scope="class") 73 | def expected_catalog(self, project): 74 | return base_expected_catalog( 75 | project, 76 | role=None, 77 | id_type="INTEGER", 78 | text_type="VARCHAR", 79 | time_type="TIMESTAMP", 80 | view_type="VIEW", 81 | table_type="BASE TABLE", 82 | model_stats=no_stats(), 83 | ) 84 | 85 | 86 | 
class TestDocsGenReferencesDuckDB(BaseDocsGenReferences): 87 | @pytest.fixture(scope="class") 88 | def expected_catalog(self, project): 89 | return expected_references_catalog( 90 | project, 91 | role=None, 92 | id_type="INTEGER", 93 | text_type="VARCHAR", 94 | time_type="TIMESTAMP", 95 | view_type="VIEW", 96 | table_type="BASE TABLE", 97 | model_stats=no_stats(), 98 | bigint_type="BIGINT", 99 | ) 100 | -------------------------------------------------------------------------------- /tests/functional/adapter/test_utils.py: -------------------------------------------------------------------------------- 1 | from dbt.tests.adapter.utils.test_any_value import BaseAnyValue 2 | from dbt.tests.adapter.utils.test_array_append import BaseArrayAppend 3 | from dbt.tests.adapter.utils.test_array_concat import BaseArrayConcat 4 | from dbt.tests.adapter.utils.test_array_construct import BaseArrayConstruct 5 | from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr 6 | from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText 7 | from dbt.tests.adapter.utils.test_concat import BaseConcat 8 | from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampNaive 9 | from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc 10 | from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd 11 | from dbt.tests.adapter.utils.test_datediff import BaseDateDiff 12 | from dbt.tests.adapter.utils.test_escape_single_quotes import ( 13 | BaseEscapeSingleQuotesQuote, 14 | ) 15 | from dbt.tests.adapter.utils.test_except import BaseExcept 16 | from dbt.tests.adapter.utils.test_hash import BaseHash 17 | from dbt.tests.adapter.utils.test_intersect import BaseIntersect 18 | from dbt.tests.adapter.utils.test_last_day import BaseLastDay 19 | from dbt.tests.adapter.utils.test_length import BaseLength 20 | from dbt.tests.adapter.utils.test_listagg import BaseListagg 21 | from dbt.tests.adapter.utils.test_position import BasePosition 22 | from 
class TestArrayConcat(BaseArrayConcat):
    pass


class TestArrayConstruct(BaseArrayConstruct):
    # Fix: this class was also named `TestArrayConcat`, which shadowed the
    # class above so pytest never collected the BaseArrayConcat suite.
    pass
-------------------------------------------------------------------------------- 1 | # TODO: Fix python model tests 2 | import pytest 3 | from dbt.tests.adapter.python_model.test_python_model import ( 4 | BasePythonIncrementalTests, 5 | BasePythonModelTests, 6 | basic_sql, 7 | m_1, 8 | schema_yml, 9 | second_sql, 10 | ) 11 | from dbt.tests.util import run_dbt 12 | 13 | basic_python_template = """ 14 | import pandas as pd 15 | 16 | def model(dbt, _): 17 | dbt.config( 18 | materialized='table', 19 | ) 20 | pdf = pd.DataFrame() 21 | df = dbt.ref("my_sql_model") 22 | df2 = dbt.source('test_source', 'test_table') 23 | df = df.limit(2) 24 | return df{extension} 25 | """ 26 | 27 | 28 | # class TestBasePythonModelDuckDBPyRelation(BasePythonModelTests): 29 | # @pytest.fixture(scope="class") 30 | # def models(self): 31 | # return { 32 | # "schema.yml": schema_yml, 33 | # "my_sql_model.sql": basic_sql, 34 | # "my_python_model.py": basic_python_template.format(extension=""), 35 | # "second_sql_model.sql": second_sql, 36 | # } 37 | # 38 | # 39 | # class TestBasePythonModelPandasDF(BasePythonModelTests): 40 | # @pytest.fixture(scope="class") 41 | # def models(self): 42 | # return { 43 | # "schema.yml": schema_yml, 44 | # "my_sql_model.sql": basic_sql, 45 | # "my_python_model.py": basic_python_template.format(extension=".df()"), 46 | # "second_sql_model.sql": second_sql, 47 | # } 48 | 49 | 50 | incremental_python = """ 51 | def model(dbt, session): 52 | dbt.config(materialized="incremental", unique_key='id') 53 | df = dbt.ref("m_1") 54 | if dbt.is_incremental: 55 | # incremental runs should only apply to part of the data 56 | df = df.filter("id > 5") 57 | return df.df() 58 | """ 59 | 60 | 61 | # class TestBasePythonIncremental(BasePythonIncrementalTests): 62 | # @pytest.fixture(scope="class") 63 | # def project_config_update(self): 64 | # return {"models": {"+incremental_strategy": "delete+insert"}} 65 | # 66 | # @pytest.fixture(scope="class") 67 | # def models(self): 68 | # 
return {"m_1.sql": m_1, "incremental.py": incremental_python} 69 | 70 | 71 | empty_upstream_model_python = """ 72 | def model(dbt, con): 73 | dbt.config( 74 | materialized='table', 75 | ) 76 | return con.query("select 'a'::varchar as a, 0::boolean as b limit 0") 77 | """ 78 | 79 | 80 | class TestEmptyPythonModel: 81 | """ 82 | This test ensures that Python models returning a DuckDBPyRelation are materialized 83 | with the correct schema, even when empty. I.e. ensure pyarrow is being used instead 84 | of pandas. 85 | """ 86 | 87 | @pytest.fixture(scope="class") 88 | def models(self): 89 | return { 90 | "upstream_model.py": empty_upstream_model_python, 91 | } 92 | 93 | 94 | # def test_run(self, project): 95 | # run_dbt(["run"]) 96 | # result = project.run_sql( 97 | # f""" 98 | # select column_name, data_type from information_schema.columns 99 | # where table_name='upstream_model' order by column_name 100 | # """, 101 | # fetch="all", 102 | # ) 103 | # assert result == [("a", "VARCHAR"), ("b", "BOOLEAN")] 104 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # :no_entry: [DEPRECATED] `dbt-excel` is not supported use `dbt-duckdb` instead 2 | 3 | [dbt-excel](https://dbt-excel.com/) was an April fools joke. Though, the joke 4 | inspired adding Excel capabilities to `dbt-duckdb`. If you would like to use 5 | `dbt-excel`, use [`dbt-duckdb`](https://github.com/jwills/dbt-duckdb) with the 6 | plugins capabilities to read from and write to Excel files. 7 | 8 | # dbt-excel: Unleash the Power of Excel in Your Data Stack 9 | 10 | ## The ultimate solution for the data-driven world 11 | 12 | Welcome to dbt-excel, the revolutionary dbt adapter that combines the rigor of dbt with the flexibility and familiarity of Excel. Get ready to change the way you look at data analytics forever. 
13 | 14 | ![dbt-excel logo](assets/dbt-excel.png) 15 | 16 | ### Features 17 | 18 | - Integrate Excel with dbt: query data that you didn't know existed, for people who didn't know you existed, all from Excel! 19 | - Use your favorite Excel functions: VLOOKUP, sheet support, averageif, and more, because life's too short for learning SQL functions. 20 | - Runs blazingly fast queries thanks to duckdb running in the background, so you can have your cake and eat it too! 21 | - Monitor your data assets, collaborate on data models, and document your queries, all within Excel. Remember, if it's not in Excel, it's not worth doing! 22 | 23 | ### **Installation** 24 | 25 | To install dbt-excel, just run the following command in your terminal: 26 | 27 | ```bash 28 | python -m pip install dbt-excel 29 | ``` 30 | 31 | ### **Development Installation** 32 | 33 | This section details how to setup a local environment to develop this dbt adapter. 34 | 35 | 1. Clone this repo to your local machine and change directory: 36 | ```bash 37 | git clone git@github.com:godatadriven/dbt-excel.git 38 | cd dbt-excel 39 | ``` 40 | 41 | 1. Create a virtual environment: 42 | ```bash 43 | python -m virtualenv .venv 44 | source .venv/bin/activate 45 | ``` 46 | 47 | 1. To install dbt-excel in editable mode, run the following command in your terminal: 48 | ```bash 49 | python -m pip install -e . 50 | ``` 51 | 52 | #### Profile setup 53 | 54 | The default profile will create a duckdb file in /tmp, of the custom `excel` type. You can adjust the path if necessary. 55 | 56 | ``` 57 | dbt_excel: 58 | target: dev 59 | outputs: 60 | dev: 61 | type: excel 62 | path: /tmp/dbt.duckdb 63 | 64 | ``` 65 | 66 | 67 | ### Running your first DBT Excel model 68 | 69 | Steps: 70 | 71 | 1. `cd` into the `dbt_project` directory, then run `dbt run -s my_first_dbt_model --profiles-dir .` this will run the models in the `models/example/my_first_dbt_model.sql` file. 72 | 2. 
An Excel file exists in `sources/people.xlsx`, which will be read and used by the models. The result will be written to `first_model.xlsx`.
93 | -------------------------------------------------------------------------------- /dbt/include/excel/macros/materializations/external.sql: -------------------------------------------------------------------------------- 1 | {% materialization external, adapter="excel", supported_languages=['sql', 'python'] %} 2 | 3 | {%- set format = render(config.get('format', default='parquet')) -%} 4 | {%- set location = render(config.get('location', default=external_location(this, format))) -%} 5 | {%- set delimiter = render(config.get('delimiter', default=',')) -%} 6 | {%- set glue_register = config.get('glue_register', default=false) -%} 7 | {%- set glue_database = render(config.get('glue_database', default='default')) -%} 8 | 9 | -- set language - python or sql 10 | {%- set language = model['language'] -%} 11 | 12 | {%- set target_relation = this.incorporate(type='view') %} 13 | 14 | -- Continue as normal materialization 15 | {%- set existing_relation = load_cached_relation(this) -%} 16 | {%- set temp_relation = make_intermediate_relation(this.incorporate(type='table'), suffix='__dbt_tmp') -%} 17 | {%- set intermediate_relation = make_intermediate_relation(target_relation, suffix='__dbt_int') -%} 18 | -- the intermediate_relation should not already exist in the database; get_relation 19 | -- will return None in that case. Otherwise, we get a relation that we can drop 20 | -- later, before we try to use this name for the current operation 21 | {%- set preexisting_temp_relation = load_cached_relation(temp_relation) -%} 22 | {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) -%} 23 | /* 24 | See ../view/view.sql for more information about this relation. 
25 | */ 26 | {%- set backup_relation_type = 'table' if existing_relation is none else existing_relation.type -%} 27 | {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%} 28 | -- as above, the backup_relation should not already exist 29 | {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%} 30 | -- grab current tables grants config for comparision later on 31 | {% set grant_config = config.get('grants') %} 32 | 33 | -- drop the temp relations if they exist already in the database 34 | {{ drop_relation_if_exists(preexisting_intermediate_relation) }} 35 | {{ drop_relation_if_exists(preexisting_temp_relation) }} 36 | {{ drop_relation_if_exists(preexisting_backup_relation) }} 37 | 38 | {{ run_hooks(pre_hooks, inside_transaction=False) }} 39 | 40 | -- `BEGIN` happens here: 41 | {{ run_hooks(pre_hooks, inside_transaction=True) }} 42 | 43 | -- build model 44 | {% call statement('create_table', language=language) -%} 45 | {{- create_table_as(False, temp_relation, compiled_code, language) }} 46 | {%- endcall %} 47 | 48 | -- write an temp relation into file 49 | {{ write_to_file(temp_relation, location, format, delimiter) }} 50 | -- create a view on top of the location 51 | {% if format == 'xlsx' %} 52 | {% set location = location + '.parquet' %} 53 | {% endif %} 54 | {% call statement('main', language='sql') -%} 55 | create or replace view {{ intermediate_relation.include(database=adapter.use_database()) }} as ( 56 | select * from '{{ location }}' 57 | ); 58 | {%- endcall %} 59 | 60 | -- cleanup 61 | {% if existing_relation is not none %} 62 | {{ adapter.rename_relation(existing_relation, backup_relation) }} 63 | {% endif %} 64 | 65 | {{ adapter.rename_relation(intermediate_relation, target_relation) }} 66 | 67 | {{ run_hooks(post_hooks, inside_transaction=True) }} 68 | 69 | {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %} 70 | {% do apply_grants(target_relation, grant_config, 
"""Functional tests for ephemeral materializations on this adapter.

Each test runs a small dbt project and compares the compiled SQL written
to ``target/run`` against its expected CTE-expanded form.
"""
import os
import re

import pytest
from dbt.tests.adapter.ephemeral.test_ephemeral import (
    BaseEphemeral,
    BaseEphemeralMulti,
    ephemeral_errors__base__base_copy_sql,
    ephemeral_errors__base__base_sql,
    ephemeral_errors__dependent_sql,
    models_n__ephemeral_level_two_sql,
    models_n__ephemeral_sql,
    models_n__root_view_sql,
    models_n__source_table_sql,
)
from dbt.tests.util import check_relations_equal, run_dbt


def _normalized_sql(path):
    """Read a compiled SQL file, strip digits and all whitespace.

    Digit stripping removes run-specific numeric suffixes from relation
    names so the comparison is stable across runs.
    """
    with open(path, "r") as handle:
        text = handle.read()
    return "".join(re.sub(r"\d+", "", text).split())


class TestEphemeralMulti(BaseEphemeralMulti):
    def test_ephemeral_multi(self, project):
        run_dbt(["seed"])
        results = run_dbt(["run"])
        assert len(results) == 3

        # Every downstream model must contain exactly the seed's rows.
        for model in ("dependent", "double_dependent", "super_dependent"):
            check_relations_equal(project.adapter, ["seed", model])

        compiled_path = "./target/run/test/models/double_dependent.sql"
        assert os.path.exists(compiled_path)
        expected = (
            'create view "memory"."test_test_ephemeral"."double_dependent__dbt_tmp" as ('
            "with __dbt__cte__base as ("
            "select * from test_test_ephemeral.seed"
            "), __dbt__cte__base_copy as ("
            "select * from __dbt__cte__base"
            ")-- base_copy just pulls from base. Make sure the listed"
            "-- graph of CTEs all share the same dbt_cte__base cte"
            "select * from __dbt__cte__base where gender = 'Male'"
            "union all"
            "select * from __dbt__cte__base_copy where gender = 'Female'"
            ");"
        )
        assert _normalized_sql(compiled_path) == "".join(expected.split())


class TestEphemeralNested(BaseEphemeral):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "ephemeral_level_two.sql": models_n__ephemeral_level_two_sql,
            "root_view.sql": models_n__root_view_sql,
            "ephemeral.sql": models_n__ephemeral_sql,
            "source_table.sql": models_n__source_table_sql,
        }

    def test_ephemeral_nested(self, project):
        results = run_dbt(["run"])
        assert len(results) == 2

        compiled_path = "./target/run/test/models/root_view.sql"
        assert os.path.exists(compiled_path)
        expected = (
            'create view "memory"."test_test_ephemeral"."root_view__dbt_tmp" as ('
            "with __dbt__cte__ephemeral_level_two as ("
            'select * from "memory"."test_test_ephemeral"."source_table"'
            "), __dbt__cte__ephemeral as ("
            "select * from __dbt__cte__ephemeral_level_two"
            ")select * from __dbt__cte__ephemeral"
            ");"
        )
        assert _normalized_sql(compiled_path) == "".join(expected.split())


class TestEphemeralErrorHandling(BaseEphemeral):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "dependent.sql": ephemeral_errors__dependent_sql,
            "base": {
                "base.sql": ephemeral_errors__base__base_sql,
                "base_copy.sql": ephemeral_errors__base__base_copy_sql,
            },
        }

    def test_ephemeral_error_handling(self, project):
        # A broken ephemeral upstream should skip the dependent model
        # with a compilation error, not crash the run.
        results = run_dbt(["run"], expect_pass=False)
        assert len(results) == 1
        assert results[0].status == "skipped"
        assert "Compilation Error" in results[0].message
"""Functional tests for the ``external`` materialization of dbt-excel."""
import pytest
from dbt.tests.adapter.basic.files import (
    base_table_sql,
    model_base,
    schema_base_yml,
    seeds_base_csv,
)
from dbt.tests.util import (
    check_relation_types,
    check_relations_equal,
    check_result_nodes_by_name,
    relation_from_name,
    run_dbt,
)

config_materialized_default = """
{{ config(materialized='external') }}
"""

config_materialized_parquet_with_location = """
{{ config(materialized="external", location="test.parquet") }}
"""

config_materialized_csv = """
{{ config(materialized="external", format="csv", location="test.csv") }}
"""

config_materialized_csv_delim = """
{{ config(materialized="external", format="csv", location="test_delim.csv", delimiter="|") }}
"""

config_materialized_excel = """
{{ config(materialized="external", format="xlsx", location="test_excel.xlsx") }}
"""

default_external_sql = config_materialized_default + model_base
parquet_table_location_sql = config_materialized_parquet_with_location + model_base
csv_table_sql = config_materialized_csv + model_base
csv_table_delim_sql = config_materialized_csv_delim + model_base
excel_table_sql = config_materialized_excel + model_base

# Every model materialized as `external` plus the one regular table model.
_ALL_MODELS = [
    "table_model",
    "table_default",
    "table_parquet",
    "table_csv",
    "table_csv_delim",
    "table_excel",
]


class BaseExternalMaterializations:
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "table_model.sql": base_table_sql,
            "table_default.sql": default_external_sql,
            "table_parquet.sql": parquet_table_location_sql,
            "table_csv.sql": csv_table_sql,
            "table_csv_delim.sql": csv_table_delim_sql,
            "table_excel.sql": excel_table_sql,
            "schema.yml": schema_base_yml,
        }

    @pytest.fixture(scope="class")
    def seeds(self):
        return {
            "base.csv": seeds_base_csv,
        }

    @pytest.fixture(scope="class")
    def project_config_update(self):
        return {
            "name": "base",
        }

    def test_base(self, project):
        # One seed is loaded...
        seed_results = run_dbt(["seed"])
        assert len(seed_results) == 1

        # ...then all six models build successfully.
        run_results = run_dbt()
        assert len(run_results) == len(_ALL_MODELS)
        check_result_nodes_by_name(run_results, _ALL_MODELS)

        # External models materialize as views over the exported file;
        # only the seed and the plain model are physical tables.
        expected_types = {name: "view" for name in _ALL_MODELS}
        expected_types["base"] = "table"
        expected_types["table_model"] = "table"
        check_relation_types(project.adapter, expected_types)

        # The seed has exactly ten rows.
        relation = relation_from_name(project.adapter, "base")
        row_count = project.run_sql(
            f"select count(*) as num_rows from {relation}", fetch="one"
        )
        assert row_count[0] == 10

        # Every model round-trips the seed's data unchanged.
        check_relations_equal(project.adapter, ["base"] + _ALL_MODELS)

        # Catalog contains the seed + six models, and the one source.
        catalog = run_dbt(["docs", "generate"])
        assert len(catalog.nodes) == 7
        assert len(catalog.sources) == 1


class TestExternalMaterializations(BaseExternalMaterializations):
    pass
name: CI pipeline

on:
  pull_request:
  push:
    branches:
      - main
    tags:
      - '*'
  workflow_dispatch:

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.8
        uses: actions/setup-python@v4
        with:
          python-version: 3.8

      - name: Install pre-commit
        shell: bash
        run: |
          python -m pip install --upgrade pip
          python -m pip install mypy pre-commit

      - name: Run pre-commit hooks
        shell: bash
        # FIX: the flag is --all-files; `--all-file` is rejected by pre-commit.
        run: pre-commit run --all-files

  tests:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version:
          - "3.7"
          - "3.8"
          - "3.9"
          - "3.10"

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          sudo apt-get install libsasl2-dev
          python -m pip install --upgrade pip
          python -m pip install tox

      - name: Unit tests with tox
        run: tox -e unit

      - name: Functional tests with tox
        run: tox -e functional

  test-and-build-release:
    if: contains(github.ref, 'refs/tags/')
    runs-on: ubuntu-latest
    needs:
      - tests
      - "pre-commit"
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.9
        uses: actions/setup-python@v4
        with:
          python-version: 3.9

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install setuptools wheel twine check-wheel-contents

      - name: Test release
        run: |
          python setup.py sdist bdist_wheel
          pip install dist/dbt-excel-*.tar.gz
          pip install dist/dbt_excel-*-py3-none-any.whl
          twine check dist/dbt_excel-*-py3-none-any.whl dist/dbt-excel-*.tar.gz
          check-wheel-contents dist/*.whl --ignore W002,W007,W008

      - uses: actions/upload-artifact@v3
        with:
          name: dist
          path: dist/

  github-release:
    runs-on: ubuntu-latest
    needs: test-and-build-release
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.9
        uses: actions/setup-python@v4
        with:
          python-version: 3.9

      - name: Get version
        id: get_version
        # FIX: `::set-output` is deprecated/disabled by GitHub; write to $GITHUB_OUTPUT.
        run: echo "VERSION=$(python setup.py --version)" >> "$GITHUB_OUTPUT"

      - name: Find release type
        id: get_release_type
        env:
          # FIX: `env.version_number` was never defined anywhere in the workflow,
          # so the prerelease check always evaluated to false. Derive it from the
          # version captured by the get_version step instead.
          IS_PRERELEASE: ${{ contains(steps.get_version.outputs.VERSION, 'rc') || contains(steps.get_version.outputs.VERSION, 'b') }}
        run: echo "isPrerelease=$IS_PRERELEASE" >> "$GITHUB_OUTPUT"

      - name: Create GitHub release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.get_version.outputs.VERSION }}
          release_name: dbt-excel ${{ steps.get_version.outputs.VERSION }}
          # FIX: the step exports `isPrerelease`; the old reference to
          # `outputs.IS_PRERELEASE` resolved to an empty string.
          prerelease: ${{ steps.get_release_type.outputs.isPrerelease }}
          body: ${{ github.event.head_commit.message }}

  pypi-release:
    runs-on: ubuntu-latest
    needs: test-and-build-release
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.9
        uses: actions/setup-python@v4
        with:
          python-version: 3.9

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install twine

      - name: Get version
        id: get_version
        run: echo "VERSION=$(python setup.py --version)" >> "$GITHUB_OUTPUT"

      - uses: actions/download-artifact@v3
        with:
          name: dist
          path: dist/

      - name: Release to pypi
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          twine upload --skip-existing --non-interactive dist/dbt_excel-${{ steps.get_version.outputs.VERSION }}-py3-none-any.whl dist/dbt-excel-${{ steps.get_version.outputs.VERSION }}.tar.gz
def normalize(path):
    """Normalize *path* for case- and separator-insensitive comparison.

    On windows, neither call is enough on its own:

    >>> normcase('C:\\documents/ALL CAPS/subdir\\..')
    'c:\\documents\\all caps\\subdir\\..'
    >>> normpath('C:\\documents/ALL CAPS/subdir\\..')
    'C:\\documents\\ALL CAPS'
    >>> normpath(normcase('C:\\documents/ALL CAPS/subdir\\..'))
    'c:\\documents\\all caps'
    """
    return os.path.normcase(os.path.normpath(path))


class Obj:
    """Minimal stand-in for dbt's parsed CLI args object."""

    # `which` names the invoked sub-command; "blah" is a placeholder.
    which = "blah"
    single_threaded = False


def mock_connection(name):
    """Return a MagicMock whose ``name`` attribute equals *name*.

    ``name`` is special-cased by MagicMock's constructor, so it must be
    assigned after construction rather than passed as a kwarg.
    """
    conn = mock.MagicMock()
    conn.name = name
    return conn


def profile_from_dict(profile, profile_name, cli_vars="{}"):
    """Build a dbt ``Profile`` from a raw profile dict.

    *cli_vars* may be a dict or a ``--vars``-style string.
    """
    from dbt.config import Profile
    from dbt.config.renderer import ProfileRenderer
    from dbt.config.utils import parse_cli_vars

    # NOTE: the previously imported dbt.context.base.generate_base_context
    # was never used here and has been dropped.
    if not isinstance(cli_vars, dict):
        cli_vars = parse_cli_vars(cli_vars)

    renderer = ProfileRenderer(cli_vars)
    return Profile.from_raw_profile_info(
        profile,
        profile_name,
        renderer,
    )


def project_from_dict(project, profile, packages=None, selectors=None, cli_vars="{}"):
    """Build a dbt ``Project`` from raw project/package/selector dicts."""
    # Imported here (not at module top) per this module's convention of
    # keeping imports inside functions to avoid import/mocking issues.
    from dbt.config.project import PartialProject
    from dbt.config.renderer import DbtProjectYamlRenderer
    from dbt.config.utils import parse_cli_vars

    if not isinstance(cli_vars, dict):
        cli_vars = parse_cli_vars(cli_vars)

    renderer = DbtProjectYamlRenderer(profile, cli_vars)

    # "project-root" is consumed here; fall back to the cwd when absent.
    project_root = project.pop("project-root", os.getcwd())

    partial = PartialProject.from_dicts(
        project_root=project_root,
        project_dict=project,
        packages_dict=packages,
        selectors_dict=selectors,
    )
    return partial.render(renderer)


def config_from_parts_or_dicts(project, profile, packages=None, selectors=None, cli_vars="{}"):
    """Assemble a ``RuntimeConfig`` from Project/Profile objects or raw dicts.

    Either argument may already be a parsed object; raw dicts are converted
    via :func:`profile_from_dict` / :func:`project_from_dict` first.
    """
    from copy import deepcopy

    from dbt.config import Profile, Project, RuntimeConfig

    if isinstance(project, Project):
        profile_name = project.profile_name
    else:
        profile_name = project.get("profile")

    if not isinstance(profile, Profile):
        profile = profile_from_dict(
            deepcopy(profile),
            profile_name,
            cli_vars,
        )

    if not isinstance(project, Project):
        project = project_from_dict(
            deepcopy(project),
            profile,
            packages,
            selectors,
            cli_vars,
        )

    args = Obj()
    args.vars = cli_vars
    args.profile_dir = "/dev/null"
    return RuntimeConfig.from_parts(project=project, profile=profile, args=args)


def inject_plugin(plugin):
    """Register *plugin* with dbt's global adapter factory."""
    from dbt.adapters.factory import FACTORY

    key = plugin.adapter.type()
    FACTORY.plugins[key] = plugin


def inject_adapter(value, plugin):
    """Inject the given adapter into the adapter factory, so your hand-crafted
    artisanal adapter will be available from get_adapter() as if dbt loaded it.
    """
    inject_plugin(plugin)
    from dbt.adapters.factory import FACTORY

    key = value.type()
    FACTORY.adapters[key] = value


class ContractTestCase(TestCase):
    """Base class for round-trip (to_dict/from_dict) contract tests."""

    # Subclasses set this to the contract class under test.
    ContractType = None

    def setUp(self):
        self.maxDiff = None
        super().setUp()

    def assert_to_dict(self, obj, dct):
        self.assertEqual(obj.to_dict(), dct)

    def assert_from_dict(self, obj, dct, cls=None):
        if cls is None:
            cls = self.ContractType
        self.assertEqual(cls.from_dict(dct), obj)

    def assert_symmetric(self, obj, dct, cls=None):
        self.assert_to_dict(obj, dct)
        self.assert_from_dict(obj, dct, cls)

    def assert_fails_validation(self, dct, cls=None):
        # Imported here per this module's convention of function-local imports.
        from hologram import ValidationError

        if cls is None:
            cls = self.ContractType

        with self.assertRaises(ValidationError):
            cls.from_dict(dct)


def generate_name_macros(package):
    """Yield ParsedMacro objects for generate_{database,schema,alias}_name.

    Each generated macro returns the explicit value when given one and
    otherwise falls back to the target's attribute (or node.name for alias).
    """
    from dbt.contracts.graph.parsed import ParsedMacro
    from dbt.node_types import NodeType

    name_sql = {}
    for component in ("database", "schema", "alias"):
        if component == "alias":
            source = "node.name"
        else:
            source = f"target.{component}"
        name = f"generate_{component}_name"
        sql = f"{{% macro {name}(value, node) %}} {{% if value %}} {{{{ value }}}} {{% else %}} {{{{ {source} }}}} {{% endif %}} {{% endmacro %}}"
        name_sql[name] = sql

    # raw_sql carries all three macros; macro_sql carries only this one.
    all_sql = "\n".join(name_sql.values())
    for name, sql in name_sql.items():
        pm = ParsedMacro(
            name=name,
            resource_type=NodeType.Macro,
            unique_id=f"macro.{package}.{name}",
            package_name=package,
            original_file_path=normalize("macros/macro.sql"),
            root_path="./dbt_modules/root",
            path=normalize("macros/macro.sql"),
            raw_sql=all_sql,
            macro_sql=sql,
        )
        yield pm
f"target.{component}" 161 | name = f"generate_{component}_name" 162 | sql = f"{{% macro {name}(value, node) %}} {{% if value %}} {{{{ value }}}} {{% else %}} {{{{ {source} }}}} {{% endif %}} {{% endmacro %}}" 163 | name_sql[name] = sql 164 | 165 | all_sql = "\n".join(name_sql.values()) 166 | for name, sql in name_sql.items(): 167 | pm = ParsedMacro( 168 | name=name, 169 | resource_type=NodeType.Macro, 170 | unique_id=f"macro.{package}.{name}", 171 | package_name=package, 172 | original_file_path=normalize("macros/macro.sql"), 173 | root_path="./dbt_modules/root", 174 | path=normalize("macros/macro.sql"), 175 | raw_sql=all_sql, 176 | macro_sql=sql, 177 | ) 178 | yield pm 179 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------