├── .github └── workflows │ ├── release.yaml │ └── test.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── dask_databricks ├── __init__.py ├── cli.py ├── databrickscluster.py └── tests │ └── test_databricks.py └── pyproject.toml /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | on: 3 | push: 4 | tags: 5 | - "*.*.*" 6 | jobs: 7 | publish: 8 | runs-on: ubuntu-latest 9 | environment: 10 | name: pypi 11 | url: https://pypi.org/p/dask-databricks 12 | permissions: 13 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing 14 | steps: 15 | - uses: actions/checkout@v3 16 | with: 17 | fetch-depth: 0 18 | - name: Build package 19 | run: pipx install hatch && hatch build 20 | - name: Publish 21 | uses: pypa/gh-action-pypi-publish@release/v1 22 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: "Test" 2 | on: 3 | pull_request: 4 | push: 5 | 6 | concurrency: 7 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 8 | cancel-in-progress: true 9 | 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | timeout-minutes: 45 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: ["3.9", "3.10", "3.11"] 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | - uses: actions/setup-python@v2 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install hatch 25 | run: pipx install hatch 26 | - name: Run tests 27 | run: hatch run test:run 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | _version.py 163 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # NOTE: autoupdate does not pick up flake8-bugbear since it is a transitive 2 | # dependency. Make sure to update flake8-bugbear manually on a regular basis. 3 | repos: 4 | - repo: https://github.com/psf/black 5 | rev: 23.11.0 6 | hooks: 7 | - id: black 8 | language_version: python3 9 | exclude: versioneer.py 10 | args: 11 | - --target-version=py39 12 | - repo: https://github.com/astral-sh/ruff-pre-commit 13 | # Ruff version. 14 | rev: "v0.1.5" 15 | hooks: 16 | - id: ruff 17 | language_version: python3 18 | args: [--fix, --exit-non-zero-on-fix] 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, Dask Developers 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dask-databricks 2 | 3 | Cluster tools for running Dask on Databricks multi-node clusters. 4 | 5 | 6 | ## Quickstart 7 | 8 | To launch a Dask cluster on Databricks you need to create an [init script](https://docs.databricks.com/en/init-scripts/index.html) with the following contents and configure your multi-node cluster to use it. 9 | 10 | ```bash 11 | #!/bin/bash 12 | 13 | # Install Dask + Dask Databricks 14 | /databricks/python/bin/pip install --upgrade dask[complete] dask-databricks 15 | 16 | # Start Dask cluster components 17 | dask databricks run 18 | ``` 19 | 20 | Then from your Databricks Notebook you can quickly connect a Dask `Client` to the scheduler running on the Spark Driver Node. 
21 | 22 | ```python 23 | import dask_databricks 24 | 25 | client = dask_databricks.get_client() 26 | ``` 27 | 28 | Now you can submit work from your notebook to the multi-node Dask cluster. 29 | 30 | ```python 31 | def inc(x): 32 | return x + 1 33 | 34 | x = client.submit(inc, 10) 35 | x.result() 36 | ``` 37 | 38 | ### Dashboard 39 | 40 | You can access the [Dask dashboard](https://docs.dask.org/en/latest/dashboard.html) via the Databricks driver-node proxy. The link can be found in the `Client` or `DatabricksCluster` repr, or via `client.dashboard_link`. 41 | 42 | ```python 43 | >>> print(client.dashboard_link) 44 | https://dbc-dp-xxxx.cloud.databricks.com/driver-proxy/o/xxxx/xx-xxx-xxxx/8087/status 45 | ``` 46 | 47 | ![](https://user-images.githubusercontent.com/1610850/281442274-450d41c6-2eb6-42a1-8de6-c4a1a1b84193.png) 48 | 49 | ![](https://user-images.githubusercontent.com/1610850/281441285-9b84d5f1-d58a-45dc-9354-7385e1599d1f.png) 50 | 51 | ## Releasing 52 | 53 | Releases of this project are automated using [GitHub Actions and the `pypa/gh-action-pypi-publish` action](https://github.com/dask-contrib/dask-databricks/blob/main/.github/workflows/release.yaml). 54 | 55 | To create a new release, push a tag to the upstream repo in the format `x.x.x`. The package will be built and pushed to PyPI automatically and then later picked up by conda-forge. 56 | 57 | ```bash 58 | # Make sure you have an upstream remote 59 | git remote add upstream git@github.com:dask-contrib/dask-databricks.git 60 | 61 | # Create a tag and push it upstream 62 | git tag x.x.x && git push upstream main --tags 63 | ``` 64 | -------------------------------------------------------------------------------- /dask_databricks/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023-present Dask Developers 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | from .databrickscluster import DatabricksCluster, get_client # noqa 6 | 7 | # Define the variable '__version__': 8 | try: 9 | # If setuptools_scm is installed (e.g. in a development environment with 10 | # an editable install), then use it to determine the version dynamically. 11 | from setuptools_scm import get_version 12 | 13 | # This will fail with LookupError if the package is not installed in 14 | # editable mode or if Git is not installed. 15 | __version__ = get_version(root="..", relative_to=__file__) 16 | except (ImportError, LookupError): 17 | # As a fallback, use the version that is hard-coded in the file. 18 | try: 19 | from dask_databricks._version import __version__ # noqa: F401 20 | except ModuleNotFoundError: 21 | # The user is probably trying to run this without having installed 22 | # the package, so complain. 23 | raise RuntimeError("dask-databricks is not correctly installed. 
" "Please install it with pip.") 24 | -------------------------------------------------------------------------------- /dask_databricks/cli.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import socket 5 | import subprocess 6 | import sys 7 | import time 8 | 9 | import click 10 | from rich.logging import RichHandler 11 | 12 | 13 | def get_logger(): 14 | logging.basicConfig(level="INFO", format="%(message)s", datefmt="[%X]", handlers=[RichHandler()]) 15 | return logging.getLogger("dask_databricks") 16 | 17 | 18 | @click.group(name="databricks") 19 | def main(): 20 | """Tools to launch Dask on Databricks.""" 21 | 22 | 23 | @main.command() 24 | @click.option('--worker-command', help='Custom worker command') 25 | @click.option('--worker-args', help='Additional worker arguments') 26 | @click.option( 27 | "--cuda", 28 | is_flag=True, 29 | show_default=True, 30 | default=False, 31 | help="Use `dask cuda worker` from the dask-cuda package when starting workers.", 32 | ) 33 | def run(worker_command, worker_args, cuda): 34 | """Run Dask processes on a Databricks cluster.""" 35 | log = get_logger() 36 | 37 | log.info("Setting up Dask on a Databricks cluster.") 38 | 39 | DB_IS_DRIVER = os.getenv("DB_IS_DRIVER") 40 | DB_DRIVER_IP = os.getenv("DB_DRIVER_IP") 41 | 42 | if DB_DRIVER_IP is None or DB_IS_DRIVER is None: 43 | log.error( 44 | "Unable to find expected environment variables DB_IS_DRIVER and DB_DRIVER_IP. " 45 | "Are you running this command on a Databricks multi-node cluster?" 46 | ) 47 | sys.exit(1) 48 | 49 | if DB_IS_DRIVER == "TRUE": 50 | log.info("This node is the Dask scheduler.") 51 | scheduler_process = subprocess.Popen(["dask", "scheduler", "--dashboard-address", ":8087"]) 52 | time.sleep(5) # give the scheduler time to start 53 | if scheduler_process.poll() is not None: 54 | log.error("Scheduler process has exited prematurely.") 55 | sys.exit(1) 56 | else: 57 | # Specify the same port for all workers 58 | worker_port = 8786 59 | log.info("This node is a Dask worker.") 60 | log.info(f"Connecting to Dask scheduler at {DB_DRIVER_IP}:{worker_port}") 61 | while True: 62 | try: 63 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 64 | sock.connect((DB_DRIVER_IP, worker_port)) 65 | sock.close() 66 | break 67 | except ConnectionRefusedError: 68 | log.info("Scheduler not available yet. 
Waiting...") 69 | time.sleep(1) 70 | 71 | # Construct the worker command 72 | if worker_command: 73 | worker_command = worker_command.split() 74 | elif cuda: 75 | worker_command = ["dask", "cuda", "worker"] 76 | else: 77 | worker_command = ["dask", "worker"] 78 | 79 | if worker_args: 80 | try: 81 | # Try to decode the JSON-encoded worker_args 82 | worker_args_list = json.loads(worker_args) 83 | if not isinstance(worker_args_list, list): 84 | raise ValueError("The JSON-encoded worker_args must be a list.") 85 | except json.JSONDecodeError: 86 | # If decoding as JSON fails, split worker_args by spaces 87 | worker_args_list = worker_args.split() 88 | 89 | worker_command.extend(worker_args_list) 90 | worker_command.append(f"tcp://{DB_DRIVER_IP}:{worker_port}") 91 | 92 | worker_process = subprocess.Popen(worker_command) 93 | time.sleep(5) # give the worker time to start 94 | if worker_process.poll() is not None: 95 | log.error("Worker process has exited prematurely.") 96 | sys.exit(1) 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /dask_databricks/databrickscluster.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uuid 3 | from typing import Optional 4 | 5 | from distributed.core import rpc 6 | from distributed.deploy.cluster import Cluster 7 | from tornado.ioloop import IOLoop 8 | 9 | # Databricks Notebooks injects the `spark` session variable but we need to create it ourselves 10 | try: 11 | from pyspark.sql import SparkSession 12 | 13 | spark = SparkSession.getActiveSession() 14 | except ImportError: 15 | spark = None 16 | 17 | 18 | class DatabricksCluster(Cluster): 19 | """Connect to a Dask cluster deployed via databricks.""" 20 | 21 | def __init__( 22 | self, 23 | loop: Optional[IOLoop] = None, 24 | asynchronous: bool = False, 25 | ): 26 | self.spark_local_ip = os.environ.get("SPARK_LOCAL_IP") 27 | if self.spark_local_ip is None: 28 | raise KeyError( 29 | "Unable to find expected environment variable SPARK_LOCAL_IP. " 30 | "Are you running this on a Databricks driver node?" 31 | ) 32 | if os.environ.get("MASTER") and "local[" in os.environ.get("MASTER"): 33 | raise EnvironmentError( 34 | "You appear to be trying to run a multi-node Dask cluster on a " 35 | "single-node databricks cluster. 
Maybe you want " 36 | "`dask.distributed.LocalCluster().get_client()` instead" 37 | 38 | ) 39 | try: 40 | name = spark.conf.get("spark.databricks.clusterUsageTags.clusterId") 41 | except AttributeError: 42 | name = "unknown-databricks-" + uuid.uuid4().hex[:10] 43 | super().__init__(name=name, loop=loop, asynchronous=asynchronous) 44 | 45 | if not self.called_from_running_loop: 46 | self._loop_runner.start() 47 | self.sync(self._start) 48 | 49 | async def _start(self): 50 | self.scheduler_comm = rpc(f"{self.spark_local_ip}:8786") 51 | await super()._start() 52 | 53 | @property 54 | def dashboard_link(self): 55 | cluster_id = spark.conf.get("spark.databricks.clusterUsageTags.clusterId") 56 | org_id = spark.conf.get("spark.databricks.clusterUsageTags.orgId") 57 | workspace_url = spark.conf.get("spark.databricks.workspaceUrl") 58 | return f"https://{workspace_url}/driver-proxy/o/{org_id}/{cluster_id}/8087/status" 59 | 60 | 61 | def get_client(): 62 | """Get a Dask client connected to a Databricks cluster.""" 63 | return DatabricksCluster().get_client() 64 | -------------------------------------------------------------------------------- /dask_databricks/tests/test_databricks.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from dask.distributed import Client 5 | from distributed.deploy import Cluster, LocalCluster 6 | 7 | from dask_databricks import DatabricksCluster, get_client 8 | 9 | 10 | @pytest.fixture(scope="session") 11 | def dask_cluster(): 12 | """Start a LocalCluster to simulate the cluster that would be started on Databricks.""" 13 | return LocalCluster(scheduler_port=8786) 14 | 15 | 16 | @pytest.fixture 17 | def remove_spark_local_ip(): 18 | original_spark_local_ip = os.getenv("SPARK_LOCAL_IP") 19 | if original_spark_local_ip: 20 | del os.environ["SPARK_LOCAL_IP"] 21 | yield None 22 | if original_spark_local_ip: 23 | os.environ["SPARK_LOCAL_IP"] = original_spark_local_ip 24 | 25 | 26 | @pytest.fixture 27 | def set_spark_local_ip(): 28 | original_spark_local_ip = os.getenv("SPARK_LOCAL_IP") 29 | os.environ["SPARK_LOCAL_IP"] = "127.0.0.1" 30 | yield None 31 | if original_spark_local_ip: 32 | os.environ["SPARK_LOCAL_IP"] = original_spark_local_ip 33 | else: 34 | del os.environ["SPARK_LOCAL_IP"] 35 | 36 | 37 | def test_databricks_cluster_raises_key_error_when_initialised_outside_of_databricks(remove_spark_local_ip): 38 | with pytest.raises(KeyError): 39 | DatabricksCluster() 40 | 41 | def test_databricks_cluster_raises_environment_error_when_master_variable_implies_single_node( 42 | monkeypatch, 43 | set_spark_local_ip, 44 | dask_cluster, 45 | ): 46 | monkeypatch.setenv("MASTER", "local[8]") 47 | with pytest.raises(EnvironmentError): 48 | DatabricksCluster() 49 | 50 | def test_databricks_cluster_create(set_spark_local_ip, dask_cluster): 51 | cluster = DatabricksCluster() 52 | assert isinstance(cluster, Cluster) 53 | 54 | 55 | def test_databricks_cluster_create_client(set_spark_local_ip, dask_cluster): 56 | cluster = DatabricksCluster() 57 | client = Client(cluster) 58 | assert isinstance(client, Client) 59 | assert client.submit(sum, (10, 1)).result() == 11 60 | 61 | 62 | def test_get_client(set_spark_local_ip, dask_cluster): 63 | client = get_client() 64 | assert isinstance(client, Client) 65 | assert isinstance(client.cluster, DatabricksCluster) 66 | assert client.submit(sum, (10, 1)).result() == 11 67 | -------------------------------------------------------------------------------- /pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-vcs"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "dask-databricks" 7 | dynamic = ["version"] 8 | description = '' 9 | readme = "README.md" 10 | requires-python = ">=3.9" 11 | license = {text = "BSD-3-Clause"} 12 | keywords = [] 13 | authors = [ 14 | { name = "Jacob Tomlinson", email = "jacob@tomlinson.email" }, 15 | ] 16 | classifiers = [ 17 | "Development Status :: 4 - Beta", 18 | "Programming Language :: Python", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: Implementation :: CPython", 23 | "Programming Language :: Python :: Implementation :: PyPy", 24 | ] 25 | dependencies = [ 26 | "bokeh<=3.2.2", # Necessary until https://github.com/dask/distributed/issues/8333 is resolved 27 | "click>=8.1", 28 | "dask", 29 | "rich", 30 | "distributed", 31 | ] 32 | 33 | [project.urls] 34 | Documentation = "https://github.com/dask-contrib/dask-databricks#readme" 35 | Issues = "https://github.com/dask-contrib/dask-databricks/issues" 36 | Source = "https://github.com/dask-contrib/dask-databricks" 37 | 38 | [project.entry-points.dask_cli] 39 | databricks = "dask_databricks.cli:main" 40 | 41 | [tool] 42 | rye = { dev-dependencies = [ 43 | "ipy>=1.1", 44 | "pytest>=7.4.3", 45 | ] } 46 | 47 | [tool.hatch.metadata] 48 | allow-direct-references = true 49 | 50 | [tool.hatch.version] 51 | source = "vcs" 52 | 53 | [tool.hatch.build.hooks.vcs] 54 | version-file = "dask_databricks/_version.py" 55 | 56 | [tool.hatch.envs.default] 57 | dependencies = [ 58 | "coverage[toml]>=6.5", 59 | "pytest", 60 | ] 61 | [tool.hatch.envs.default.scripts] 62 | test = "pytest {args:tests}" 63 | test-cov = "coverage run -m pytest {args:tests}" 64 | cov-report = [ 65 | "- coverage combine", 66 | "coverage report", 67 | ] 68 | cov = [ 69 | "test-cov", 70 | "cov-report", 71 | ] 72 | 73 | [[tool.hatch.envs.all.matrix]] 74 | python = ["3.7", "3.8", "3.9", "3.10", "3.11"] 75 | 76 | [tool.hatch.envs.lint] 77 | detached = true 78 | dependencies = [ 79 | "black>=23.1.0", 80 | "mypy>=1.0.0", 81 | "ruff>=0.0.243", 82 | ] 83 | [tool.hatch.envs.lint.scripts] 84 | typing = "mypy --install-types --non-interactive {args:dask_databricks}" 85 | style = [ 86 | "ruff {args:.}", 87 | "black --check --diff {args:.}", 88 | ] 89 | fmt = [ 90 | "black {args:.}", 91 | "ruff --fix {args:.}", 92 | "style", 93 | ] 94 | all = [ 95 | "style", 96 | "typing", 97 | ] 98 | 99 | [tool.hatch.envs.test] 100 | dependencies = [ 101 | "pytest>=7.2.2", 102 | "pytest-timeout>=2.1.0", 103 | ] 104 | 105 | [tool.hatch.envs.test.scripts] 106 | run = "pytest" 107 | 108 | [tool.black] 109 | target-version = ["py37"] 110 | line-length = 120 111 | skip-string-normalization = true 112 | 113 | [tool.ruff] 114 | # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default. 115 | select = ["E", "F", "I"] 116 | ignore = [] 117 | 118 | # Allow autofix for all enabled rules (when `--fix`) is provided. 119 | fixable = ["I"] 120 | # unfixable = [] 121 | 122 | # Exclude a variety of commonly ignored directories. 
123 | exclude = [ 124 | ".bzr", 125 | ".direnv", 126 | ".eggs", 127 | ".git", 128 | ".hg", 129 | ".mypy_cache", 130 | ".nox", 131 | ".pants.d", 132 | ".pytype", 133 | ".ruff_cache", 134 | ".svn", 135 | ".tox", 136 | ".venv", 137 | "__pypackages__", 138 | "_build", 139 | "buck-out", 140 | "build", 141 | "dist", 142 | "node_modules", 143 | "venv", 144 | ] 145 | 146 | line-length = 120 147 | 148 | # Allow unused variables when underscore-prefixed. 149 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 150 | 151 | # Assume Python 3.10. 152 | target-version = "py310" 153 | 154 | [tool.ruff.isort] 155 | known-first-party = ["dask_databricks"] 156 | 157 | [tool.ruff.flake8-tidy-imports] 158 | ban-relative-imports = "all" 159 | 160 | [tool.ruff.per-file-ignores] 161 | # Tests can use magic values, assertions, and relative imports 162 | "tests/**/*" = ["PLR2004", "S101", "TID252"] 163 | 164 | [tool.coverage.run] 165 | source_pkgs = ["dask_databricks", "tests"] 166 | branch = true 167 | parallel = true 168 | omit = [ 169 | "dask_databricks/__about__.py", 170 | ] 171 | 172 | [tool.coverage.paths] 173 | dask_databricks = ["dask_databricks", "*/dask-databricks/dask_databricks"] 174 | tests = ["tests", "*/dask-databricks/tests"] 175 | 176 | [tool.coverage.report] 177 | exclude_lines = [ 178 | "no cov", 179 | "if __name__ == .__main__.:", 180 | "if TYPE_CHECKING:", 181 | ] 182 | --------------------------------------------------------------------------------
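For local development it can be handy to exercise `DatabricksCluster` without a Databricks cluster at all. The sketch below simply mirrors the fixtures in `dask_databricks/tests/test_databricks.py`: a `LocalCluster` listening on port 8786 stands in for the scheduler that `dask databricks run` would start on the Spark driver node, and `SPARK_LOCAL_IP` is pointed at `127.0.0.1` so that `get_client()` can find it. This is a local simulation for testing only, not how the package is used on a real Databricks cluster.

```python
import os

from dask.distributed import LocalCluster

import dask_databricks

if __name__ == "__main__":
    # Stand-in for the scheduler that `dask databricks run` starts on the
    # Spark driver node (the test suite uses the same LocalCluster fixture).
    cluster = LocalCluster(scheduler_port=8786)

    # DatabricksCluster reads SPARK_LOCAL_IP to locate the scheduler, so
    # point it at the local stand-in, as the test fixtures do.
    os.environ["SPARK_LOCAL_IP"] = "127.0.0.1"

    client = dask_databricks.get_client()
    assert client.submit(sum, (10, 1)).result() == 11
```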