├── .github └── workflows │ ├── release.yaml │ └── test.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── dask_databricks ├── __init__.py ├── cli.py ├── databrickscluster.py └── tests │ └── test_databricks.py └── pyproject.toml /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | on: 3 | push: 4 | tags: 5 | - "*.*.*" 6 | jobs: 7 | publish: 8 | runs-on: ubuntu-latest 9 | environment: 10 | name: pypi 11 | url: https://pypi.org/p/dask-databricks 12 | permissions: 13 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing 14 | steps: 15 | - uses: actions/checkout@v3 16 | with: 17 | fetch-depth: 0 18 | - name: Build package 19 | run: pipx install hatch && hatch build 20 | - name: Publish 21 | uses: pypa/gh-action-pypi-publish@release/v1 22 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: "Test" 2 | on: 3 | pull_request: 4 | push: 5 | 6 | concurrency: 7 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 8 | cancel-in-progress: true 9 | 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | timeout-minutes: 45 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: ["3.9", "3.10", "3.11"] 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | - uses: actions/setup-python@v2 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install hatch 25 | run: pipx install hatch 26 | - name: Run tests 27 | run: hatch run test:run 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | _version.py 163 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # NOTE: autoupdate does not pick up flake8-bugbear since it is a transitive 2 | # dependency. Make sure to update flake8-bugbear manually on a regular basis. 3 | repos: 4 | - repo: https://github.com/psf/black 5 | rev: 23.11.0 6 | hooks: 7 | - id: black 8 | language_version: python3 9 | exclude: versioneer.py 10 | args: 11 | - --target-version=py39 12 | - repo: https://github.com/astral-sh/ruff-pre-commit 13 | # Ruff version. 14 | rev: "v0.1.5" 15 | hooks: 16 | - id: ruff 17 | language_version: python3 18 | args: [--fix, --exit-non-zero-on-fix] 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, Dask Developers 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dask-databricks 2 | 3 | Cluster tools for running Dask on Databricks multi-node clusters. 4 | 5 | 6 | ## Quickstart 7 | 8 | To launch a Dask cluster on Databricks you need to create an [init script](https://docs.databricks.com/en/init-scripts/index.html) with the following contents and configure your multi-node cluster to use it. 9 | 10 | ```bash 11 | #!/bin/bash 12 | 13 | # Install Dask + Dask Databricks 14 | /databricks/python/bin/pip install --upgrade dask[complete] dask-databricks 15 | 16 | # Start Dask cluster components 17 | dask databricks run 18 | ``` 19 | 20 | Then from your Databricks Notebook you can quickly connect a Dask `Client` to the scheduler running on the Spark Driver Node. 
21 | 22 | ```python 23 | import dask_databricks 24 | 25 | client = dask_databricks.get_client() 26 | ``` 27 | 28 | Now you can submit work from your notebook to the multi-node Dask cluster. 29 | 30 | ```python 31 | def inc(x): 32 | return x + 1 33 | 34 | x = client.submit(inc, 10) 35 | x.result() 36 | ``` 37 | 38 | ### Dashboard 39 | 40 | You can access the [Dask dashboard](https://docs.dask.org/en/latest/dashboard.html) via the Databricks driver-node proxy. The link can be found in the `Client` or `DatabricksCluster` repr, or via `client.dashboard_link`. 41 | 42 | ```python 43 | >>> print(client.dashboard_link) 44 | https://dbc-dp-xxxx.cloud.databricks.com/driver-proxy/o/xxxx/xx-xxx-xxxx/8087/status 45 | ``` 46 | 47 | ![](https://user-images.githubusercontent.com/1610850/281442274-450d41c6-2eb6-42a1-8de6-c4a1a1b84193.png) 48 | 49 | ![](https://user-images.githubusercontent.com/1610850/281441285-9b84d5f1-d58a-45dc-9354-7385e1599d1f.png) 50 | 51 | ## Releasing 52 | 53 | Releases of this project are automated using [GitHub Actions and the `pypa/gh-action-pypi-publish` action](https://github.com/dask-contrib/dask-databricks/blob/main/.github/workflows/release.yaml). 54 | 55 | To create a new release, push a tag to the upstream repo in the format `x.x.x`. The package will be built and pushed to PyPI automatically and then later picked up by conda-forge. 56 | 57 | ```bash 58 | # Make sure you have an upstream remote 59 | git remote add upstream git@github.com:dask-contrib/dask-databricks.git 60 | 61 | # Create a tag and push it upstream 62 | git tag x.x.x && git push upstream main --tags 63 | ``` 64 | -------------------------------------------------------------------------------- /dask_databricks/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023-present Dask Developers 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | from .databrickscluster import DatabricksCluster, get_client # noqa 6 | 7 | # Define the variable '__version__': 8 | try: 9 | # If setuptools_scm is installed (e.g. in a development environment with 10 | # an editable install), then use it to determine the version dynamically. 11 | from setuptools_scm import get_version 12 | 13 | # This will fail with LookupError if the package is not installed in 14 | # editable mode or if Git is not installed. 15 | __version__ = get_version(root="..", relative_to=__file__) 16 | except (ImportError, LookupError): 17 | # As a fallback, use the version that is hard-coded in the file. 18 | try: 19 | from dask_databricks._version import __version__ # noqa: F401 20 | except ModuleNotFoundError: 21 | # The user is probably trying to run this without having installed 22 | # the package, so complain. 23 | raise RuntimeError("dask-databricks is not correctly installed. 
" "Please install it with pip.") 24 | -------------------------------------------------------------------------------- /dask_databricks/cli.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import socket 5 | import subprocess 6 | import sys 7 | import time 8 | 9 | import click 10 | from rich.logging import RichHandler 11 | 12 | 13 | def get_logger(): 14 | logging.basicConfig(level="INFO", format="%(message)s", datefmt="[%X]", handlers=[RichHandler()]) 15 | return logging.getLogger("dask_databricks") 16 | 17 | 18 | @click.group(name="databricks") 19 | def main(): 20 | """Tools to launch Dask on Databricks.""" 21 | 22 | 23 | @main.command() 24 | @click.option('--worker-command', help='Custom worker command') 25 | @click.option('--worker-args', help='Additional worker arguments') 26 | @click.option( 27 | "--cuda", 28 | is_flag=True, 29 | show_default=True, 30 | default=False, 31 | help="Use `dask cuda worker` from the dask-cuda package when starting workers.", 32 | ) 33 | def run(worker_command, worker_args, cuda): 34 | """Run Dask processes on a Databricks cluster.""" 35 | log = get_logger() 36 | 37 | log.info("Setting up Dask on a Databricks cluster.") 38 | 39 | DB_IS_DRIVER = os.getenv("DB_IS_DRIVER") 40 | DB_DRIVER_IP = os.getenv("DB_DRIVER_IP") 41 | 42 | if DB_DRIVER_IP is None or DB_IS_DRIVER is None: 43 | log.error( 44 | "Unable to find expected environment variables DB_IS_DRIVER and DB_DRIVER_IP. " 45 | "Are you running this command on a Databricks multi-node cluster?" 46 | ) 47 | sys.exit(1) 48 | 49 | if DB_IS_DRIVER == "TRUE": 50 | log.info("This node is the Dask scheduler.") 51 | scheduler_process = subprocess.Popen(["dask", "scheduler", "--dashboard-address", ":8087"]) 52 | time.sleep(5) # give the scheduler time to start 53 | if scheduler_process.poll() is not None: 54 | log.error("Scheduler process has exited prematurely.") 55 | sys.exit(1) 56 | else: 57 | # Specify the same port for all workers 58 | worker_port = 8786 59 | log.info("This node is a Dask worker.") 60 | log.info(f"Connecting to Dask scheduler at {DB_DRIVER_IP}:{worker_port}") 61 | while True: 62 | try: 63 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 64 | sock.connect((DB_DRIVER_IP, worker_port)) 65 | sock.close() 66 | break 67 | except ConnectionRefusedError: 68 | log.info("Scheduler not available yet. 
Waiting...") 69 | time.sleep(1) 70 | 71 | # Construct the worker command 72 | if worker_command: 73 | worker_command = worker_command.split() 74 | elif cuda: 75 | worker_command = ["dask", "cuda", "worker"] 76 | else: 77 | worker_command = ["dask", "worker"] 78 | 79 | if worker_args: 80 | try: 81 | # Try to decode the JSON-encoded worker_args 82 | worker_args_list = json.loads(worker_args) 83 | if not isinstance(worker_args_list, list): 84 | raise ValueError("The JSON-encoded worker_args must be a list.") 85 | except json.JSONDecodeError: 86 | # If decoding as JSON fails, split worker_args by spaces 87 | worker_args_list = worker_args.split() 88 | 89 | worker_command.extend(worker_args_list) 90 | worker_command.append(f"tcp://{DB_DRIVER_IP}:{worker_port}") 91 | 92 | worker_process = subprocess.Popen(worker_command) 93 | time.sleep(5) # give the worker time to start 94 | if worker_process.poll() is not None: 95 | log.error("Worker process has exited prematurely.") 96 | sys.exit(1) 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /dask_databricks/databrickscluster.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uuid 3 | from typing import Optional 4 | 5 | from distributed.core import rpc 6 | from distributed.deploy.cluster import Cluster 7 | from tornado.ioloop import IOLoop 8 | 9 | # Databricks Notebooks injects the `spark` session variable but we need to create it ourselves 10 | try: 11 | from pyspark.sql import SparkSession 12 | 13 | spark = SparkSession.getActiveSession() 14 | except ImportError: 15 | spark = None 16 | 17 | 18 | class DatabricksCluster(Cluster): 19 | """Connect to a Dask cluster deployed via databricks.""" 20 | 21 | def __init__( 22 | self, 23 | loop: Optional[IOLoop] = None, 24 | asynchronous: bool = False, 25 | ): 26 | self.spark_local_ip = os.environ.get("SPARK_LOCAL_IP") 27 | if self.spark_local_ip is None: 28 | raise KeyError( 29 | "Unable to find expected environment variable SPARK_LOCAL_IP. " 30 | "Are you running this on a Databricks driver node?" 31 | ) 32 | if os.environ.get("MASTER") and "local[" in os.environ.get("MASTER"): 33 | raise EnvironmentError( 34 | "You appear to be trying to run a multi-node Dask cluster on a " 35 | "single-node databricks cluster. 
Maybe you want " 36 | "`dask.distributed.LocalCluster().get_client()` instead" 37 | 38 | ) 39 | try: 40 | name = spark.conf.get("spark.databricks.clusterUsageTags.clusterId") 41 | except AttributeError: 42 | name = "unknown-databricks-" + uuid.uuid4().hex[:10] 43 | super().__init__(name=name, loop=loop, asynchronous=asynchronous) 44 | 45 | if not self.called_from_running_loop: 46 | self._loop_runner.start() 47 | self.sync(self._start) 48 | 49 | async def _start(self): 50 | self.scheduler_comm = rpc(f"{self.spark_local_ip}:8786") 51 | await super()._start() 52 | 53 | @property 54 | def dashboard_link(self): 55 | cluster_id = spark.conf.get("spark.databricks.clusterUsageTags.clusterId") 56 | org_id = spark.conf.get("spark.databricks.clusterUsageTags.orgId") 57 | workspace_url = spark.conf.get("spark.databricks.workspaceUrl") 58 | return f"https://{workspace_url}/driver-proxy/o/{org_id}/{cluster_id}/8087/status" 59 | 60 | 61 | def get_client(): 62 | """Get a Dask client connected to a Databricks cluster.""" 63 | return DatabricksCluster().get_client() 64 | -------------------------------------------------------------------------------- /dask_databricks/tests/test_databricks.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from dask.distributed import Client 5 | from distributed.deploy import Cluster, LocalCluster 6 | 7 | from dask_databricks import DatabricksCluster, get_client 8 | 9 | 10 | @pytest.fixture(scope="session") 11 | def dask_cluster(): 12 | """Start a LocalCluster to simulate the cluster that would be started on Databricks.""" 13 | return LocalCluster(scheduler_port=8786) 14 | 15 | 16 | @pytest.fixture 17 | def remove_spark_local_ip(): 18 | original_spark_local_ip = os.getenv("SPARK_LOCAL_IP") 19 | if original_spark_local_ip: 20 | del os.environ["SPARK_LOCAL_IP"] 21 | yield None 22 | if original_spark_local_ip: 23 | os.environ["SPARK_LOCAL_IP"] = original_spark_local_ip 24 | 25 | 26 | @pytest.fixture 27 | def set_spark_local_ip(): 28 | original_spark_local_ip = os.getenv("SPARK_LOCAL_IP") 29 | os.environ["SPARK_LOCAL_IP"] = "127.0.0.1" 30 | yield None 31 | if original_spark_local_ip: 32 | os.environ["SPARK_LOCAL_IP"] = original_spark_local_ip 33 | else: 34 | del os.environ["SPARK_LOCAL_IP"] 35 | 36 | 37 | def test_databricks_cluster_raises_key_error_when_initialised_outside_of_databricks(remove_spark_local_ip): 38 | with pytest.raises(KeyError): 39 | DatabricksCluster() 40 | 41 | def test_databricks_cluster_raises_environment_error_when_master_variable_implies_single_node( 42 | monkeypatch, 43 | set_spark_local_ip, 44 | dask_cluster, 45 | ): 46 | monkeypatch.setenv("MASTER", "local[8]") 47 | with pytest.raises(EnvironmentError): 48 | DatabricksCluster() 49 | 50 | def test_databricks_cluster_create(set_spark_local_ip, dask_cluster): 51 | cluster = DatabricksCluster() 52 | assert isinstance(cluster, Cluster) 53 | 54 | 55 | def test_databricks_cluster_create_client(set_spark_local_ip, dask_cluster): 56 | cluster = DatabricksCluster() 57 | client = Client(cluster) 58 | assert isinstance(client, Client) 59 | assert client.submit(sum, (10, 1)).result() == 11 60 | 61 | 62 | def test_get_client(set_spark_local_ip, dask_cluster): 63 | client = get_client() 64 | assert isinstance(client, Client) 65 | assert isinstance(client.cluster, DatabricksCluster) 66 | assert client.submit(sum, (10, 1)).result() == 11 67 | -------------------------------------------------------------------------------- /pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-vcs"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "dask-databricks" 7 | dynamic = ["version"] 8 | description = '' 9 | readme = "README.md" 10 | requires-python = ">=3.9" 11 | license = {text = "BSD-3-Clause"} 12 | keywords = [] 13 | authors = [ 14 | { name = "Jacob Tomlinson", email = "jacob@tomlinson.email" }, 15 | ] 16 | classifiers = [ 17 | "Development Status :: 4 - Beta", 18 | "Programming Language :: Python", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: Implementation :: CPython", 23 | "Programming Language :: Python :: Implementation :: PyPy", 24 | ] 25 | dependencies = [ 26 | "bokeh<=3.2.2", # Necessary until https://github.com/dask/distributed/issues/8333 is resolved 27 | "click>=8.1", 28 | "dask", 29 | "rich", 30 | "distributed", 31 | ] 32 | 33 | [project.urls] 34 | Documentation = "https://github.com/dask-contrib/dask-databricks#readme" 35 | Issues = "https://github.com/dask-contrib/dask-databricks/issues" 36 | Source = "https://github.com/dask-contrib/dask-databricks" 37 | 38 | [project.entry-points.dask_cli] 39 | databricks = "dask_databricks.cli:main" 40 | 41 | [tool] 42 | rye = { dev-dependencies = [ 43 | "ipy>=1.1", 44 | "pytest>=7.4.3", 45 | ] } 46 | 47 | [tool.hatch.metadata] 48 | allow-direct-references = true 49 | 50 | [tool.hatch.version] 51 | source = "vcs" 52 | 53 | [tool.hatch.build.hooks.vcs] 54 | version-file = "dask_databricks/_version.py" 55 | 56 | [tool.hatch.envs.default] 57 | dependencies = [ 58 | "coverage[toml]>=6.5", 59 | "pytest", 60 | ] 61 | [tool.hatch.envs.default.scripts] 62 | test = "pytest {args:tests}" 63 | test-cov = "coverage run -m pytest {args:tests}" 64 | cov-report = [ 65 | "- coverage combine", 66 | "coverage report", 67 | ] 68 | cov = [ 69 | "test-cov", 70 | "cov-report", 71 | ] 72 | 73 | [[tool.hatch.envs.all.matrix]] 74 | python = ["3.7", "3.8", "3.9", "3.10", "3.11"] 75 | 76 | [tool.hatch.envs.lint] 77 | detached = true 78 | dependencies = [ 79 | "black>=23.1.0", 80 | "mypy>=1.0.0", 81 | "ruff>=0.0.243", 82 | ] 83 | [tool.hatch.envs.lint.scripts] 84 | typing = "mypy --install-types --non-interactive {args:dask_databricks}" 85 | style = [ 86 | "ruff {args:.}", 87 | "black --check --diff {args:.}", 88 | ] 89 | fmt = [ 90 | "black {args:.}", 91 | "ruff --fix {args:.}", 92 | "style", 93 | ] 94 | all = [ 95 | "style", 96 | "typing", 97 | ] 98 | 99 | [tool.hatch.envs.test] 100 | dependencies = [ 101 | "pytest>=7.2.2", 102 | "pytest-timeout>=2.1.0", 103 | ] 104 | 105 | [tool.hatch.envs.test.scripts] 106 | run = "pytest" 107 | 108 | [tool.black] 109 | target-version = ["py37"] 110 | line-length = 120 111 | skip-string-normalization = true 112 | 113 | [tool.ruff] 114 | # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default. 115 | select = ["E", "F", "I"] 116 | ignore = [] 117 | 118 | # Allow autofix for all enabled rules (when `--fix`) is provided. 119 | fixable = ["I"] 120 | # unfixable = [] 121 | 122 | # Exclude a variety of commonly ignored directories. 
123 | exclude = [ 124 | ".bzr", 125 | ".direnv", 126 | ".eggs", 127 | ".git", 128 | ".hg", 129 | ".mypy_cache", 130 | ".nox", 131 | ".pants.d", 132 | ".pytype", 133 | ".ruff_cache", 134 | ".svn", 135 | ".tox", 136 | ".venv", 137 | "__pypackages__", 138 | "_build", 139 | "buck-out", 140 | "build", 141 | "dist", 142 | "node_modules", 143 | "venv", 144 | ] 145 | 146 | line-length = 120 147 | 148 | # Allow unused variables when underscore-prefixed. 149 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 150 | 151 | # Assume Python 3.10. 152 | target-version = "py310" 153 | 154 | [tool.ruff.isort] 155 | known-first-party = ["dask_databricks"] 156 | 157 | [tool.ruff.flake8-tidy-imports] 158 | ban-relative-imports = "all" 159 | 160 | [tool.ruff.per-file-ignores] 161 | # Tests can use magic values, assertions, and relative imports 162 | "tests/**/*" = ["PLR2004", "S101", "TID252"] 163 | 164 | [tool.coverage.run] 165 | source_pkgs = ["dask_databricks", "tests"] 166 | branch = true 167 | parallel = true 168 | omit = [ 169 | "dask_databricks/__about__.py", 170 | ] 171 | 172 | [tool.coverage.paths] 173 | dask_databricks = ["dask_databricks", "*/dask-databricks/dask_databricks"] 174 | tests = ["tests", "*/dask-databricks/tests"] 175 | 176 | [tool.coverage.report] 177 | exclude_lines = [ 178 | "no cov", 179 | "if __name__ == .__main__.:", 180 | "if TYPE_CHECKING:", 181 | ] 182 | --------------------------------------------------------------------------------
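For local development it can be handy to exercise `DatabricksCluster` without a Databricks cluster at all. The sketch below simply mirrors the fixtures in `dask_databricks/tests/test_databricks.py`: a `LocalCluster` listening on port 8786 stands in for the scheduler that `dask databricks run` would start on the Spark driver node, and `SPARK_LOCAL_IP` is pointed at `127.0.0.1` so that `get_client()` can find it. This is a local simulation for testing only, not how the package is used on a real Databricks cluster.

```python
import os

from dask.distributed import LocalCluster

import dask_databricks

if __name__ == "__main__":
    # Stand-in for the scheduler that `dask databricks run` starts on the
    # Spark driver node (the test suite uses the same LocalCluster fixture).
    cluster = LocalCluster(scheduler_port=8786)

    # DatabricksCluster reads SPARK_LOCAL_IP to locate the scheduler, so
    # point it at the local stand-in, as the test fixtures do.
    os.environ["SPARK_LOCAL_IP"] = "127.0.0.1"

    client = dask_databricks.get_client()
    assert client.submit(sum, (10, 1)).result() == 11
```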