├── .flake8 ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── build-publish-docs.yaml │ ├── build-publish-helm-chart.yaml │ ├── build-publish-python-packages.yaml │ ├── refreeze-dockerfile-requirements-txt.yaml │ └── test.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── README.rst ├── RELEASE.md ├── continuous_integration ├── docker │ ├── README.md │ ├── base │ │ ├── Dockerfile │ │ └── files │ │ │ └── etc │ │ │ └── sudoers.d │ │ │ └── preserve_path │ ├── hadoop │ │ ├── Dockerfile │ │ ├── _install.sh │ │ ├── _print_logs.sh │ │ ├── _test.sh │ │ ├── files │ │ │ ├── etc │ │ │ │ ├── hadoop │ │ │ │ │ ├── conf.kerberos │ │ │ │ │ │ ├── capacity-scheduler.xml │ │ │ │ │ │ ├── container-executor.cfg │ │ │ │ │ │ ├── core-site.xml │ │ │ │ │ │ ├── hdfs-site.xml │ │ │ │ │ │ └── yarn-site.xml │ │ │ │ │ └── conf.simple │ │ │ │ │ │ ├── core-site.xml │ │ │ │ │ │ └── hdfs-site.xml │ │ │ │ ├── krb5.conf │ │ │ │ ├── supervisord.conf │ │ │ │ └── supervisord.d │ │ │ │ │ ├── hdfs-datanode.conf │ │ │ │ │ ├── hdfs-namenode.conf │ │ │ │ │ ├── kerberos.conf │ │ │ │ │ ├── yarn-nodemanager.conf │ │ │ │ │ └── yarn-resourcemanager.conf │ │ │ ├── scripts │ │ │ │ ├── init-hdfs.sh │ │ │ │ ├── setup-hadoop.sh │ │ │ │ └── setup-kerb.sh │ │ │ └── var │ │ │ │ └── kerberos │ │ │ │ └── krb5kdc │ │ │ │ ├── kadm5.acl │ │ │ │ └── kdc.conf │ │ ├── install.sh │ │ ├── print_logs.sh │ │ ├── start.sh │ │ └── test.sh │ ├── pbs │ │ ├── Dockerfile │ │ ├── _install.sh │ │ ├── _test.sh │ │ ├── files │ │ │ ├── etc │ │ │ │ └── sudoers.d │ │ │ │ │ └── dask │ │ │ └── scripts │ │ │ │ └── start.sh │ │ ├── install.sh │ │ ├── print_logs.sh │ │ ├── start.sh │ │ └── test.sh │ └── slurm │ │ ├── Dockerfile │ │ ├── _install.sh │ │ ├── _print_logs.sh │ │ ├── _test.sh │ │ ├── files │ │ ├── etc │ │ │ ├── slurm │ │ │ │ ├── cgroup.conf │ │ │ │ ├── slurm.conf │ │ │ │ └── slurmdbd.conf │ │ │ ├── sudoers.d │ │ │ │ └── dask │ │ │ ├── supervisord.conf │ │ │ └── supervisord.d │ │ │ │ └── 
slurm.conf │ │ └── scripts │ │ │ └── init-mysql.sh │ │ ├── install.sh │ │ ├── print_logs.sh │ │ ├── start.sh │ │ └── test.sh └── kubernetes │ ├── build-publish-helm-chart.sh │ └── k3d-create.sh ├── dask-gateway-server ├── .dockerignore ├── .gitignore ├── Dockerfile ├── Dockerfile.requirements.in ├── Dockerfile.requirements.txt ├── LICENSE ├── README.rst ├── dask-gateway-proxy │ ├── .gitignore │ ├── README.md │ ├── cmd │ │ └── dask-gateway-proxy │ │ │ └── main.go │ ├── go.mod │ ├── go.sum │ ├── internal │ │ └── logging │ │ │ └── logging.go │ └── pkg │ │ ├── router │ │ ├── router.go │ │ └── router_test.go │ │ └── sni │ │ └── sni.go ├── dask_gateway_server │ ├── __init__.py │ ├── __main__.py │ ├── _version.py │ ├── app.py │ ├── auth.py │ ├── backends │ │ ├── __init__.py │ │ ├── base.py │ │ ├── db_base.py │ │ ├── inprocess.py │ │ ├── jobqueue │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── launcher.py │ │ │ ├── pbs.py │ │ │ └── slurm.py │ │ ├── kubernetes │ │ │ ├── __init__.py │ │ │ ├── backend.py │ │ │ ├── controller.py │ │ │ └── utils.py │ │ ├── local.py │ │ └── yarn.py │ ├── models.py │ ├── options.py │ ├── proxy │ │ ├── __init__.py │ │ └── core.py │ ├── routes.py │ ├── tls.py │ ├── traitlets.py │ ├── utils.py │ └── workqueue.py ├── hatch_build.py └── pyproject.toml ├── dask-gateway ├── .dockerignore ├── .gitignore ├── Dockerfile ├── Dockerfile.requirements.in ├── Dockerfile.requirements.txt ├── LICENSE ├── README.rst ├── dask_gateway │ ├── __init__.py │ ├── _version.py │ ├── auth.py │ ├── client.py │ ├── comm.py │ ├── config.py │ ├── gateway.yaml │ ├── options.py │ ├── scheduler_preload.py │ └── utils.py └── pyproject.toml ├── dev-environment.yaml ├── docs ├── Makefile ├── requirements.txt └── source │ ├── _images │ ├── adapt-widget.png │ ├── architecture-k8s.svg │ ├── architecture.svg │ ├── options-widget.png │ └── scale-widget.png │ ├── api-client.rst │ ├── api-server.rst │ ├── authentication.rst │ ├── changelog.md │ ├── cluster-options.rst │ ├── conf.py │ ├── 
configuration-user.rst │ ├── develop.rst │ ├── index.rst │ ├── install-hadoop.rst │ ├── install-jobqueue.rst │ ├── install-kube.rst │ ├── install-local.rst │ ├── install-user.rst │ ├── resource-limits.rst │ ├── security.rst │ └── usage.rst ├── pyproject.toml ├── resources ├── README.rst └── helm │ ├── README.rst │ ├── chartpress.yaml │ ├── dask-gateway │ ├── .helmignore │ ├── Chart.yaml │ ├── crds │ │ ├── daskclusters.yaml │ │ └── traefik.yaml │ ├── extensions │ │ ├── README.rst │ │ └── gateway │ │ │ └── .gitkeep │ ├── templates │ │ ├── NOTES.txt │ │ ├── _helpers.tpl │ │ ├── controller │ │ │ ├── configmap.yaml │ │ │ ├── deployment.yaml │ │ │ └── rbac.yaml │ │ ├── gateway │ │ │ ├── configmap.yaml │ │ │ ├── deployment.yaml │ │ │ ├── ingressroute.yaml │ │ │ ├── middleware.yaml │ │ │ ├── rbac.yaml │ │ │ ├── secret.yaml │ │ │ └── service.yaml │ │ └── traefik │ │ │ ├── dashboard.yaml │ │ │ ├── deployment.yaml │ │ │ ├── rbac.yaml │ │ │ └── service.yaml │ ├── values.schema.yaml │ └── values.yaml │ ├── testing │ ├── chart-install-values.yaml │ └── skaffold.yaml │ └── tools │ ├── compare-values-schema-content.py │ ├── generate-json-schema.py │ └── validate-against-schema.py ├── skaffold.yaml └── tests ├── __init__.py ├── conftest.py ├── kubernetes ├── __init__.py ├── test_helm.py ├── test_integration.py └── test_methods.py ├── requirements.txt ├── test_auth.py ├── test_cli.py ├── test_client.py ├── test_db_backend.py ├── test_local_backend.py ├── test_options.py ├── test_pbs_backend.py ├── test_proxies.py ├── test_slurm_backend.py ├── test_traitlets.py ├── test_utils.py ├── test_workqueue.py ├── test_yarn_backend.py └── utils_test.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = 3 | __init__.py, 4 | _version.py 5 | ignore = 6 | # Import formatting 7 | E4, 8 | # Space before : 9 | E203, 10 | # Comparing types instead of isinstance 11 | E721, 12 | # Assign a lambda 13 | E731, 14 | # Ambiguous variable 
names 15 | E741, 16 | # Allow breaks before/after binary operators 17 | W503, 18 | W504 19 | 20 | # black is set to 88, but isn't a strict limit so we add some wiggle room for 21 | # flake8 testing. 22 | max-line-length = 100 23 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | dask-gateway/dask_gateway/_version.py export-subst 2 | dask-gateway-server/dask_gateway_server/_version.py export-subst 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # dependabot.yml reference: https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/configuration-options-for-dependency-updates 2 | # 3 | # Notes: 4 | # - Status and logs from dependabot are provided at 5 | # https://github.com/dask/dask-gateway/network/updates. 6 | # - YAML anchors are not supported here or in GitHub Workflows. 7 | # 8 | version: 2 9 | updates: 10 | # Update actions in our workflows to their latest releases 11 | - package-ecosystem: github-actions 12 | directory: / 13 | schedule: 14 | interval: monthly 15 | time: "05:00" 16 | timezone: Etc/UTC 17 | labels: 18 | - ci 19 | -------------------------------------------------------------------------------- /.github/workflows/build-publish-docs.yaml: -------------------------------------------------------------------------------- 1 | # This is a GitHub workflow defining a set of jobs with a set of steps. 
2 | # ref: https://docs.github.com/en/actions/learn-github-actions/workflow-syntax-for-github-actions 3 | # 4 | name: Build and publish documentation 5 | 6 | on: 7 | pull_request: 8 | paths: 9 | - "docs/**" 10 | - "dask-gateway/**" 11 | - "dask-gateway-server/**" 12 | - ".github/workflows/build-publish-docs.yaml" 13 | push: 14 | paths: 15 | - "docs/**" 16 | - "dask-gateway/**" 17 | - "dask-gateway-server/**" 18 | - ".github/workflows/build-publish-docs.yaml" 19 | branches: ["main"] 20 | tags: ["**"] 21 | workflow_dispatch: 22 | 23 | env: 24 | commit_msg: ${{ github.event.head_commit.message }} 25 | 26 | jobs: 27 | build-and-publish-docs: 28 | name: Build and publish documentation 29 | runs-on: ubuntu-24.04 30 | 31 | # permissions requested for secrets.github_token in order to push to the 32 | # gh-pages branch, available for push and workflow_dispatch triggers. 33 | permissions: 34 | contents: write 35 | 36 | steps: 37 | - uses: actions/checkout@v4 38 | - uses: actions/setup-python@v5 39 | with: 40 | python-version: "3.11" 41 | # ref https://github.com/dask/dask-sphinx-theme/issues/68 42 | 43 | - name: Install Python docs requirements 44 | run: | 45 | DASK_GATEWAY_SERVER__NO_PROXY=true pip install -r docs/requirements.txt 46 | 47 | - name: Build docs (make html) 48 | run: | 49 | cd docs 50 | make html SPHINXOPTS='--color -W --keep-going' 51 | 52 | - name: Push built docs to gh-pages branch 53 | uses: JamesIves/github-pages-deploy-action@releases/v4 54 | if: github.event_name == 'push' && github.ref == 'refs/heads/main' 55 | with: 56 | branch: gh-pages 57 | folder: docs/_build/html/ 58 | 59 | linkcheck-docs: 60 | name: Test links in docs 61 | runs-on: ubuntu-24.04 62 | 63 | steps: 64 | - uses: actions/checkout@v4 65 | - uses: actions/setup-python@v5 66 | with: 67 | python-version: "3.11" 68 | # ref https://github.com/dask/dask-sphinx-theme/issues/68 69 | - name: Install Python docs requirements 70 | run: | 71 | DASK_GATEWAY_SERVER__NO_PROXY=true pip install -r 
docs/requirements.txt 72 | 73 | - name: Linkcheck docs (make linkcheck) 74 | run: | 75 | cd docs 76 | make linkcheck SPHINXOPTS='--color -W --keep-going' 77 | -------------------------------------------------------------------------------- /.github/workflows/build-publish-helm-chart.yaml: -------------------------------------------------------------------------------- 1 | # This is a GitHub workflow defining a set of jobs with a set of steps. 2 | # ref: https://docs.github.com/en/actions/learn-github-actions/workflow-syntax-for-github-actions 3 | # 4 | name: Build and publish Helm chart 5 | 6 | on: 7 | push: 8 | tags: ["**"] 9 | workflow_dispatch: 10 | 11 | jobs: 12 | build-publish-helm-chart: 13 | name: "Build and publish Helm chart" 14 | runs-on: ubuntu-24.04 15 | 16 | # permissions requested for secrets.github_token in order to push to the 17 | # container registry, available for push and workflow_dispatch triggers. 18 | permissions: 19 | contents: read 20 | packages: write 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | with: 25 | # chartpress requires git history to set chart version and image tags 26 | # correctly 27 | fetch-depth: 0 28 | - uses: actions/setup-python@v5 29 | with: 30 | python-version: "3.13" 31 | 32 | - name: Install chart publishing dependencies (chartpress, pyyaml, helm) 33 | run: | 34 | pip install chartpress pyyaml 35 | pip list 36 | 37 | echo "Helm is already installed" 38 | helm version 39 | 40 | - name: Set up QEMU (for docker buildx) 41 | uses: docker/setup-qemu-action@v3 42 | 43 | - name: Set up Docker Buildx (for multi-arch builds) 44 | uses: docker/setup-buildx-action@v3 45 | 46 | - name: Login to container registry 47 | run: echo "${{ secrets.github_token }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin 48 | 49 | # chartpress pushes a packages Helm chart to dask/helm-chart's gh-pages 50 | # branch, so we need to have a git user.email and user.name configured 51 | - name: Configure a git user 52 | run: | 53 
| git config --global user.email "github-actions@example.local" 54 | git config --global user.name "GitHub Actions user" 55 | 56 | - name: Generate values.schema.json from YAML equivalent 57 | run: resources/helm/tools/generate-json-schema.py 58 | 59 | - name: Build and publish Helm chart with chartpress 60 | env: 61 | # chartpress can make use of a personal access token by setting these 62 | # environment variables like this, for details see: 63 | # https://github.com/jupyterhub/chartpress/blob/d4e2346d50f0724f6bee387f4f8aebc108afb648/chartpress.py#L118-L128 64 | # 65 | GITHUB_ACTOR: "" 66 | GITHUB_TOKEN: "${{ secrets.dask_bot_token }}" 67 | # DOCKER_BUILDKIT is required for building images with --mount flags, 68 | # as used in dask-gateway/Dockerfile. 69 | DOCKER_BUILDKIT: "1" 70 | run: continuous_integration/kubernetes/build-publish-helm-chart.sh 71 | -------------------------------------------------------------------------------- /.github/workflows/refreeze-dockerfile-requirements-txt.yaml: -------------------------------------------------------------------------------- 1 | # This is a GitHub workflow defining a set of jobs with a set of steps. 
2 | # ref: https://docs.github.com/en/actions/learn-github-actions/workflow-syntax-for-github-actions 3 | # 4 | name: Refreeze Dockerfile.requirements.txt 5 | 6 | on: 7 | push: 8 | paths: 9 | - "**/Dockerfile" 10 | - "**/Dockerfile.requirements.in" 11 | - "**/Dockerfile.requirements.txt" 12 | - ".github/workflows/refreeze-dockerfile-requirements-txt.yaml" 13 | branches: ["main"] 14 | workflow_dispatch: 15 | 16 | jobs: 17 | refreeze-dockerfile-requirements-txt: 18 | name: Refreeze Dockerfile.requirements.txt 19 | 20 | # Don't run this job on forks 21 | if: github.repository == 'dask/dask-gateway' 22 | runs-on: ubuntu-24.04 23 | 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | include: 28 | - image: dask-gateway 29 | - image: dask-gateway-server 30 | 31 | steps: 32 | - uses: actions/checkout@v4 33 | 34 | - name: Refreeze Dockerfile.requirements.txt based on Dockerfile.requirements.in 35 | run: | 36 | cd ${{ matrix.image }} 37 | docker run --rm \ 38 | --env=CUSTOM_COMPILE_COMMAND='Use "Run workflow" button at https://github.com/dask/dask-gateway/actions/workflows/refreeze-dockerfile-requirements-txt.yaml' \ 39 | --env=DASK_GATEWAY_SERVER__NO_PROXY=1 \ 40 | --volume=$PWD:/opt/${{ matrix.image }} \ 41 | --workdir=/opt/${{ matrix.image }} \ 42 | --user=root \ 43 | python:3.13-slim-bullseye \ 44 | sh -c 'pip install pip-tools==7.* && pip-compile --allow-unsafe --strip-extras --upgrade --output-file=Dockerfile.requirements.txt Dockerfile.requirements.in' 45 | 46 | - name: git diff 47 | run: git --no-pager diff --color=always 48 | 49 | # ref: https://github.com/peter-evans/create-pull-request 50 | - name: Create a PR 51 | uses: peter-evans/create-pull-request@v7 52 | with: 53 | token: "${{ secrets.dask_bot_token }}" 54 | author: Dask Bot Account <65357765+dask-bot@users.noreply.github.com> 55 | committer: Dask Bot Account <65357765+dask-bot@users.noreply.github.com> 56 | branch: update-image-${{ matrix.image }} 57 | labels: dependencies 58 | commit-message: Refreeze 
${{ matrix.image}}/Dockerfile.requirements.txt 59 | title: Refreeze ${{ matrix.image}}/Dockerfile.requirements.txt 60 | body: >- 61 | ${{ matrix.image}}/Dockerfile.requirements.txt has been refrozen 62 | based on ${{ matrix.image}}/Dockerfile.requirements.in. 63 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # pre-commit is a tool to perform a predefined set of tasks manually and/or 2 | # automatically before git commits are made. 3 | # 4 | # Config reference: https://pre-commit.com/#pre-commit-configyaml---top-level 5 | # 6 | # Common tasks 7 | # 8 | # - Run on all files: pre-commit run --all-files 9 | # - Register git hooks: pre-commit install --install-hooks 10 | # 11 | # About pre-commit.ci 12 | # 13 | # pre-commit.ci is a service that is enabled for this repo via 14 | # https://github.com/organizations/dask/settings/installations to do the 15 | # following: 16 | # 17 | # 1. Automatically keep the pinned versions in this file updated by opening PRs. 18 | # 2. Automatically run a pre-commit test like a GitHub workflow also could do. 19 | # 3. Automatically add a commit with autoformatting changes to PRs if they have 20 | # forgot to run configured autoformatters. 
21 | # 22 | repos: 23 | # Autoformat: Python code, syntax patterns are modernized 24 | - repo: https://github.com/asottile/pyupgrade 25 | rev: v3.19.1 26 | hooks: 27 | - id: pyupgrade 28 | args: 29 | - --py310-plus 30 | 31 | # Autoformat: Python code 32 | - repo: https://github.com/PyCQA/isort 33 | rev: "6.0.1" 34 | hooks: 35 | - id: isort 36 | 37 | # Autoformat: Python code 38 | - repo: https://github.com/psf/black 39 | rev: "25.1.0" 40 | hooks: 41 | - id: black 42 | 43 | # Autoformat: general small fixes 44 | - repo: https://github.com/pre-commit/pre-commit-hooks 45 | rev: v5.0.0 46 | hooks: 47 | - id: end-of-file-fixer 48 | exclude_types: ["svg"] 49 | - id: trailing-whitespace 50 | 51 | # Lint: Python code 52 | - repo: https://github.com/PyCQA/flake8 53 | rev: "7.2.0" 54 | hooks: 55 | - id: flake8 56 | 57 | # pre-commit.ci config reference: https://pre-commit.ci/#configuration 58 | ci: 59 | autoupdate_schedule: monthly 60 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Dask is a community maintained project. We welcome contributions in the form of bug reports, documentation, code, design proposals, and more. 2 | 3 | Please see https://gateway.dask.org/develop.html for more information. 4 | 5 | Also for general information on how to contribute to Dask projects see https://docs.dask.org/en/latest/develop.html. 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019, Jim Crist-Harif 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. 
Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | dask-gateway 2 | ============ 3 | 4 | |github-actions-tests| |github-actions-docs| |pypi-dask-gateway| |pypi-dask-gateway-server| |conda-dask-gateway| |conda-dask-gateway-server| 5 | 6 | A multi-tenant server for securely deploying and managing Dask clusters. See 7 | `the documentation `__ for more information. 8 | 9 | LICENSE 10 | ------- 11 | 12 | New BSD. See the `License File 13 | `_. 14 | 15 | .. 
|github-actions-tests| image:: https://github.com/dask/dask-gateway/actions/workflows/test.yaml/badge.svg 16 | :target: https://github.com/dask/dask-gateway/actions/workflows/test.yaml 17 | .. |github-actions-docs| image:: https://github.com/dask/dask-gateway/actions/workflows/build-publish-docs.yaml/badge.svg 18 | :target: https://gateway.dask.org/ 19 | .. |pypi-dask-gateway| image:: https://img.shields.io/pypi/v/dask-gateway.svg?label=dask-gateway 20 | :target: https://pypi.org/project/dask-gateway/ 21 | .. |pypi-dask-gateway-server| image:: https://img.shields.io/pypi/v/dask-gateway-server.svg?label=dask-gateway-server 22 | :target: https://pypi.org/project/dask-gateway-server/ 23 | .. |conda-dask-gateway| image:: https://img.shields.io/conda/v/conda-forge/dask-gateway?color=blue&label=dask-gateway 24 | :target: https://anaconda.org/conda-forge/dask-gateway 25 | .. |conda-dask-gateway-server| image:: https://img.shields.io/conda/v/conda-forge/dask-gateway-server?color=blue&label=dask-gateway-server 26 | :target: https://anaconda.org/conda-forge/dask-gateway-server 27 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # How to make a release 2 | 3 | `dask-gateway` and `dask-gateway-server` are packages available on [PyPI] and 4 | [conda-forge], and `dask-gateway` is a Helm chart available at [helm.dask.org] 5 | which is both a user facing website and a Helm chart repository by having an 6 | [index.yaml] file read by `helm` the CLI linking to packaged Helm charts. 7 | 8 | These are instructions on how to make a release. 9 | 10 | ## Pre-requisites 11 | 12 | - Push rights to [dask/dask-gateway] 13 | - Push rights to [conda-forge/dask-gateway-feedstock] 14 | 15 | ## Steps to make a release 16 | 17 | 1. Refreeze Dockerfile.requirements.txt files by running the [refreeze workflow] 18 | and merging created PRs. 
19 | 20 | [refreeze workflow]: https://github.com/dask/dask-gateway/actions/workflows/refreeze-dockerfile-requirements-txt.yaml 21 | 22 | 1. Create a PR updating `docs/source/changelog.md` with [github-activity] and 23 | continue only when its merged. 24 | 25 | ```shell 26 | pip install github-activity 27 | 28 | github-activity --heading-level=2 dask/dask-gateway 29 | ``` 30 | 31 | - Visit and label all uncategorized PRs appropriately with: `maintenance`, 32 | `enhancement`, `new`, `breaking`, `bug`, or `documentation`. 33 | - Generate a list of PRs again and add it to the changelog 34 | - Highlight breaking changes 35 | - Summarize the release changes 36 | 37 | 2. Checkout main and make sure it is up to date. 38 | 39 | ```shell 40 | git checkout main 41 | git fetch origin main 42 | git reset --hard origin/main 43 | git clean -xfd 44 | ``` 45 | 46 | 3. Update the version, make commits, and push a git tag with `tbump`. 47 | 48 | ```shell 49 | pip install tbump 50 | tbump --dry-run ${VERSION} 51 | 52 | tbump ${VERSION} 53 | ``` 54 | 55 | Following this, the [CI system] will build and publish the PyPI packages and 56 | Helm chart. 57 | 58 | 4. Following the release to PyPI, an automated PR should arrive to 59 | [conda-forge/dask-gateway-feedstock] with instructions. 
60 | 61 | [pypi]: https://pypi.org/project/dask-gateway/ 62 | [conda-forge]: https://anaconda.org/conda-forge/dask-gateway 63 | [helm.dask.org]: https://helm.dask.org/ 64 | [index.yaml]: https://helm.dask.org/index.yaml 65 | [dask/dask-gateway]: https://github.com/dask/dask-gateway 66 | [conda-forge/dask-gateway-feedstock]: https://github.com/conda-forge/dask-gateway-feedstock 67 | [github-activity]: https://github.com/executablebooks/github-activity 68 | [ci system]: https://github.com/dask/dask-gateway/actions 69 | -------------------------------------------------------------------------------- /continuous_integration/docker/base/Dockerfile: -------------------------------------------------------------------------------- 1 | # See continuous_integration/docker/README.md for details about this and other 2 | # Dockerfiles under the continuous_integration/docker folder on their purpose 3 | # and how to work with them. 4 | # 5 | # centos:8 reached end-of-life 31 Dec 2021 6 | # centos:7 reach end-of-life 30 Jun 2024 7 | # 8 | FROM centos:7 9 | 10 | ARG python_version="3.11" 11 | # go_version was 1.19 until it was updated to 1.23.6 (2025-02-07) by adding a 12 | # layer on top of the previous image, as it is no longer able to build and it 13 | # was an easy way to update the golang version. 
14 | ARG go_version="1.23.6" 15 | 16 | # Set labels based on the Open Containers Initiative (OCI): 17 | # https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys 18 | # 19 | LABEL org.opencontainers.image.source="https://github.com/dask/dask-gateway" 20 | LABEL org.opencontainers.image.url="https://github.com/dask/dask-gateway/blob/HEAD/continuous_integration/docker/base/Dockerfile" 21 | 22 | # Configure yum to error on missing packages 23 | RUN echo "skip_missing_names_on_install=False" >> /etc/yum.conf 24 | 25 | # Install common yum packages 26 | RUN yum install -y \ 27 | sudo \ 28 | # sudo is used to run commands as various other users 29 | git \ 30 | # git is a requirement for golang to fetch dependencies during 31 | # compilation of the golang code we have in 32 | # dask-gateway-server/dask-gateway-proxy. 33 | && yum clean all \ 34 | && rm -rf /var/cache/yum 35 | 36 | # Install python and the following utilities: 37 | # 38 | # - tini: can wrap an container entrypoint to avoid misc issues, see 39 | # https://github.com/krallin/tini#readme 40 | # - psutil: provides misc tools of relevance for debugging, see 41 | # https://psutil.readthedocs.io/en/latest/#about 42 | # 43 | # NOTE: micromamba is a slimmed mamba/conda executable functioning without a 44 | # pre-installed Python environment we use to install a Python version of 45 | # choice to not first need to install a full Python environment to then 46 | # install another Python environment. 47 | # 48 | # See https://github.com/mamba-org/mamba#micromamba. 
49 | # 50 | RUN yum install -y bzip2 \ 51 | \ 52 | && curl -sL https://micromamba.snakepit.net/api/micromamba/linux-64/latest \ 53 | | tar --extract --verbose --bzip2 bin/micromamba --strip-components=1 \ 54 | && ./micromamba install \ 55 | --channel=conda-forge \ 56 | --root-prefix="/opt/python" \ 57 | --prefix="/opt/python" \ 58 | python="${python_version}" \ 59 | mamba \ 60 | psutil \ 61 | tini \ 62 | && rm ./micromamba \ 63 | && /opt/python/bin/mamba clean -af \ 64 | && find /opt/python/ -type f -name '*.a' -delete \ 65 | && find /opt/python/ -type f -name '*.pyc' -delete \ 66 | \ 67 | && yum remove -y bzip2 \ 68 | && yum clean all \ 69 | && rm -rf /var/cache/yum 70 | 71 | # Install go 72 | RUN curl -sL https://go.dev/dl/go${go_version}.linux-amd64.tar.gz \ 73 | | tar --extract --verbose --gzip --directory=/opt/ 74 | 75 | # Put Python and Go environments on PATH 76 | # 77 | # NOTE: This PATH environment will be preserved if sudo is used to switch to 78 | # other users thanks to changes to /etc/sudoers.d/preserve_path. 79 | # 80 | ENV PATH=/opt/python/bin:/opt/go/bin:$PATH 81 | COPY ./files/etc /etc/ 82 | 83 | # Make a few user accounts and a user group for later use 84 | RUN useradd --create-home dask \ 85 | && useradd --create-home alice \ 86 | && useradd --create-home bob \ 87 | && groupadd dask_users \ 88 | && usermod --append --groups dask_users alice \ 89 | && usermod --append --groups dask_users bob 90 | -------------------------------------------------------------------------------- /continuous_integration/docker/base/files/etc/sudoers.d/preserve_path: -------------------------------------------------------------------------------- 1 | # This config ensures that the PATH environment variable this only-for-testing 2 | # container is started with is preserved when changing to other users with sudo. 3 | # 4 | # NOTES: 5 | # 6 | # - `sudo` is used to execute commands as other users. 
What then happens to the 7 | # environment will be determined by configuration in /etc/sudoers and 8 | # /etc/sudoers.d/* as well as flags we pass to the sudo command. The behavior 9 | # can be inspected with `sudo -V` run as root. 10 | # 11 | # ref: `man sudo` https://linux.die.net/man/8/sudo 12 | # ref: `man sudoers` https://www.sudo.ws/man/1.8.15/sudoers.man.html 13 | # 14 | # - We disable the `secure_path` which is set by default in /etc/sudoers as it 15 | # would override the PATH variable. 16 | Defaults !secure_path 17 | # 18 | # - We can use the `-E` or `--preserve-env` flag to pass through most 19 | # environment variables, but understand that exceptions are caused by the 20 | # sudoers configuration: `env_delete`, `env_check`, and `secure_path`. 21 | # 22 | # - We reduce the `env_delete` list of default variables to be deleted. It has 23 | # higher priority than the `--preserve-env` flag and `env_keep` configuration. 24 | Defaults env_delete -= "PATH" 25 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/_install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xe 3 | 4 | cd /working 5 | 6 | # FIXME: pip should be installed to a modern version in the base image instead 7 | # of being upgraded here. It isn't because of 8 | # https://github.com/dask/dask-gateway/issues/837. 9 | pip install "pip==24.*" 10 | 11 | # pykerberos needs to compile c++ code that depends on system libraries, by 12 | # installing it from conda-forge, we avoid such hassle. 
13 | # 14 | mamba install -c conda-forge pykerberos 15 | 16 | # This installs everything else we need for tests 17 | pip install -r tests/requirements.txt 18 | 19 | pip list 20 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/_print_logs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Bold high intensity green 4 | G='\033[1;92m' 5 | # No color 6 | NC='\033[0m' 7 | 8 | printf "\n${G}supervisorctl status${NC}\n" 9 | supervisorctl status 10 | 11 | printf "\n${G}cat /var/log/supervisor/krb5kdc.log${NC}\n" 12 | cat /var/log/supervisor/krb5kdc.log 13 | printf "\n${G}cat /var/log/supervisor/kadmind.log${NC}\n" 14 | cat /var/log/supervisor/kadmind.log 15 | printf "\n${G}cat /var/log/supervisor/yarn-nodemanager.log${NC}\n" 16 | cat /var/log/supervisor/yarn-nodemanager.log 17 | printf "\n${G}cat /var/log/supervisor/yarn-resourcemanager.log${NC}\n" 18 | cat /var/log/supervisor/yarn-resourcemanager.log 19 | printf "\n${G}cat /var/log/supervisor/hdfs-namenode.log${NC}\n" 20 | cat /var/log/supervisor/hdfs-namenode.log 21 | printf "\n${G}cat /var/log/supervisor/hdfs-datanode.log${NC}\n" 22 | cat /var/log/supervisor/hdfs-datanode.log 23 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xe 3 | 4 | cd /working 5 | pytest -v \ 6 | tests/test_yarn_backend.py \ 7 | tests/test_auth.py \ 8 | -k "yarn or kerberos" 9 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/hadoop/conf.kerberos/capacity-scheduler.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | yarn.scheduler.capacity.root.queues 13 | default,fruit 14 | 15 | 16 
| 17 | yarn.scheduler.capacity.root.fruit.queues 18 | apples,bananas,oranges 19 | 20 | 21 | 22 | yarn.scheduler.capacity.maximum-am-resource-percent 23 | 0.75 24 | 25 | 26 | 27 | 28 | yarn.scheduler.capacity.root.default.capacity 29 | 60.0 30 | 31 | 32 | 33 | yarn.scheduler.capacity.root.default.maximum-capacity 34 | 100.0 35 | 36 | 37 | 38 | 39 | yarn.scheduler.capacity.root.fruit.capacity 40 | 40.0 41 | 42 | 43 | 44 | yarn.scheduler.capacity.root.fruit.maximum-capacity 45 | 50.0 46 | 47 | 48 | 49 | 50 | yarn.scheduler.capacity.root.fruit.apples.capacity 51 | 50.0 52 | 53 | 54 | 55 | yarn.scheduler.capacity.root.fruit.apples.maximum-capacity 56 | 100.0 57 | 58 | 59 | 60 | 61 | yarn.scheduler.capacity.root.fruit.bananas.capacity 62 | 25.0 63 | 64 | 65 | 66 | yarn.scheduler.capacity.root.fruit.bananas.maximum-capacity 67 | 50.0 68 | 69 | 70 | 71 | 72 | yarn.scheduler.capacity.root.fruit.oranges.capacity 73 | 25.0 74 | 75 | 76 | 77 | yarn.scheduler.capacity.root.fruit.oranges.maximum-capacity 78 | 60.0 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/hadoop/conf.kerberos/container-executor.cfg: -------------------------------------------------------------------------------- 1 | # Some configuration of yarn.nodemanager is duplicated to yarn-site.xml, as 2 | # documented it needs to be: 3 | # https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/SecureContainer.html#Configuration 4 | # 5 | yarn.nodemanager.delete.debug-delay-sec=3600 6 | yarn.nodemanager.local-dirs=/var/tmp/hadoop-yarn/local 7 | yarn.nodemanager.log-dirs=/var/tmp/hadoop-yarn/log 8 | yarn.nodemanager.linux-container-executor.group=yarn 9 | 10 | banned.users=hdfs,yarn,mapred,bin 11 | min.user.id=1000 12 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/hadoop/conf.kerberos/core-site.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | hadoop.tmp.dir 5 | /var/tmp/ 6 | 7 | 8 | 9 | fs.defaultFS 10 | hdfs://master.example.com:9000 11 | 12 | 13 | 14 | hadoop.proxyuser.dask.hosts 15 | * 16 | 17 | 18 | 19 | hadoop.proxyuser.dask.users 20 | * 21 | 22 | 23 | 24 | hadoop.security.authentication 25 | kerberos 26 | 27 | 28 | 29 | hadoop.security.authorization 30 | true 31 | 32 | 33 | 37 | 38 | hadoop.http.filter.initializers 39 | org.apache.hadoop.security.AuthenticationFilterInitializer 40 | 41 | 42 | 43 | hadoop.http.authentication.type 44 | simple 45 | 46 | 47 | 48 | hadoop.http.authentication.signature.secret.file 49 | /opt/hadoop/etc/hadoop/http-secret-file 50 | 51 | 52 | 53 | hadoop.http.authentication.cookie.domain 54 | .example.com 55 | 56 | 57 | 58 | hadoop.http.authentication.simple.anonymous.allowed 59 | true 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/hadoop/conf.kerberos/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | dfs.replication 6 | 1 7 | 8 | 9 | 10 | dfs.permissions.enabled 11 | true 12 | 13 | 14 | 15 | dfs.webhdfs.enabled 16 | true 17 | 18 | 19 | 20 | dfs.block.access.token.enable 21 | true 22 | 23 | 24 | 25 | 26 | dfs.namenode.keytab.file 27 | /opt/hadoop/etc/hadoop/master-keytabs/hdfs.keytab 28 | 29 | 30 | 31 | dfs.namenode.kerberos.principal 32 | hdfs/master.example.com@EXAMPLE.COM 33 | 34 | 35 | 36 | dfs.namenode.kerberos.internal.spnego.principal 37 | HTTP/master.example.com@EXAMPLE.COM 38 | 39 | 40 | 41 | dfs.datanode.keytab.file 42 | /opt/hadoop/etc/hadoop/master-keytabs/hdfs.keytab 43 | 44 | 45 | 46 | dfs.datanode.kerberos.principal 47 | hdfs/master.example.com@EXAMPLE.COM 48 | 49 | 50 | 51 | dfs.web.authentication.kerberos.principal 52 | HTTP/master.example.com@EXAMPLE.COM 53 | 54 | 55 | 56 | 
dfs.web.authentication.kerberos.keytab 57 | /opt/hadoop/etc/hadoop/master-keytabs/HTTP.keytab 58 | 59 | 60 | 61 | 62 | ignore.secure.ports.for.testing 63 | true 64 | 65 | 66 | 67 | dfs.http.policy 68 | HTTP_ONLY 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/hadoop/conf.kerberos/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | yarn.resourcemanager.hostname 6 | master.example.com 7 | 8 | 9 | 14 | 15 | yarn.application.classpath 16 | 17 | $HADOOP_CONF_DIR, 18 | $HADOOP_COMMON_HOME/share/hadoop/common/*, 19 | $HADOOP_COMMON_HOME/share/hadoop/common/lib/*, 20 | $HADOOP_HDFS_HOME/share/hadoop/hdfs/*, 21 | $HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*, 22 | $HADOOP_YARN_HOME/share/hadoop/yarn/*, 23 | $HADOOP_YARN_HOME/share/hadoop/yarn/lib/* 24 | 25 | 26 | 27 | 28 | 29 | yarn.nodemanager.local-dirs 30 | file:///var/tmp/hadoop-yarn/local 31 | 32 | 33 | 34 | yarn.nodemanager.log-dirs 35 | file:///var/tmp/hadoop-yarn/log 36 | 37 | 38 | 39 | yarn.log-aggregation-enable 40 | true 41 | 42 | 43 | 44 | yarn.nodemanager.remote-app-log-dir 45 | hdfs://master.example.com:9000/var/log/hadoop-yarn/apps 46 | 47 | 48 | 49 | 50 | yarn.resourcemanager.scheduler.class 51 | org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler 52 | 53 | 54 | 55 | yarn.scheduler.minimum-allocation-mb 56 | 32 57 | 58 | 59 | 60 | yarn.resource-types.memory-mb.increment-allocation 61 | ${yarn.scheduler.minimum-allocation-mb} 62 | 63 | 64 | 65 | yarn.scheduler.increment-allocation-mb 66 | ${yarn.scheduler.minimum-allocation-mb} 67 | 68 | 69 | 70 | 71 | yarn.resourcemanager.keytab 72 | /opt/hadoop/etc/hadoop/master-keytabs/yarn.keytab 73 | 74 | 75 | 76 | yarn.resourcemanager.principal 77 | yarn/master.example.com@EXAMPLE.COM 78 | 79 | 80 | 81 | yarn.nodemanager.keytab 82 | /opt/hadoop/etc/hadoop/master-keytabs/yarn.keytab 
83 | 84 | 85 | 86 | yarn.nodemanager.principal 87 | yarn/master.example.com@EXAMPLE.COM 88 | 89 | 90 | 91 | 92 | yarn.nodemanager.container-executor.class 93 | org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor 94 | 95 | 96 | 97 | yarn.nodemanager.linux-container-executor.path 98 | /opt/hadoop/bin/container-executor 99 | 100 | 101 | 102 | yarn.nodemanager.linux-container-executor.group 103 | yarn 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/hadoop/conf.simple/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | hadoop.tmp.dir 5 | /var/tmp/ 6 | 7 | 8 | 9 | fs.defaultFS 10 | hdfs://master.example.com:9000 11 | 12 | 13 | 14 | hadoop.security.authentication 15 | simple 16 | 17 | 18 | 19 | hadoop.security.authorization 20 | true 21 | 22 | 23 | 24 | hadoop.http.filter.initializers 25 | org.apache.hadoop.security.AuthenticationFilterInitializer 26 | 27 | 28 | 29 | hadoop.http.authentication.type 30 | simple 31 | 32 | 33 | 34 | hadoop.http.authentication.signature.secret.file 35 | /opt/hadoop/etc/hadoop/http-secret-file 36 | 37 | 38 | 39 | hadoop.http.authentication.cookie.domain 40 | .example.com 41 | 42 | 43 | 44 | hadoop.http.authentication.simple.anonymous.allowed 45 | false 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/hadoop/conf.simple/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dfs.replication 5 | 1 6 | 7 | 8 | 9 | dfs.permissions.enabled 10 | true 11 | 12 | 13 | 14 | dfs.webhdfs.enabled 15 | true 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/krb5.conf: 
-------------------------------------------------------------------------------- 1 | # krb5.conf is a configuration for Kerberos. supervisord is configured to start 2 | # a Kerberos Key Distribution Center (KDC) influenced by this configuration. 3 | # 4 | # krb5.conf reference: 5 | # https://web.mit.edu/kerberos/krb5-1.19/doc/admin/conf_files/krb5_conf.html 6 | # 7 | [logging] 8 | default = FILE:/var/log/supervisor/krb5libs.log 9 | kdc = FILE:/var/log/supervisor/krb5kdc.log 10 | admin_server = FILE:/var/log/supervisor/kadmind.log 11 | 12 | [libdefaults] 13 | default_realm = EXAMPLE.COM 14 | dns_lookup_realm = false 15 | dns_lookup_kdc = false 16 | ticket_lifetime = 24h 17 | forwardable = true 18 | 19 | [realms] 20 | EXAMPLE.COM = { 21 | kdc = master.example.com 22 | admin_server = master.example.com 23 | } 24 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/supervisord.conf: -------------------------------------------------------------------------------- 1 | # supervisord starts other "programs" declared in the additional configuration 2 | # files found in the /etc/supervisor.d folder. 
3 | # 4 | # supervisord configuration reference: 5 | # http://supervisord.org/configuration.html#configuration-file 6 | # 7 | [supervisord] 8 | strip_ansi = true 9 | nodaemon = true 10 | logfile = /var/log/supervisord.log 11 | pidfile = /var/run/supervisord.pid 12 | 13 | [unix_http_server] 14 | file = /tmp/supervisor.sock 15 | 16 | [rpcinterface:supervisor] 17 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 18 | 19 | [supervisorctl] 20 | serverurl = unix:///tmp/supervisor.sock 21 | prompt = hadoop 22 | 23 | [include] 24 | files = /etc/supervisord.d/*.conf 25 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/supervisord.d/hdfs-datanode.conf: -------------------------------------------------------------------------------- 1 | # Configuration reference: 2 | # http://supervisord.org/configuration.html#program-x-section-settings 3 | # 4 | [program:hdfs-datanode] 5 | user = hdfs 6 | command = hdfs datanode 7 | stdout_logfile = /var/log/supervisor/hdfs-datanode.log 8 | redirect_stderr = true 9 | autostart = true 10 | autorestart = false 11 | startsecs = 3 12 | stopwaitsecs = 10 13 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/supervisord.d/hdfs-namenode.conf: -------------------------------------------------------------------------------- 1 | # Configuration reference: 2 | # http://supervisord.org/configuration.html#program-x-section-settings 3 | # 4 | [program:hdfs-namenode] 5 | user = hdfs 6 | command = hdfs namenode 7 | stdout_logfile = /var/log/supervisor/hdfs-namenode.log 8 | redirect_stderr = true 9 | autostart = true 10 | autorestart = false 11 | startsecs = 3 12 | stopwaitsecs = 10 13 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/supervisord.d/kerberos.conf: 
-------------------------------------------------------------------------------- 1 | # Configuration reference: 2 | # http://supervisord.org/configuration.html#program-x-section-settings 3 | # 4 | # krb5kdc or kadmind aren't emitting logs to stdout but writing logs directly to 5 | # files as configured in /etc/krb5.conf 6 | # 7 | [program:krb5kdc] 8 | user = root 9 | command = /usr/sbin/krb5kdc -r EXAMPLE.COM -P /var/run/krb5kdc.pid -n 10 | stdout_logfile = /dev/stdout 11 | stdout_logfile_maxbytes = 0 12 | autostart = true 13 | autorestart = true 14 | 15 | [program:kadmind] 16 | user = root 17 | command = /usr/sbin/kadmind -r EXAMPLE.COM -P /var/run/kadmind.pid -nofork 18 | stdout_logfile = /dev/stdout 19 | stdout_logfile_maxbytes = 0 20 | autostart = true 21 | autorestart = true 22 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/supervisord.d/yarn-nodemanager.conf: -------------------------------------------------------------------------------- 1 | # Configuration reference: 2 | # http://supervisord.org/configuration.html#program-x-section-settings 3 | # 4 | [program:yarn-nodemanager] 5 | user = yarn 6 | command = yarn nodemanager 7 | stdout_logfile = /var/log/supervisor/yarn-nodemanager.log 8 | redirect_stderr = true 9 | autostart = true 10 | autorestart = false 11 | startsecs = 3 12 | stopwaitsecs = 10 13 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/etc/supervisord.d/yarn-resourcemanager.conf: -------------------------------------------------------------------------------- 1 | # Configuration reference: 2 | # http://supervisord.org/configuration.html#program-x-section-settings 3 | # 4 | [program:yarn-resourcemanager] 5 | user = yarn 6 | command = yarn resourcemanager 7 | stdout_logfile = /var/log/supervisor/yarn-resourcemanager.log 8 | redirect_stderr = true 9 | autostart = true 10 | autorestart = 
false 11 | startsecs = 3 12 | stopwaitsecs = 10 13 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/scripts/init-hdfs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | # Exponential backoff on testing hdfs status, then run init script 5 | echo "Waiting to connect to HDFS" 6 | timeout=2 7 | exit_code=0 8 | for attempt in {1..5}; do 9 | hdfs dfs -ls / 10 | exit_code=$? 11 | 12 | if [[ $exit_code == 0 ]]; then 13 | break 14 | fi 15 | 16 | echo "Retrying in $timeout.." 1>&2 17 | sleep $timeout 18 | timeout=$[$timeout * 2] 19 | done 20 | 21 | if [[ $exit_code != 0 ]]; then 22 | echo "Failed to connect to HDFS" 23 | exit $exit_code 24 | fi 25 | echo "HDFS connected, initializing directory structure" 26 | 27 | hdfs dfs -mkdir -p /tmp \ 28 | && hdfs dfs -chmod -R 1777 /tmp \ 29 | && hdfs dfs -mkdir -p /var/log \ 30 | && hdfs dfs -chmod -R 1775 /var/log \ 31 | && hdfs dfs -chown yarn:hadoop /var/log \ 32 | && hdfs dfs -mkdir -p /tmp/hadoop-yarn \ 33 | && hdfs dfs -chown -R mapred:hadoop /tmp/hadoop-yarn \ 34 | && hdfs dfs -mkdir -p /tmp/hadoop-yarn/staging/history/done_intermediate \ 35 | && hdfs dfs -chown -R mapred:hadoop /tmp/hadoop-yarn/staging \ 36 | && hdfs dfs -chmod -R 1777 /tmp \ 37 | && hdfs dfs -mkdir -p /var/log/hadoop-yarn/apps \ 38 | && hdfs dfs -chmod -R 1777 /var/log/hadoop-yarn/apps \ 39 | && hdfs dfs -chown yarn:hadoop /var/log/hadoop-yarn/apps \ 40 | && hdfs dfs -mkdir -p /user \ 41 | && hdfs dfs -mkdir -p /user/root \ 42 | && hdfs dfs -chmod -R 777 /user/root \ 43 | && hdfs dfs -chown root /user/root \ 44 | && hdfs dfs -mkdir -p /user/history \ 45 | && hdfs dfs -chmod -R 1777 /user/history \ 46 | && hdfs dfs -chown mapred:hadoop /user/history \ 47 | && hdfs dfs -mkdir -p /user/dask \ 48 | && hdfs dfs -chown dask /user/dask \ 49 | && hdfs dfs -mkdir -p /user/alice \ 50 | && hdfs dfs -chown alice /user/alice \ 
51 | && hdfs dfs -mkdir -p /user/bob \ 52 | && hdfs dfs -chown bob /user/bob 53 | 54 | exit_code=$? 55 | if [[ $exit_code != 0 ]]; then 56 | echo "Failed to initialize HDFS" 57 | exit $exit_code 58 | fi 59 | echo "Initialized HDFS" 60 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/scripts/setup-hadoop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | # Tweak hadoop configuration and permissions: 5 | # 6 | # - hadoop is unpacked with default configuration in etc/hadoop, we relocate 7 | # that to /etc/hadoop/conf.empty. 8 | # 9 | mv /opt/hadoop/etc/hadoop /etc/hadoop/conf.empty 10 | # 11 | # - log4j.properties is a requirement to have in the hadoop configuration 12 | # directory that we don't wan't to redefine, so we copy it from the default 13 | # configuration to our configurations. 14 | # 15 | cp /etc/hadoop/conf.empty/log4j.properties /etc/hadoop/conf.simple/ 16 | cp /etc/hadoop/conf.empty/log4j.properties /etc/hadoop/conf.kerberos/ 17 | # 18 | # - Create /opt/hadoop/logs directory with high group permissions to ensure it 19 | # isn't created with narrow permissions later when running "hdfs namenode". 20 | # 21 | mkdir -p /opt/hadoop/logs 22 | chmod g+w /opt/hadoop/logs 23 | # 24 | # - Create /var/tmp directory with permissions to ensure the hadoop group is 25 | # propegated and have right to create new directories. Note that the hdfs user 26 | # will later create /var/tmp/dfs but then get to own it even though it will be 27 | # owned also by the hadoop group due to the 2xxx part of these permissions. 28 | # 29 | chown -R root:hadoop /var/tmp 30 | chmod -R 2770 /var/tmp 31 | # 32 | # - Generate a key to authenticate web access during the brief time we use the 33 | # /etc/hadoop/conf.simple configuration as part of building the docker image. 
34 | # 35 | dd if=/dev/urandom bs=64 count=1 > /etc/hadoop/conf.simple/http-secret-file 36 | chown root:hadoop /etc/hadoop/conf.simple/http-secret-file 37 | chmod 440 /etc/hadoop/conf.simple/http-secret-file 38 | # 39 | # - Declare HDFS configuration to use temporarily, let /opt/hadoop/etc/hadoop 40 | # point to /etc/hadoop/conf.simple. 41 | # 42 | alternatives --install /opt/hadoop/etc/hadoop hadoop-conf /etc/hadoop/conf.simple 50 43 | alternatives --set hadoop-conf /etc/hadoop/conf.simple 44 | 45 | 46 | 47 | 48 | # Initialize HDFS filesystem with content to test against 49 | # 50 | # 1. Delete all hdfs files and start with a clean slate. 51 | # 52 | sudo --preserve-env --user hdfs \ 53 | hdfs namenode -format -force 54 | # 55 | # 2. Add to hosts to resolve a domain name, /etc/hosts will be cleared when the 56 | # container starts though, see https://stackoverflow.com/a/25613983. This 57 | # container is supposed to start with "--hostname master.example.com". 58 | # 59 | echo "127.0.0.1 master.example.com" >> /etc/hosts 60 | # 61 | # 3. Start "hdfs namenode" and "hdfs datanode" but detach with "&" to continue 62 | # doing other things. 63 | # 64 | sudo --preserve-env --user hdfs \ 65 | hdfs namenode & 66 | sudo --preserve-env --user hdfs \ 67 | hdfs datanode & 68 | # 69 | # 4. Run a script to bootstrap the HDFS filesystem with content for testing. 70 | # 71 | sudo --preserve-env --user hdfs \ 72 | /scripts/init-hdfs.sh 73 | # 74 | # 5. Shut down started "hdfs namenode" and "hdfs datanode" processes. 
75 | # 76 | pkill java 77 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/var/kerberos/krb5kdc/kadm5.acl: -------------------------------------------------------------------------------- 1 | root/admin@EXAMPLE.COM ex 2 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/files/var/kerberos/krb5kdc/kdc.conf: -------------------------------------------------------------------------------- 1 | [kdcdefaults] 2 | kdc_ports = 88 3 | kdc_tcp_ports = 88 4 | 5 | [realms] 6 | EXAMPLE.COM = { 7 | acl_file = /var/kerberos/krb5kdc/kadm5.acl 8 | dict_file = /usr/share/dict/words 9 | admin_keytab = /var/kerberos/krb5kdc/kadm5.keytab 10 | supported_enctypes = des3-hmac-sha1:normal arcfour-hmac:normal des-hmac-sha1:normal des-cbc-md5:normal des-cbc-crc:normal 11 | } 12 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker exec hadoop /working/continuous_integration/docker/hadoop/_install.sh 3 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/print_logs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker exec hadoop /working/continuous_integration/docker/hadoop/_print_logs.sh 3 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xe 3 | 4 | ci_docker_hadoop="$(dirname "${BASH_SOURCE[0]}")" 5 | full_path_ci_docker_hadoop="$(cd "${ci_docker_hadoop}" && pwd)" 6 | git_root="$(cd "${full_path_ci_docker_hadoop}/../../.." 
&& pwd)" 7 | 8 | docker run --rm -d \ 9 | --name hadoop \ 10 | -h master.example.com \ 11 | -v "$git_root":/working \ 12 | -p 8000:8000 \ 13 | -p 8786:8786 \ 14 | -p 8088:8088 \ 15 | ghcr.io/dask/dask-gateway-ci-hadoop 16 | 17 | # The hadoop container's systemd process emits logs about the progress of 18 | # starting up declared services that we will await. 19 | # 20 | # We do it to avoid getting OOMKilled by peaking memory needs during startup, 21 | # which is prone to happen if we run pip install at the same time. 22 | # 23 | # Practically, we await "entered RUNNING state" to be observed exactly 6 times, 24 | # which represents our 6 systemd services. 25 | # 26 | # INFO success: kadmind entered RUNNING state, process has stayed up for > than 1 seconds (startsecs) 27 | # INFO success: krb5kdc entered RUNNING state, process has stayed up for > than 1 seconds (startsecs) 28 | # INFO success: hdfs-namenode entered RUNNING state, process has stayed up for > than 3 seconds (startsecs) 29 | # INFO success: hdfs-datanode entered RUNNING state, process has stayed up for > than 3 seconds (startsecs) 30 | # INFO success: yarn-resourcemanager entered RUNNING state, process has stayed up for > than 3 seconds (startsecs) 31 | # INFO success: yarn-nodemanager entered RUNNING state, process has stayed up for > than 3 seconds (startsecs) 32 | # 33 | set +x 34 | await_startup() { 35 | i=0; while [ $i -ne 30 ]; do 36 | docker logs hadoop | grep "entered RUNNING state" | wc -l 2>/dev/null | grep --silent "6" \ 37 | && start_script_finishing=true && break \ 38 | || start_script_finishing=false && sleep 1 && i=$((i + 1)) && echo "Waiting for hadoop container startup ($i seconds)" 39 | done 40 | if [ "$start_script_finishing" != "true" ]; then 41 | echo "WARNING: /script/start.sh was slow to finish!" 42 | exit 1 43 | fi 44 | 45 | echo "hadoop container started!" 
46 | } 47 | await_startup 48 | -------------------------------------------------------------------------------- /continuous_integration/docker/hadoop/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker exec hadoop /working/continuous_integration/docker/hadoop/_test.sh 3 | -------------------------------------------------------------------------------- /continuous_integration/docker/pbs/Dockerfile: -------------------------------------------------------------------------------- 1 | # See continuous_integration/docker/README.md for details about this and other 2 | # Dockerfiles under the continuous_integration/docker folder on their purpose 3 | # and how to work with them. 4 | # 5 | FROM ghcr.io/dask/dask-gateway-ci-base:latest 6 | 7 | # Set labels based on the Open Containers Initiative (OCI): 8 | # https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys 9 | # 10 | LABEL org.opencontainers.image.url="https://github.com/dask/dask-gateway/blob/HEAD/continuous_integration/docker/pbs/Dockerfile" 11 | 12 | # Notify dask-gateway tests that PBS is available 13 | ENV TEST_DASK_GATEWAY_PBS true 14 | 15 | # Install openpbs 16 | # 17 | # 1. Download and install .rpm 18 | # 19 | # OpenPBS versions: https://github.com/openpbs/openpbs/releases 20 | # 21 | # We use an old version because there isn't a modern one pre-built for 22 | # centos:7 as used in the base image. The old version was called propbs, so 23 | # there is a change needed in the download url related to that if switching 24 | # to a newwer version. 
25 | # 26 | RUN INSTALL_OPENPBS_VERSION=19.1.3 \ 27 | && yum install -y unzip \ 28 | \ 29 | && curl -sL -o /tmp/openpbs.zip https://github.com/openpbs/openpbs/releases/download/v${INSTALL_OPENPBS_VERSION}/pbspro_${INSTALL_OPENPBS_VERSION}.centos_7.zip \ 30 | && unzip /tmp/openpbs.zip -d /opt/openpbs \ 31 | && rm /tmp/openpbs.zip \ 32 | && yum install -y \ 33 | /opt/openpbs/*pbs*/*-server-*.rpm \ 34 | \ 35 | && yum remove -y unzip \ 36 | && yum clean all \ 37 | && rm -rf /var/cache/yum 38 | # 39 | # 2. Update PATH environment variable 40 | # 41 | # Note that this PATH environment will be preserved when sudo is used to 42 | # switch to other users thanks to changes to /etc/sudoers.d/preserve_path, 43 | # which is configured in the base Dockerfile. 44 | # 45 | ENV PATH=/opt/pbs/bin:$PATH 46 | 47 | # Copy over files 48 | COPY ./files / 49 | 50 | ENTRYPOINT ["/opt/python/bin/tini", "-g", "--"] 51 | CMD ["/scripts/start.sh"] 52 | -------------------------------------------------------------------------------- /continuous_integration/docker/pbs/_install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xe 3 | 4 | cd /working 5 | 6 | # FIXME: pip should be installed to a modern version in the base image instead 7 | # of being upgraded here. It isn't because of 8 | # https://github.com/dask/dask-gateway/issues/837. 
9 | pip install "pip==24.*" 10 | 11 | # This installs everything we need for tests 12 | pip install -r tests/requirements.txt 13 | 14 | pip list 15 | -------------------------------------------------------------------------------- /continuous_integration/docker/pbs/_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xe 3 | 4 | cd /working 5 | pytest -v tests/test_pbs_backend.py 6 | -------------------------------------------------------------------------------- /continuous_integration/docker/pbs/files/etc/sudoers.d/dask: -------------------------------------------------------------------------------- 1 | Cmnd_Alias DASK_GATEWAY_JOBQUEUE_LAUNCHER = /opt/python/bin/dask-gateway-jobqueue-launcher 2 | 3 | %dask_users ALL=(dask) /usr/bin/sudo 4 | dask ALL=(%dask_users) NOPASSWD:DASK_GATEWAY_JOBQUEUE_LAUNCHER 5 | -------------------------------------------------------------------------------- /continuous_integration/docker/pbs/files/scripts/start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -ex 3 | 4 | PBS_CONF_FILE=/etc/pbs.conf 5 | MOM_CONF_FILE=/var/spool/pbs/mom_priv/config 6 | HOSTNAME=$(hostname) 7 | 8 | # Configure PBS to run all on one node 9 | # 10 | # Configuration references: 11 | # - https://github.com/openpbs/openpbs/blob/master/doc/man8/pbs.conf.8B 12 | # - https://github.com/openpbs/openpbs/blob/HEAD/doc/man8/pbs_comm.8B 13 | # - https://github.com/openpbs/openpbs/blob/master/doc/man8/pbs_mom.8B 14 | # 15 | sed -i "s/PBS_SERVER=.*/PBS_SERVER=$HOSTNAME/" $PBS_CONF_FILE 16 | sed -i "s/PBS_START_MOM=.*/PBS_START_MOM=1/" $PBS_CONF_FILE 17 | sed -i "s/\$clienthost .*/\$clienthost $HOSTNAME/" $MOM_CONF_FILE 18 | echo "\$usecp *:/ /" >> $MOM_CONF_FILE 19 | 20 | # Reduce the memory footprint by using less threads to avoid the OOMKiller in 21 | # GitHub Actions as observed with exit code 137. 
22 | # 23 | echo "PBS_COMM_THREADS=2" >> $PBS_CONF_FILE 24 | 25 | # Start PBS 26 | /etc/init.d/pbs start 27 | 28 | # Reduce time between PBS scheduling and add history 29 | /opt/pbs/bin/qmgr -c "set server scheduler_iteration = 20" 30 | /opt/pbs/bin/qmgr -c "set server job_history_enable = True" 31 | /opt/pbs/bin/qmgr -c "set server job_history_duration = 24:00:00" 32 | /opt/pbs/bin/qmgr -c "set node pbs queue=workq" 33 | /opt/pbs/bin/qmgr -c "set server operators += dask@pbs" 34 | 35 | # "Entering sleep" can be used as a signal in logs that we have passed the 36 | # initialization phase where the memory needs may peak and expose us to the 37 | # OOMKiller and 137 exit codes. 38 | # 39 | echo "Entering sleep" 40 | sleep infinity 41 | -------------------------------------------------------------------------------- /continuous_integration/docker/pbs/install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker exec pbs /working/continuous_integration/docker/pbs/_install.sh 3 | -------------------------------------------------------------------------------- /continuous_integration/docker/pbs/print_logs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker logs pbs 3 | -------------------------------------------------------------------------------- /continuous_integration/docker/pbs/start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xe 3 | 4 | this_dir="$(dirname "${BASH_SOURCE[0]}")" 5 | full_path_this_dir="$(cd "${this_dir}" && pwd)" 6 | git_root="$(cd "${full_path_this_dir}/../../.." 
&& pwd)" 7 | 8 | docker run --rm -d \ 9 | --name pbs \ 10 | -h pbs \ 11 | -v "$git_root":/working \ 12 | -p 8000:8000 \ 13 | -p 8786:8786 \ 14 | -p 8088:8088 \ 15 | --cap-add=SYS_RESOURCE \ 16 | ghcr.io/dask/dask-gateway-ci-pbs 17 | 18 | # The pbs container's entrypoint, files/scripts/start.sh, emits a log message 19 | # that we will await. 20 | # 21 | # We do it to avoid getting OOMKilled by peaking memory needs during startup, 22 | # which is prone to happen if we run pip install at the same time. 23 | # 24 | set +x 25 | await_startup() { 26 | i=0; while [ $i -ne 30 ]; do 27 | docker logs pbs 2>/dev/null | grep --silent "Entering sleep" \ 28 | && start_script_finishing=true && break \ 29 | || start_script_finishing=false && sleep 1 && i=$((i + 1)) && echo "Waiting for pbs container startup ($i seconds)" 30 | done 31 | if [ "$start_script_finishing" != "true" ]; then 32 | echo "WARNING: /script/start.sh was slow to finish!" 33 | exit 1 34 | fi 35 | echo "pbs container started!" 36 | 37 | # We add some seconds of precautionary sleep to avoid unknown and hard to 38 | # debug issues. 39 | sleep 3 40 | } 41 | await_startup 42 | -------------------------------------------------------------------------------- /continuous_integration/docker/pbs/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker exec -u dask pbs /working/continuous_integration/docker/pbs/_test.sh 3 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/Dockerfile: -------------------------------------------------------------------------------- 1 | # See continuous_integration/docker/README.md for details about this and other 2 | # Dockerfiles under the continuous_integration/docker folder on their purpose 3 | # and how to work with them. 
4 | # 5 | FROM ghcr.io/dask/dask-gateway-ci-base:latest 6 | 7 | # Set labels based on the Open Containers Initiative (OCI): 8 | # https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys 9 | # 10 | LABEL org.opencontainers.image.url="https://github.com/dask/dask-gateway/blob/HEAD/continuous_integration/docker/slurm/Dockerfile" 11 | 12 | # Notify dask-gateway tests that Slurm is available 13 | ENV TEST_DASK_GATEWAY_SLURM true 14 | 15 | # Install Slurm 16 | # 17 | # 1. Download and compile slurm 18 | # 19 | # Slurm versions: https://download.schedmd.com/slurm/ 20 | # Slurm release notes: https://github.com/SchedMD/slurm/blame/HEAD/RELEASE_NOTES 21 | # 22 | RUN INSTALL_SLURM_VERSION=22.05.5 \ 23 | && yum install -y \ 24 | # required to install supervisor (and more?) 25 | epel-release \ 26 | && yum install -y \ 27 | # temporary installation dependencies later uninstalled 28 | bzip2 \ 29 | gcc \ 30 | mariadb-devel \ 31 | munge-devel \ 32 | ncurses-devel \ 33 | openssl-devel \ 34 | readline-devel \ 35 | # persistent installation dependencies 36 | man2html \ 37 | mariadb-server \ 38 | munge \ 39 | openssl \ 40 | perl \ 41 | supervisor \ 42 | \ 43 | && curl -sL https://download.schedmd.com/slurm/slurm-${INSTALL_SLURM_VERSION}.tar.bz2 \ 44 | | tar --extract --verbose --bzip2 --directory=/tmp \ 45 | && cd /tmp/slurm-* \ 46 | && ./configure \ 47 | --sysconfdir=/etc/slurm \ 48 | --with-mysql_config=/usr/bin \ 49 | --libdir=/usr/lib64 \ 50 | && make install \ 51 | && rm -rf /tmp/slurm-* \ 52 | \ 53 | && yum remove -y \ 54 | bzip2 \ 55 | gcc \ 56 | mariadb-devel \ 57 | munge-devel \ 58 | ncurses-devel \ 59 | openssl-devel \ 60 | readline-devel \ 61 | && yum clean all \ 62 | && rm -rf /var/cache/yum 63 | # 64 | # 2. 
Setup Slurm 65 | # 66 | RUN groupadd --system slurm \ 67 | && useradd --system --gid slurm slurm \ 68 | && mkdir \ 69 | /etc/sysconfig/slurm \ 70 | /var/lib/slurmd \ 71 | /var/log/slurm \ 72 | /var/run/slurmd \ 73 | /var/spool/slurmd \ 74 | && chown slurm:slurm \ 75 | /var/lib/slurmd \ 76 | /var/log/slurm \ 77 | /var/run/slurmd \ 78 | /var/spool/slurmd \ 79 | && /sbin/create-munge-key 80 | # 81 | # 3. Copy misc configuration files 82 | # 83 | COPY --chown=slurm:slurm ./files/etc/slurm /etc/slurm/ 84 | COPY ./files/etc/sudoers.d /etc/sudoers.d/ 85 | COPY ./files/etc/supervisord.conf /etc/ 86 | COPY ./files/etc/supervisord.d /etc/supervisord.d/ 87 | RUN chmod 644 /etc/slurm/slurm.conf \ 88 | && chmod 600 /etc/slurm/slurmdbd.conf \ 89 | && chmod 440 /etc/sudoers.d/dask \ 90 | && chmod 644 /etc/supervisord.conf \ 91 | && chmod 644 /etc/supervisord.d/* 92 | # 93 | # 4. Initialize a Slurm database 94 | # 95 | COPY ./files/scripts /scripts/ 96 | RUN /scripts/init-mysql.sh 97 | 98 | ENTRYPOINT ["/usr/bin/supervisord", "--configuration", "/etc/supervisord.conf"] 99 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/_install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xe 3 | 4 | cd /working 5 | 6 | # FIXME: pip should be installed to a modern version in the base image instead 7 | # of being upgraded here. It isn't because of 8 | # https://github.com/dask/dask-gateway/issues/837. 
9 | pip install "pip==24.*" 10 | 11 | # This installs everything we need for tests 12 | pip install -r tests/requirements.txt 13 | 14 | pip list 15 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/_print_logs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Bold high intensity green 4 | G='\033[1;92m' 5 | # No color 6 | NC='\033[0m' 7 | 8 | printf "\n${G}supervisorctl status${NC}\n" 9 | supervisorctl status 10 | 11 | printf "\n${G}cat /var/log/supervisord.log${NC}\n" 12 | cat /var/log/supervisord.log 13 | printf "\n${G}cat /var/log/supervisor/slurmdbd.log${NC}\n" 14 | cat /var/log/supervisor/slurmdbd.log 15 | printf "\n${G}cat /var/log/supervisor/slurmctld.log${NC}\n" 16 | cat /var/log/supervisor/slurmctld.log 17 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xe 3 | 4 | cd /working 5 | pytest -v tests/test_slurm_backend.py 6 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/files/etc/slurm/cgroup.conf: -------------------------------------------------------------------------------- 1 | # Configuration reference: https://slurm.schedmd.com/cgroup.conf.html 2 | # 3 | # This file was added as a workaround when upgrading from slurm 4 | # 21.08.6 to 22.05.5, where slurmd failed to start with an error message 5 | # logged in /var/log/slurm/slurmd.log saying: 6 | # 7 | # error: Couldn't find the specified plugin name for cgroup/v2 looking at all files 8 | # error: cannot find cgroup plugin for cgroup/v2 9 | # error: cannot create cgroup context for cgroup/v2 10 | # error: Unable to initialize cgroup plugin 11 | # error: slurmd initialization failed 12 | # 13 | 
CgroupPlugin=cgroup/v1 14 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/files/etc/slurm/slurm.conf: -------------------------------------------------------------------------------- 1 | # Configuration reference: https://slurm.schedmd.com/slurm.conf.html 2 | # 3 | ClusterName=linux 4 | ControlMachine=slurm 5 | SlurmUser=slurm 6 | SlurmctldPort=6817 7 | SlurmdPort=6818 8 | AuthType=auth/munge 9 | StateSaveLocation=/var/lib/slurmd 10 | SlurmdSpoolDir=/var/spool/slurmd 11 | SwitchType=switch/none 12 | MpiDefault=none 13 | SlurmctldPidFile=/var/run/slurmd/slurmctld.pid 14 | SlurmdPidFile=/var/run/slurmd/slurmd.pid 15 | ProctrackType=proctrack/pgid 16 | ReturnToService=0 17 | SlurmctldTimeout=300 18 | SlurmdTimeout=300 19 | InactiveLimit=0 20 | MinJobAge=300 21 | KillWait=30 22 | Waittime=0 23 | SchedulerType=sched/backfill 24 | SelectType=select/cons_res 25 | SelectTypeParameters=CR_CPU_Memory 26 | SlurmctldDebug=3 27 | SlurmctldLogFile=/var/log/slurm/slurmctld.log 28 | SlurmdDebug=3 29 | SlurmdLogFile=/var/log/slurm/slurmd.log 30 | JobCompType=jobcomp/none 31 | AccountingStorageType=accounting_storage/slurmdbd 32 | # Nodes 33 | SlurmdParameters=config_overrides 34 | NodeName=slurm RealMemory=4096 Sockets=4 CoresPerSocket=4 ThreadsPerCore=4 35 | # Partitions 36 | PartitionName=DEFAULT Nodes=ALL OverSubscribe=FORCE:8 MaxTime=INFINITE State=UP 37 | PartitionName=dev Priority=10 Default=YES 38 | PartitionName=prod Priority=20 Default=NO 39 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/files/etc/slurm/slurmdbd.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Example slurmdbd.conf file. 3 | # 4 | # See the slurmdbd.conf man page for more information. 
5 | # 6 | # Archive info 7 | #ArchiveJobs=yes 8 | #ArchiveDir="/tmp" 9 | #ArchiveSteps=yes 10 | #ArchiveScript= 11 | #JobPurge=12 12 | #StepPurge=1 13 | # 14 | # Authentication info 15 | AuthType=auth/munge 16 | #AuthInfo=/var/run/munge/munge.socket.2 17 | # 18 | # slurmDBD info 19 | DbdAddr=localhost 20 | DbdHost=localhost 21 | #DbdPort=7031 22 | SlurmUser=slurm 23 | #MessageTimeout=300 24 | DebugLevel=4 25 | #DefaultQOS=normal,standby 26 | LogFile=/var/log/slurm/slurmdbd.log 27 | PidFile=/var/run/slurmdbd.pid 28 | #PluginDir=/usr/lib/slurm 29 | #PrivateData=accounts,users,usage,jobs 30 | #TrackWCKey=yes 31 | # 32 | # Database info 33 | StorageType=accounting_storage/mysql 34 | StorageHost=localhost 35 | StoragePass=password 36 | StorageUser=slurm 37 | StorageLoc=slurm_acct_db 38 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/files/etc/sudoers.d/dask: -------------------------------------------------------------------------------- 1 | Cmnd_Alias DASK_GATEWAY_JOBQUEUE_LAUNCHER = /opt/python/bin/dask-gateway-jobqueue-launcher 2 | 3 | %dask_users ALL=(dask) /usr/bin/sudo 4 | dask ALL=(%dask_users) NOPASSWD:DASK_GATEWAY_JOBQUEUE_LAUNCHER 5 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/files/etc/supervisord.conf: -------------------------------------------------------------------------------- 1 | # supervisord starts other "programs" declared in the additional configuration 2 | # files found in the /etc/supervisord.d folder.
3 | # 4 | # supervisord configuration reference: 5 | # http://supervisord.org/configuration.html#configuration-file 6 | # 7 | [supervisord] 8 | strip_ansi = true 9 | nodaemon = true 10 | logfile = /var/log/supervisord.log 11 | pidfile = /var/run/supervisord.pid 12 | 13 | [unix_http_server] 14 | file = /tmp/supervisor.sock 15 | 16 | [rpcinterface:supervisor] 17 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 18 | 19 | [supervisorctl] 20 | serverurl = unix:///tmp/supervisor.sock 21 | prompt = slurm 22 | 23 | [include] 24 | files = /etc/supervisord.d/*.conf 25 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/files/etc/supervisord.d/slurm.conf: -------------------------------------------------------------------------------- 1 | # Configuration reference: 2 | # http://supervisord.org/configuration.html#program-x-section-settings 3 | # 4 | [program:munged] 5 | user=munge 6 | command=/usr/sbin/munged -F 7 | autostart=true 8 | autorestart=true 9 | startsecs=5 10 | startretries=2 11 | exitcodes=0,1,2 12 | stdout_logfile=/var/log/supervisor/munged.log 13 | stdout_logfile_maxbytes=1MB 14 | stdout_logfile_backups=5 15 | stderr_logfile=/var/log/supervisor/munged.log 16 | stderr_logfile_maxbytes=1MB 17 | stderr_logfile_backups=5 18 | priority=1 19 | 20 | [program:mysqld] 21 | command=/usr/bin/pidproxy /var/run/mariadb/mariadb.pid /usr/bin/mysqld_safe 22 | stdout_logfile=/var/log/supervisor/mysqld.log 23 | stdout_logfile_maxbytes=1MB 24 | stdout_logfile_backups=5 25 | stderr_logfile=/var/log/supervisor/mysqld.log 26 | stderr_logfile_maxbytes=1MB 27 | stderr_logfile_backups=5 28 | exitcodes=0,1,2 29 | autostart=true 30 | autorestart=false 31 | priority=2 32 | 33 | [program:slurmdbd] 34 | user=root 35 | command=/bin/bash -c "until echo 'SELECT 1' | mysql -h localhost -uslurm -ppassword &> /dev/null; do sleep 1; done && /usr/local/sbin/slurmdbd -Dvvv" 36 | autostart=true 37 | 
autorestart=false 38 | exitcodes=0,1,2 39 | stdout_logfile=/var/log/supervisor/slurmdbd.log 40 | stdout_logfile_maxbytes=1MB 41 | stdout_logfile_backups=5 42 | stderr_logfile=/var/log/supervisor/slurmdbd.log 43 | stderr_logfile_maxbytes=1MB 44 | stderr_logfile_backups=5 45 | priority=10 46 | 47 | [program:slurmctld] 48 | user=root 49 | command=/bin/bash -c "until 2>/dev/null >/dev/tcp/localhost/6819; do sleep 1; done && /usr/local/sbin/slurmctld -Dvvv" 50 | autostart=true 51 | autorestart=false 52 | startsecs=3 53 | exitcodes=0,1,2 54 | stdout_logfile=/var/log/supervisor/slurmctld.log 55 | stdout_logfile_maxbytes=1MB 56 | stdout_logfile_backups=5 57 | stderr_logfile=/var/log/supervisor/slurmctld.log 58 | stderr_logfile_maxbytes=1MB 59 | stderr_logfile_backups=5 60 | priority=50 61 | 62 | [program:slurmd] 63 | user=root 64 | command=/bin/bash -c "until 2>/dev/null >/dev/tcp/localhost/6817; do sleep 1; done && /usr/local/sbin/slurmd -Dvvv" 65 | autostart=true 66 | autorestart=false 67 | exitcodes=0,1,2 68 | stdout_logfile=/var/log/supervisor/slurmd.log 69 | stdout_logfile_maxbytes=1MB 70 | stdout_logfile_backups=5 71 | stderr_logfile=/var/log/supervisor/slurmd.log 72 | stderr_logfile_maxbytes=1MB 73 | stderr_logfile_backups=5 74 | priority=100 75 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/files/scripts/init-mysql.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ ! -f "/var/lib/mysql/ibdata1" ]; then 4 | echo "- Initializing database" 5 | /usr/bin/mysql_install_db &> /dev/null 6 | echo "- Database initialized" 7 | echo "- Updating MySQL directory permissions" 8 | chown -R mysql:mysql /var/lib/mysql 9 | chown -R mysql:mysql /var/run/mariadb 10 | fi 11 | 12 | if [ ! 
-d "/var/lib/mysql/slurm_acct_db" ]; then 13 | /usr/bin/mysqld_safe --datadir='/var/lib/mysql' & 14 | 15 | for count in {30..0}; do 16 | if echo "SELECT 1" | mysql &> /dev/null; then 17 | break 18 | fi 19 | echo "- Starting MariaDB to create Slurm account database" 20 | sleep 1 21 | done 22 | 23 | if [[ "$count" -eq 0 ]]; then 24 | echo >&2 "MariaDB did not start" 25 | exit 1 26 | fi 27 | 28 | echo "- Creating Slurm acct database" 29 | mysql -NBe "CREATE DATABASE slurm_acct_db" 30 | mysql -NBe "CREATE USER 'slurm'@'localhost'" 31 | mysql -NBe "SET PASSWORD for 'slurm'@'localhost' = password('password')" 32 | mysql -NBe "GRANT USAGE ON *.* to 'slurm'@'localhost'" 33 | mysql -NBe "GRANT ALL PRIVILEGES on slurm_acct_db.* to 'slurm'@'localhost'" 34 | mysql -NBe "FLUSH PRIVILEGES" 35 | echo "- Slurm acct database created. Stopping MariaDB" 36 | pkill mysqld 37 | for count in {30..0}; do 38 | if echo "SELECT 1" | mysql &> /dev/null; then 39 | sleep 1 40 | else 41 | break 42 | fi 43 | done 44 | if [[ "$count" -eq 0 ]]; then 45 | echo >&2 "MariaDB did not stop" 46 | exit 1 47 | fi 48 | fi 49 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker exec slurm /working/continuous_integration/docker/slurm/_install.sh 3 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/print_logs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker exec slurm /working/continuous_integration/docker/slurm/_print_logs.sh 3 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xe 3 | 4 | 
this_dir="$(dirname "${BASH_SOURCE[0]}")" 5 | full_path_this_dir="$(cd "${this_dir}" && pwd)" 6 | git_root="$(cd "${full_path_this_dir}/../../.." && pwd)" 7 | 8 | docker run --rm -d \ 9 | --name slurm \ 10 | -h slurm \ 11 | -v "$git_root":/working \ 12 | -p 8000:8000 \ 13 | -p 8786:8786 \ 14 | -p 8088:8088 \ 15 | ghcr.io/dask/dask-gateway-ci-slurm 16 | 17 | # The slurm container's supervisord process emits logs about the progress of 18 | # starting up declared services that we will await. 19 | # 20 | # We do it to avoid getting OOMKilled by peak memory needs during startup, 21 | # which is prone to happen if we run pip install at the same time. 22 | # 23 | # Practically, we await "entered RUNNING state" to be observed exactly 5 times, 24 | # which represents our 5 supervisord programs. 25 | # 26 | # INFO success: mysqld entered RUNNING state, process has stayed up for > than 1 seconds (startsecs) 27 | # INFO success: slurmdbd entered RUNNING state, process has stayed up for > than 1 seconds (startsecs) 28 | # INFO success: slurmd entered RUNNING state, process has stayed up for > than 1 seconds (startsecs) 29 | # INFO success: slurmctld entered RUNNING state, process has stayed up for > than 3 seconds (startsecs) 30 | # INFO success: munged entered RUNNING state, process has stayed up for > than 5 seconds (startsecs) 31 | # 32 | set +x 33 | await_startup() { 34 | i=0; while [ $i -ne 30 ]; do 35 | docker logs slurm | grep "entered RUNNING state" | wc -l 2>/dev/null | grep --silent "5" \ 36 | && start_script_finishing=true && break \ 37 | || start_script_finishing=false && sleep 1 && i=$((i + 1)) && echo "Waiting for slurm container startup ($i seconds)" 38 | done 39 | if [ "$start_script_finishing" != "true" ]; then 40 | echo "WARNING: /script/start.sh was slow to finish!" 41 | exit 1 42 | fi 43 | 44 | echo "slurm container started!"
45 | } 46 | await_startup 47 | -------------------------------------------------------------------------------- /continuous_integration/docker/slurm/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker exec -u dask slurm /working/continuous_integration/docker/slurm/_test.sh 3 | -------------------------------------------------------------------------------- /continuous_integration/kubernetes/build-publish-helm-chart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script publishes the Helm chart to the Dask Helm chart repo and pushes 3 | # associated built docker images to our container registry using chartpress. 4 | # -------------------------------------------------------------------------- 5 | 6 | # Exit on errors, assert env vars, log commands 7 | set -eux 8 | 9 | PUBLISH_ARGS="--push --publish-chart \ 10 | --builder=docker-buildx \ 11 | --platform=linux/amd64 \ 12 | --platform=linux/arm64 \ 13 | " 14 | 15 | # chartpress needs to run next to resources/helm/chartpress.yaml 16 | cd resources/helm 17 | 18 | # chartpress use git to push to our Helm chart repository, which is the gh-pages 19 | # branch of dask/helm-chart. We assume permissions to the docker registry are 20 | # already configured. 21 | if [[ $GITHUB_REF != refs/tags/* ]]; then 22 | # Using --extra-message, we help readers of merged PRs to know what version 23 | # they need to bump to in order to make use of the PR. This is enabled by a 24 | # GitHub notificaiton in the PR like "Github Action user pushed a commit to 25 | # dask/helm-chart that referenced this pull request..." 26 | # 27 | # ref: https://github.com/jupyterhub/chartpress#usage 28 | # 29 | # NOTE: GitHub merge commits contain a PR reference like #123. `sed` is used 30 | # to extract a PR reference like #123 or a commit hash reference like 31 | # @123abcd. 
Combined with GITHUB_REPOSITORY we craft a commit message 32 | # like dask/dask-gateway#123 or dask/dask-gateway@123abcd. 33 | PR_OR_HASH=$(git log -1 --pretty=%h-%B | head -n1 | sed 's/^.*\(#[0-9]*\).*/\1/' | sed 's/^\([0-9a-f]*\)-.*/@\1/') 34 | LATEST_COMMIT_TITLE=$(git log -1 --pretty=%B | head -n1) 35 | EXTRA_MESSAGE="${GITHUB_REPOSITORY}${PR_OR_HASH} ${LATEST_COMMIT_TITLE}" 36 | 37 | # shellcheck disable=SC2086 38 | chartpress $PUBLISH_ARGS --extra-message "${EXTRA_MESSAGE}" 39 | else 40 | # Setting a tag explicitly enforces a rebuild if this tag had already been 41 | # built and we wanted to override it. 42 | 43 | # shellcheck disable=SC2086 44 | chartpress $PUBLISH_ARGS --tag "${GITHUB_REF:10}" 45 | fi 46 | 47 | # Let us log the changes chartpress did, it should include replacements for 48 | # fields in values.yaml, such as what tag for various images we are using. 49 | git --no-pager diff --color 50 | -------------------------------------------------------------------------------- /continuous_integration/kubernetes/k3d-create.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This script can be used during local development to setup a k8s cluster. 3 | # 4 | # Note that if you are using k3d you must also "load" images so that your pods 5 | # in the k3d cluster can access them. What docker images are available on your 6 | # machine are different from those in the k3d sandbox. 7 | # -------------------------------------------------------------------------- 8 | set -e 9 | 10 | this_dir="$(dirname "${BASH_SOURCE[0]}")" 11 | full_path_this_dir="$(cd "${this_dir}" && pwd)" 12 | git_root="$(cd "${full_path_this_dir}/../.." && pwd)" 13 | 14 | echo "Starting k3d" 15 | k3d create \ 16 | --publish 30200:30200 \ 17 | --api-port 6444 \ 18 | --name k3s-default 19 | 20 | echo "Waiting for k3d access..." 
21 | for i in {1..10}; do 22 | export KUBECONFIG="$(k3d get-kubeconfig --name='k3s-default')" 23 | if [[ $KUBECONFIG != "" ]]; then 24 | break; 25 | fi 26 | sleep 1 27 | done 28 | 29 | echo "Waiting for k3d nodes..." 30 | JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}' 31 | until kubectl get nodes -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do 32 | sleep 0.5 33 | done 34 | 35 | echo "k3d is running!" 36 | 37 | kubectl get nodes 38 | -------------------------------------------------------------------------------- /dask-gateway-server/.dockerignore: -------------------------------------------------------------------------------- 1 | dask-gateway-proxy/ 2 | build/ 3 | dist/ 4 | *.pyc 5 | .eggs/ 6 | *.egg-info 7 | .cache/ 8 | .pytest_cache/ 9 | -------------------------------------------------------------------------------- /dask-gateway-server/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | dask-gateway-proxy/dask-gateway-proxy* 4 | dask_gateway_server/proxy/dask-gateway-proxy 5 | -------------------------------------------------------------------------------- /dask-gateway-server/Dockerfile: -------------------------------------------------------------------------------- 1 | # This Dockerfile and image, ghcr.io/dask/dask-gateway-server, is used by the 2 | # dask-gateway Helm chart, by the api pod and the controller pod. 3 | # 4 | # The pods are started with different commands: 5 | # 6 | # - api pod command: dask-gateway-server ... 7 | # - controller pod command: dask-gateway-server kube-controller ... 
8 | # 9 | FROM python:3.13-slim-bullseye 10 | 11 | # Set labels based on the Open Containers Initiative (OCI): 12 | # https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys 13 | # 14 | LABEL org.opencontainers.image.source="https://github.com/dask/dask-gateway" 15 | LABEL org.opencontainers.image.url="https://github.com/dask/dask-gateway/blob/HEAD/dask-gateway-server/Dockerfile" 16 | 17 | # Install tini and upgrade linux packages to patch known 18 | # vulnerabilities. 19 | RUN apt-get update \ 20 | && apt-get upgrade -y \ 21 | && apt-get install -y \ 22 | tini \ 23 | && rm -rf /var/lib/apt/lists/* 24 | 25 | # Create a non-root user to run as 26 | RUN useradd --create-home --user-group --uid 1000 dask 27 | USER dask:dask 28 | ENV PATH=/home/dask/.local/bin:$PATH 29 | WORKDIR /home/dask/ 30 | 31 | # Install dask-gateway-server 32 | # 33 | # The Golang proxy binary isn't built as the dask-gateway Helm chart relies on 34 | # Traefik as a proxy instead, running in its own dedicated pod. 35 | # 36 | COPY --chown=dask:dask . /opt/dask-gateway-server 37 | RUN DASK_GATEWAY_SERVER__NO_PROXY=true pip install --no-cache-dir \ 38 | -r /opt/dask-gateway-server/Dockerfile.requirements.txt 39 | 40 | ENTRYPOINT ["tini", "-g", "--"] 41 | CMD ["dask-gateway-server", "--config", "/etc/dask-gateway/dask_gateway_config.py"] 42 | -------------------------------------------------------------------------------- /dask-gateway-server/Dockerfile.requirements.in: -------------------------------------------------------------------------------- 1 | # These are the requirements we know we want to install in the Dockerfile, and 2 | # then we freeze them ahead of time to provide a clear description of the 3 | # dependencies we have installed. 4 | # 5 | 6 | # Install dask-gateway-server[kubernetes], which is the only thing needed for 7 | # our CI test suite.
8 | # 9 | .[kubernetes] 10 | -------------------------------------------------------------------------------- /dask-gateway-server/Dockerfile.requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.13 3 | # by the following command: 4 | # 5 | # Use "Run workflow" button at https://github.com/dask/dask-gateway/actions/workflows/refreeze-dockerfile-requirements-txt.yaml 6 | # 7 | aiohappyeyeballs==2.6.1 8 | # via aiohttp 9 | aiohttp==3.11.16 10 | # via 11 | # dask-gateway-server 12 | # kubernetes-asyncio 13 | aiosignal==1.3.2 14 | # via aiohttp 15 | attrs==25.3.0 16 | # via aiohttp 17 | certifi==2025.1.31 18 | # via kubernetes-asyncio 19 | cffi==1.17.1 20 | # via cryptography 21 | colorlog==6.9.0 22 | # via dask-gateway-server 23 | cryptography==44.0.2 24 | # via dask-gateway-server 25 | dask-gateway-server @ file:///opt/dask-gateway-server 26 | # via file:///opt/dask-gateway-server 27 | frozenlist==1.5.0 28 | # via 29 | # aiohttp 30 | # aiosignal 31 | idna==3.10 32 | # via yarl 33 | kubernetes-asyncio==32.3.0 34 | # via dask-gateway-server 35 | multidict==6.4.2 36 | # via 37 | # aiohttp 38 | # yarl 39 | propcache==0.3.1 40 | # via 41 | # aiohttp 42 | # yarl 43 | pycparser==2.22 44 | # via cffi 45 | python-dateutil==2.9.0.post0 46 | # via kubernetes-asyncio 47 | pyyaml==6.0.2 48 | # via kubernetes-asyncio 49 | six==1.17.0 50 | # via 51 | # kubernetes-asyncio 52 | # python-dateutil 53 | traitlets==5.14.3 54 | # via dask-gateway-server 55 | urllib3==2.3.0 56 | # via kubernetes-asyncio 57 | yarl==1.19.0 58 | # via aiohttp 59 | -------------------------------------------------------------------------------- /dask-gateway-server/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019, Jim Crist-Harif 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /dask-gateway-server/README.rst: -------------------------------------------------------------------------------- 1 | dask-gateway-server 2 | =================== 3 | 4 | A multi-tenant server for securely deploying and managing Dask clusters. See 5 | `the documentation `__ for more information. 
6 | -------------------------------------------------------------------------------- /dask-gateway-server/dask-gateway-proxy/.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | -------------------------------------------------------------------------------- /dask-gateway-server/dask-gateway-proxy/README.md: -------------------------------------------------------------------------------- 1 | ## dask-gateway-proxy 2 | 3 | A configurable TLS proxy, that dispatches to different routes based on the 4 | connection's [Server Name 5 | Indication](https://en.wikipedia.org/wiki/Server_Name_Indication). Routes can 6 | be added and removed at runtime using the provided REST API. 
7 | -------------------------------------------------------------------------------- /dask-gateway-server/dask-gateway-proxy/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/dask/dask-gateway/dask-gateway-proxy 2 | 3 | go 1.22 4 | 5 | require github.com/stretchr/testify v1.10.0 6 | 7 | require ( 8 | github.com/davecgh/go-spew v1.1.1 // indirect 9 | github.com/pmezard/go-difflib v1.0.0 // indirect 10 | gopkg.in/yaml.v3 v3.0.1 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /dask-gateway-server/dask-gateway-proxy/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 6 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 7 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 8 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 9 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 10 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 11 | -------------------------------------------------------------------------------- /dask-gateway-server/dask-gateway-proxy/internal/logging/logging.go: -------------------------------------------------------------------------------- 1 | // Our tiny custom logging framework. 
2 | // 3 | // Provides common log levels, and quick functions for formatting and writing 4 | // output at those levels. 5 | package logging 6 | 7 | import ( 8 | "fmt" 9 | "io" 10 | "os" 11 | "sync" 12 | "time" 13 | ) 14 | 15 | type LogLevel int8 16 | 17 | const ( 18 | ERROR LogLevel = iota - 1 19 | WARN 20 | INFO 21 | DEBUG 22 | ) 23 | 24 | func ParseLevel(s string) LogLevel { 25 | switch s { 26 | case "ERROR", "error": 27 | return ERROR 28 | case "WARN", "warn": 29 | return WARN 30 | case "INFO", "info": 31 | return INFO 32 | case "DEBUG", "debug": 33 | return DEBUG 34 | } 35 | panic("Couldn't parse log level " + s) 36 | } 37 | 38 | func (l LogLevel) Char() byte { 39 | switch l { 40 | case ERROR: 41 | return 'E' 42 | case WARN: 43 | return 'W' 44 | case INFO: 45 | return 'I' 46 | case DEBUG: 47 | return 'D' 48 | } 49 | return '?' 50 | } 51 | 52 | type Logger struct { 53 | sync.Mutex 54 | Name string 55 | Level LogLevel 56 | Out io.Writer 57 | Buf []byte 58 | } 59 | 60 | func NewLogger(name string, level LogLevel) *Logger { 61 | return &Logger{Name: name, Level: level, Out: os.Stderr} 62 | } 63 | 64 | func (l *Logger) logMsg(level LogLevel, msg string) { 65 | if l.Level >= level { 66 | now := time.Now() // get this early. 67 | l.Lock() 68 | defer l.Unlock() 69 | l.Buf = l.Buf[:0] 70 | l.Buf = append(l.Buf, '[') 71 | l.Buf = append(l.Buf, level.Char()) 72 | l.Buf = append(l.Buf, ' ') 73 | l.Buf = now.AppendFormat(l.Buf, "2006-01-02 15:04:05.000") 74 | l.Buf = append(l.Buf, ' ') 75 | l.Buf = append(l.Buf, l.Name...) 76 | l.Buf = append(l.Buf, "] "...) 77 | l.Buf = append(l.Buf, msg...) 
78 | l.Buf = append(l.Buf, '\n') 79 | l.Out.Write(l.Buf) 80 | } 81 | } 82 | 83 | func (l *Logger) logF(level LogLevel, format string, args ...interface{}) { 84 | if l.Level >= level { 85 | l.logMsg(level, fmt.Sprintf(format, args...)) 86 | } 87 | } 88 | 89 | func (l *Logger) Debug(msg string) { 90 | l.logMsg(DEBUG, msg) 91 | } 92 | 93 | func (l *Logger) Debugf(format string, args ...interface{}) { 94 | l.logF(DEBUG, format, args...) 95 | } 96 | 97 | func (l *Logger) Info(msg string) { 98 | l.logMsg(INFO, msg) 99 | } 100 | 101 | func (l *Logger) Infof(format string, args ...interface{}) { 102 | l.logF(INFO, format, args...) 103 | } 104 | 105 | func (l *Logger) Warn(msg string) { 106 | l.logMsg(WARN, msg) 107 | } 108 | 109 | func (l *Logger) Warnf(format string, args ...interface{}) { 110 | l.logF(WARN, format, args...) 111 | } 112 | 113 | func (l *Logger) Error(msg string) { 114 | l.logMsg(ERROR, msg) 115 | } 116 | 117 | func (l *Logger) Errorf(format string, args ...interface{}) { 118 | l.logF(ERROR, format, args...) 
119 | } 120 | -------------------------------------------------------------------------------- /dask-gateway-server/dask-gateway-proxy/pkg/router/router.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "encoding/json" 5 | "net/url" 6 | "strings" 7 | ) 8 | 9 | func normalizePath(path string) (string, int) { 10 | offset := 0 11 | if path == "" || path == "/" { 12 | return "", offset 13 | } 14 | if path[0] == '/' { 15 | path = path[1:] 16 | offset = 1 17 | } 18 | if path[len(path)-1] == '/' { 19 | path = path[:len(path)-1] 20 | } 21 | return path, offset 22 | } 23 | 24 | func getSegment(path string, start int) (segment string, next int) { 25 | if len(path) == 0 { 26 | return path, -1 27 | } 28 | end := strings.IndexRune(path[start:], '/') 29 | if end == -1 { 30 | return path[start:], -1 31 | } 32 | return path[start : start+end], start + end + 1 33 | } 34 | 35 | type Router struct { 36 | url *url.URL 37 | branches map[string]*Router 38 | } 39 | 40 | func (r *Router) isLeaf() bool { 41 | return len(r.branches) == 0 42 | } 43 | 44 | func NewRouter() *Router { 45 | return &Router{} 46 | } 47 | 48 | func (router *Router) HasMatch(path string) bool { 49 | if router.url != nil { 50 | return true 51 | } 52 | path, _ = normalizePath(path) 53 | for part, i := getSegment(path, 0); ; part, i = getSegment(path, i) { 54 | router = router.branches[part] 55 | if router == nil { 56 | break 57 | } 58 | if router.url != nil { 59 | return true 60 | } 61 | if i == -1 { 62 | break 63 | } 64 | } 65 | return false 66 | } 67 | 68 | func (router *Router) Match(path string) (*url.URL, string) { 69 | path2, offset := normalizePath(path) 70 | node := router 71 | out := node.url 72 | n := 0 73 | offset2 := 0 74 | for { 75 | part, i := getSegment(path2, n) 76 | node = node.branches[part] 77 | if node == nil { 78 | break 79 | } 80 | if node.url != nil { 81 | out = node.url 82 | if i == -1 { 83 | offset2 = len(path2) 84 | } 
else { 85 | offset2 = i 86 | } 87 | } 88 | if i == -1 { 89 | break 90 | } 91 | n = i 92 | } 93 | if out == nil { 94 | return nil, "" 95 | } 96 | return out, path[offset+offset2:] 97 | } 98 | 99 | func (router *Router) Put(path string, url *url.URL) { 100 | path, _ = normalizePath(path) 101 | if path == "" { 102 | router.url = url 103 | return 104 | } 105 | node := router 106 | for part, i := getSegment(path, 0); ; part, i = getSegment(path, i) { 107 | child, _ := node.branches[part] 108 | if child == nil { 109 | child = NewRouter() 110 | if node.branches == nil { 111 | node.branches = make(map[string]*Router) 112 | } 113 | node.branches[part] = child 114 | } 115 | node = child 116 | if i == -1 { 117 | break 118 | } 119 | } 120 | node.url = url 121 | } 122 | 123 | func (router *Router) Delete(path string) { 124 | path, _ = normalizePath(path) 125 | 126 | if path == "" { 127 | // Handle root node 128 | router.url = nil 129 | return 130 | } 131 | 132 | type record struct { 133 | node *Router 134 | part string 135 | } 136 | 137 | var paths []record 138 | node := router 139 | for part, i := getSegment(path, 0); ; part, i = getSegment(path, i) { 140 | paths = append(paths, record{part: part, node: node}) 141 | node = node.branches[part] 142 | if node == nil { 143 | return 144 | } 145 | if i == -1 { 146 | break 147 | } 148 | } 149 | node.url = nil 150 | if node.isLeaf() { 151 | for i := len(paths) - 1; i >= 0; i-- { 152 | parent := paths[i].node 153 | part := paths[i].part 154 | delete(parent.branches, part) 155 | // If completely empty, deallocate whole map 156 | if len(parent.branches) == 0 { 157 | parent.branches = nil 158 | } 159 | if parent.url != nil || !parent.isLeaf() { 160 | break 161 | } 162 | } 163 | } 164 | } 165 | 166 | func (r *Router) traverse(prefix string, f func(prefix string, value *url.URL)) { 167 | if r.url != nil { 168 | f(prefix, r.url) 169 | } 170 | prefix = prefix + "/" 171 | for path, node := range r.branches { 172 | node.traverse(prefix+path, f) 
173 | } 174 | } 175 | 176 | func (r *Router) MarshalJSON() ([]byte, error) { 177 | out := make(map[string]string) 178 | r.traverse("", func(prefix string, value *url.URL) { 179 | out[prefix] = value.String() 180 | }) 181 | b, err := json.Marshal(out) 182 | if err != nil { 183 | return nil, err 184 | } 185 | return b, nil 186 | } 187 | -------------------------------------------------------------------------------- /dask-gateway-server/dask-gateway-proxy/pkg/sni/sni.go: -------------------------------------------------------------------------------- 1 | package sni 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "crypto/tls" 7 | "io" 8 | "net" 9 | ) 10 | 11 | // hideWriteTo is a workaround introduced to make the code functional in 1.22+, 12 | // where io.Copy would no longer make use of peekedTCPConn.Read after 13 | // net.TCPConn.WriteTo was added, so the workaround is to hide it again. 14 | // 15 | // The workaround was developed inspecting: 16 | // https://github.com/golang/go/commit/f664031bc17629080332a1c7bede38d67fd32e47 17 | // 18 | type hideWriteTo struct{} 19 | func (hideWriteTo) WriteTo(io.Writer) (int64, error) { 20 | panic("can't happen") 21 | } 22 | 23 | type TcpConn interface { 24 | net.Conn 25 | CloseWrite() error 26 | CloseRead() error 27 | } 28 | 29 | type peekedTCPConn struct { 30 | peeked []byte 31 | hideWriteTo 32 | *net.TCPConn 33 | } 34 | 35 | func (c *peekedTCPConn) Read(p []byte) (n int, err error) { 36 | if len(c.peeked) > 0 { 37 | n = copy(p, c.peeked) 38 | c.peeked = c.peeked[n:] 39 | if len(c.peeked) == 0 { 40 | c.peeked = nil 41 | } 42 | return n, nil 43 | } 44 | return c.TCPConn.Read(p) 45 | } 46 | 47 | func wrapPeeked(inConn *net.TCPConn, br *bufio.Reader) TcpConn { 48 | peeked, _ := br.Peek(br.Buffered()) 49 | return &peekedTCPConn{TCPConn: inConn, peeked: peeked} 50 | } 51 | 52 | type readonly struct { 53 | r io.Reader 54 | net.Conn 55 | } 56 | 57 | func (c readonly) Read(p []byte) (int, error) { return c.r.Read(p) } 58 | func 
(readonly) Write(p []byte) (int, error) { return 0, io.EOF } 59 | 60 | func ReadSNI(inConn *net.TCPConn) (string, bool, TcpConn, error) { 61 | br := bufio.NewReader(inConn) 62 | hdr, err := br.Peek(1) 63 | if err != nil { 64 | return "", false, nil, err 65 | } 66 | 67 | if hdr[0] != 0x16 { 68 | // Not a TLS handshake 69 | return "", false, wrapPeeked(inConn, br), nil 70 | } 71 | 72 | const headerLen = 5 73 | hdr, err = br.Peek(headerLen) 74 | if err != nil { 75 | return "", false, wrapPeeked(inConn, br), nil 76 | } 77 | 78 | recLen := int(hdr[3])<<8 | int(hdr[4]) 79 | helloBytes, err := br.Peek(headerLen + recLen) 80 | if err != nil { 81 | return "", true, wrapPeeked(inConn, br), nil 82 | } 83 | 84 | sni := "" 85 | server := tls.Server(readonly{r: bytes.NewReader(helloBytes)}, &tls.Config{ 86 | GetConfigForClient: func(hello *tls.ClientHelloInfo) (*tls.Config, error) { 87 | sni = hello.ServerName 88 | return nil, nil 89 | }, 90 | }) 91 | server.Handshake() 92 | 93 | return sni, true, wrapPeeked(inConn, br), nil 94 | } 95 | -------------------------------------------------------------------------------- /dask-gateway-server/dask_gateway_server/__init__.py: -------------------------------------------------------------------------------- 1 | from ._version import __version__ 2 | -------------------------------------------------------------------------------- /dask-gateway-server/dask_gateway_server/__main__.py: -------------------------------------------------------------------------------- 1 | from .app import main 2 | 3 | main() 4 | -------------------------------------------------------------------------------- /dask-gateway-server/dask_gateway_server/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = "2025.4.1-0.dev" 2 | -------------------------------------------------------------------------------- /dask-gateway-server/dask_gateway_server/backends/__init__.py: 
class InProcessBackend(UnsafeLocalBackend):
    """A backend that runs everything in the same process.

    Schedulers and workers are created as in-process ``distributed``
    objects and tracked in the ``self.schedulers`` / ``self.workers``
    dicts, keyed by cluster/worker name.
    """

    def get_security(self, cluster):
        """Return a ``Security`` using one cert/key pair for the scheduler
        and all workers of *cluster*."""
        cert_path, key_path = self.get_tls_paths(cluster)
        return Security(
            tls_ca_file=cert_path,
            tls_scheduler_cert=cert_path,
            tls_scheduler_key=key_path,
            tls_worker_cert=cert_path,
            tls_worker_key=key_path,
        )

    def get_gateway_client(self, cluster):
        """Return a gateway client authenticated with *cluster*'s token."""
        return make_gateway_client(
            cluster_name=cluster.name, api_token=cluster.token, api_url=self.api_url
        )

    def _check_status(self, objs, mapping):
        """Return one bool per object in *objs*: True when the matching
        entry in *mapping* exists and has not closed."""
        out = []
        for x in objs:
            x = mapping.get(x.name)
            # BUG FIX: was ``not x.status != Status.closed`` — a double
            # negation that reported *closed* schedulers/workers as healthy
            # and running ones as stopped.
            ok = x is not None and x.status != Status.closed
            out.append(ok)
        return out

    async def do_setup(self):
        # Name -> live Scheduler / Worker instances owned by this process.
        self.schedulers = {}
        self.workers = {}

    async def do_start_cluster(self, cluster):
        """Start an in-process scheduler for *cluster*, yielding state
        checkpoints consumed by ``do_stop_cluster``."""
        workdir = self.setup_working_directory(cluster)
        yield {"workdir": workdir}

        security = self.get_security(cluster)
        gateway_client = self.get_gateway_client(cluster)

        self.schedulers[cluster.name] = scheduler = Scheduler(
            protocol="tls",
            host="127.0.0.1",
            port=0,
            dashboard_address="127.0.0.1:0",
            security=security,
            services={
                ("gateway", ":0"): (
                    GatewaySchedulerService,
                    {
                        "gateway": gateway_client,
                        "heartbeat_period": self.cluster_heartbeat_period,
                        "adaptive_period": cluster.config.adaptive_period,
                        "idle_timeout": cluster.config.idle_timeout,
                    },
                )
            },
        )
        await scheduler
        yield {"workdir": workdir, "started": True}

    async def do_stop_cluster(self, cluster):
        """Stop and forget *cluster*'s scheduler, removing its workdir."""
        scheduler = self.schedulers.pop(cluster.name)

        await scheduler.close()
        scheduler.stop()

        workdir = cluster.state.get("workdir")
        if workdir is not None:
            self.cleanup_working_directory(workdir)

    async def do_check_clusters(self, clusters):
        return self._check_status(clusters, self.schedulers)

    async def do_start_worker(self, worker):
        """Start an in-process worker attached to its cluster's scheduler."""
        security = self.get_security(worker.cluster)
        workdir = worker.cluster.state["workdir"]
        self.workers[worker.name] = worker = Worker(
            worker.cluster.scheduler_address,
            nthreads=worker.cluster.config.worker_threads,
            memory_limit=0,
            security=security,
            name=worker.name,
            local_directory=workdir,
        )
        await worker
        yield {"started": True}

    async def do_stop_worker(self, worker):
        """Close the in-process worker, tolerating a slow shutdown."""
        worker = self.workers.pop(worker.name, None)
        if worker is None:
            return
        try:
            await worker.close(timeout=1)
        except TimeoutError:
            pass

    async def do_check_workers(self, workers):
        return self._check_status(workers, self.workers)

    async def worker_status(self, worker_name, worker_state, cluster_state):
        """Return True while the named worker exists and is not closed."""
        worker = self.workers.get(worker_name)
        if worker is None:
            return False
        # BUG FIX: was ``not worker.status != Status.closed``, which
        # inverted the check and reported closed workers as running.
        return worker.status != Status.closed
"""Helper process protocol for the jobqueue backends.

Reads a JSON request from stdin, performs a ``start`` or ``stop`` action,
and reports the outcome as a single JSON object on stdout.
"""


def finish(**kwargs):
    """Emit the result of an action as JSON on stdout and flush."""
    sys.stdout.write(json.dumps(kwargs))
    sys.stdout.flush()


def run_command(cmd, env, stdin=None):
    """Run *cmd* with environment *env*, optionally feeding *stdin* (a str)
    to the process, then report its return code and captured output."""
    payload = None if stdin is None else stdin.encode("utf8")
    pipe = None if payload is None else subprocess.PIPE

    proc = subprocess.Popen(
        cmd,
        env=env,
        cwd=os.path.expanduser("~"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=pipe,
    )
    out, err = proc.communicate(payload)

    finish(
        ok=True,
        returncode=proc.returncode,
        stdout=out.decode("utf8", "replace"),
        stderr=err.decode("utf8", "replace"),
    )


def start(cmd, env, stdin=None, staging_dir=None, files=None):
    """Populate *staging_dir* with *files* (if requested), then submit *cmd*.

    Any staging failure is reported via ``finish`` and the command is not run.
    """
    if staging_dir:
        try:
            os.makedirs(staging_dir, mode=0o700, exist_ok=False)
            for name, contents in files.items():
                target = os.path.join(staging_dir, name)
                with open(target, "w") as f:
                    f.write(contents)
        except Exception as exc:
            finish(
                ok=False,
                error=f"Error setting up staging directory {staging_dir}: {exc}",
            )
            return
    run_command(cmd, env, stdin=stdin)


def stop(cmd, env, staging_dir=None):
    """Remove *staging_dir* (if it still exists), then run the cancel *cmd*.

    A missing staging directory means there is nothing to do; a removal
    failure is reported and the command is not run.
    """
    if staging_dir:
        if not os.path.exists(staging_dir):
            return
        try:
            shutil.rmtree(staging_dir)
        except Exception as exc:
            finish(
                ok=False,
                error=f"Error removing staging directory {staging_dir}: {exc}",
            )
            return
    run_command(cmd, env)


def main():
    """Dispatch the JSON request on stdin to ``start`` or ``stop``."""
    try:
        request = json.load(sys.stdin)
    except ValueError as exc:
        finish(ok=False, error=str(exc))
        return

    handler = {"start": start, "stop": stop}.get(request.pop("action", None))
    if handler is None:
        finish(ok=False, error="Valid actions are 'start' and 'stop'")
    else:
        handler(**request)


if __name__ == "__main__":
    main()
_default_submit_command(self): 46 | return shutil.which("sbatch") or "sbatch" 47 | 48 | @default("cancel_command") 49 | def _default_cancel_command(self): 50 | return shutil.which("scancel") or "scancel" 51 | 52 | @default("status_command") 53 | def _default_status_command(self): 54 | return shutil.which("squeue") or "squeue" 55 | 56 | def get_submit_cmd_env_stdin(self, cluster, worker=None): 57 | cmd = [self.submit_command, "--parsable"] 58 | cmd.append("--job-name=dask-gateway") 59 | if cluster.config.partition: 60 | cmd.append("--partition=" + cluster.config.partition) 61 | if cluster.config.account: 62 | cmd.append("--account=" + cluster.config.account) 63 | if cluster.config.qos: 64 | cmd.append("--qos=" + cluster.config.qos) 65 | 66 | if worker: 67 | cpus = cluster.config.worker_cores 68 | mem = slurm_format_memory(cluster.config.worker_memory) 69 | log_file = "dask-worker-%s.log" % worker.name 70 | script = "\n".join( 71 | [ 72 | "#!/bin/sh", 73 | cluster.config.worker_setup, 74 | " ".join(self.get_worker_command(cluster, worker.name)), 75 | ] 76 | ) 77 | env = self.get_worker_env(cluster) 78 | else: 79 | cpus = cluster.config.scheduler_cores 80 | mem = slurm_format_memory(cluster.config.scheduler_memory) 81 | log_file = "dask-scheduler-%s.log" % cluster.name 82 | script = "\n".join( 83 | [ 84 | "#!/bin/sh", 85 | cluster.config.scheduler_setup, 86 | " ".join(self.get_scheduler_command(cluster)), 87 | ] 88 | ) 89 | env = self.get_scheduler_env(cluster) 90 | 91 | staging_dir = self.get_staging_directory(cluster) 92 | 93 | cmd.extend( 94 | [ 95 | "--chdir=" + staging_dir, 96 | "--output=" + os.path.join(staging_dir, log_file), 97 | "--cpus-per-task=%d" % cpus, 98 | "--mem=%s" % mem, 99 | "--export=%s" % (",".join(sorted(env))), 100 | ] 101 | ) 102 | 103 | return cmd, env, script 104 | 105 | def get_stop_cmd_env(self, job_id): 106 | return [self.cancel_command, job_id], {} 107 | 108 | def get_status_cmd_env(self, job_ids): 109 | cmd = [self.status_command, "-h", 
"--job=%s" % ",".join(job_ids), "-o", "%i %t"] 110 | return cmd, {} 111 | 112 | def parse_job_states(self, stdout): 113 | states = {} 114 | for l in stdout.splitlines(): 115 | job_id, state = l.split() 116 | states[job_id] = state in ("R", "CG", "PD", "CF") 117 | return states 118 | 119 | def parse_job_id(self, stdout): 120 | return stdout.strip() 121 | -------------------------------------------------------------------------------- /dask-gateway-server/dask_gateway_server/backends/kubernetes/__init__.py: -------------------------------------------------------------------------------- 1 | from .backend import KubeBackend, KubeClusterConfig 2 | -------------------------------------------------------------------------------- /dask-gateway-server/dask_gateway_server/proxy/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import Proxy 2 | -------------------------------------------------------------------------------- /dask-gateway-server/dask_gateway_server/tls.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta, timezone 2 | 3 | from cryptography import x509 4 | from cryptography.hazmat.backends import default_backend 5 | from cryptography.hazmat.primitives import hashes, serialization 6 | from cryptography.hazmat.primitives.asymmetric import rsa 7 | from cryptography.x509.oid import NameOID 8 | 9 | 10 | def new_keypair(sni): 11 | """Create a new self-signed certificate & key pair with the given SNI. 12 | 13 | Parameters 14 | ---------- 15 | sni : str 16 | The SNI name to use. 
17 | 18 | Returns 19 | ------- 20 | cert_bytes : bytes 21 | key_bytes : bytes 22 | """ 23 | key = rsa.generate_private_key( 24 | public_exponent=65537, key_size=2048, backend=default_backend() 25 | ) 26 | key_bytes = key.private_bytes( 27 | encoding=serialization.Encoding.PEM, 28 | format=serialization.PrivateFormat.PKCS8, 29 | encryption_algorithm=serialization.NoEncryption(), 30 | ) 31 | 32 | dask_internal = x509.Name( 33 | [x509.NameAttribute(NameOID.COMMON_NAME, "dask-internal")] 34 | ) 35 | altnames = x509.SubjectAlternativeName( 36 | [ 37 | x509.DNSName(sni), 38 | x509.DNSName("dask-internal"), 39 | # allow skein appmaster and dask to share credentials 40 | x509.DNSName("skein-internal"), 41 | ] 42 | ) 43 | now = datetime.now(timezone.utc) 44 | cert = ( 45 | x509.CertificateBuilder() 46 | .subject_name(dask_internal) 47 | .issuer_name(dask_internal) 48 | .add_extension(altnames, critical=False) 49 | .public_key(key.public_key()) 50 | .serial_number(x509.random_serial_number()) 51 | .not_valid_before(now) 52 | .not_valid_after(now + timedelta(days=365)) 53 | .sign(key, hashes.SHA256(), default_backend()) 54 | ) 55 | 56 | cert_bytes = cert.public_bytes(serialization.Encoding.PEM) 57 | 58 | return cert_bytes, key_bytes 59 | -------------------------------------------------------------------------------- /dask-gateway-server/dask_gateway_server/traitlets.py: -------------------------------------------------------------------------------- 1 | from traitlets import Integer, List, TraitError, TraitType 2 | from traitlets import Type as _Type 3 | from traitlets import Unicode 4 | from traitlets.config import Application 5 | 6 | # We replace the class of the default formatter used via a configuration change. 
# References:
#
# - Traitlets' Application.logging_config defaults:
#   https://github.com/ipython/traitlets/blob/e2c731ef72dd41d4be527d4d93dd87ccc409830d/traitlets/config/application.py#L229-L256
# - Python official schema for Application.logging_config:
#   https://docs.python.org/3/library/logging.config.html#logging-config-dictschema
#
# Replace the "console" formatter class used by traitlets Application
# subclasses with this project's LogFormatter, and adjust the default log
# level/format for all such applications.
Application.logging_config = {
    "formatters": {
        "console": {
            "class": "dask_gateway_server.utils.LogFormatter",
        },
    },
}
Application.log_level.default_value = "INFO"
Application.log_format.default_value = (
    "%(log_color)s[%(levelname)1.1s %(asctime)s.%(msecs).03d "
    "%(name)s]%(reset)s %(message)s"
)


# Adapted from JupyterHub
class MemoryLimit(Integer):
    """A specification of a memory limit, with optional units.

    Supported units are:
    - K -> Kibibytes
    - M -> Mebibytes
    - G -> Gibibytes
    - T -> Tebibytes
    """

    # Multiplier for each supported (binary) unit suffix.
    UNIT_SUFFIXES = {"K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}

    def validate(self, obj, value):
        """Coerce *value* to an int number of bytes.

        Accepts a plain number, or a string like ``"4G"``; raises
        TraitError for anything else.
        """
        # Bare numbers are taken as bytes directly.
        if isinstance(value, (int, float)):
            return int(value)

        try:
            # Everything but the final character must parse as a number.
            num = float(value[:-1])
        except ValueError:
            raise TraitError(
                "{val} is not a valid memory specification. Must be an int or "
                "a string with suffix K, M, G, T".format(val=value)
            )
        suffix = value[-1]

        if suffix not in self.UNIT_SUFFIXES:
            raise TraitError(
                "{val} is not a valid memory specification. Must be an int or "
                "a string with suffix K, M, G, T".format(val=value)
            )
        return int(float(num) * self.UNIT_SUFFIXES[suffix])


class Callable(TraitType):
    """A trait which is callable"""

    info_text = "a callable"

    def validate(self, obj, value):
        # Accept anything callable; otherwise raise the standard trait error.
        if callable(value):
            return value
        else:
            self.error(obj, value)


class Type(_Type):
    """An implementation of `Type` with better errors"""

    def validate(self, obj, value):
        # Resolve dotted-path strings to the class they name, turning
        # import failures into a readable TraitError rather than a raw
        # ImportError traceback.
        if isinstance(value, str):
            try:
                value = self._resolve_string(value)
            except ImportError as exc:
                raise TraitError(
                    "Failed to import %r for trait '%s.%s':\n\n%s"
                    % (value, type(obj).__name__, self.name, exc)
                )
        return super().validate(obj, value)


class Command(List):
    """Traitlet for a command that should be a list of strings,
    but allows it to be specified as a single string.
    """

    def __init__(self, default_value=None, **kwargs):
        # Require at least one element (the executable itself).
        kwargs.setdefault("minlen", 1)
        # A bare string default is treated as a single-element command list.
        if isinstance(default_value, str):
            default_value = [default_value]
        super().__init__(Unicode(), default_value, **kwargs)

    def validate(self, obj, value):
        # Normalize a bare string value to a one-element list before the
        # regular List validation runs.
        if isinstance(value, str):
            value = [value]
        return super().validate(obj, value)
deploying and managing multiple Dask clusters." 16 | readme = "README.rst" 17 | requires-python = ">=3.10" 18 | license = {file = "LICENSE"} 19 | keywords = ["dask", "hadoop", "kubernetes", "HPC", "distributed", "cluster"] 20 | authors = [ 21 | {name = "Jim Crist-Harif", email = "jcristharif@gmail.com"}, 22 | ] 23 | classifiers = [ 24 | "Development Status :: 5 - Production/Stable", 25 | "License :: OSI Approved :: BSD License", 26 | "Intended Audience :: Developers", 27 | "Intended Audience :: Science/Research", 28 | "Intended Audience :: System Administrators", 29 | "Topic :: Scientific/Engineering", 30 | "Topic :: System :: Distributed Computing", 31 | "Topic :: System :: Systems Administration", 32 | "Programming Language :: Python :: 3", 33 | ] 34 | dependencies = [ 35 | "aiohttp>=3.9.0", 36 | "async-timeout ; python_version < '3.11'", 37 | "colorlog", 38 | "cryptography", 39 | "traitlets>=5.2.2.post1", 40 | ] 41 | 42 | [project.optional-dependencies] 43 | kerberos = [ 44 | # pykerberos is tricky to install and requires a system package to 45 | # successfully compile some C code, on ubuntu this is libkrb5-dev. 
46 | "pykerberos", 47 | ] 48 | jobqueue = ["sqlalchemy>=2.0.0"] 49 | local = ["sqlalchemy>=2.0.0"] 50 | yarn = [ 51 | "sqlalchemy>=2.0.0", 52 | "skein>=0.7.3", 53 | # FIXME: protobuf is a dependency for skein, and is being held back here for 54 | # now due to a error description reported in 55 | # https://github.com/jcrist/skein/issues/255 56 | # 57 | "protobuf<3.21", 58 | ] 59 | kubernetes = ["kubernetes_asyncio"] 60 | all_backends = [ 61 | "sqlalchemy>=2.0.0", 62 | "skein>=0.7.3", 63 | "protobuf<3.21", 64 | "kubernetes_asyncio", 65 | ] 66 | 67 | [project.urls] 68 | Documentation = "https://gateway.dask.org/" 69 | Source = "https://github.com/dask/dask-gateway/" 70 | Issues = "https://github.com/dask/dask-gateway/issues" 71 | 72 | [project.scripts] 73 | dask-gateway-server = "dask_gateway_server.app:main" 74 | dask-gateway-jobqueue-launcher = "dask_gateway_server.backends.jobqueue.launcher:main" 75 | 76 | 77 | # Refs: 78 | # - https://hatch.pypa.io/latest/plugins/build-hook/custom/#pyprojecttoml 79 | # - https://hatch.pypa.io/1.3/plugins/build-hook/reference/#hatchling.builders.hooks.plugin.interface.BuildHookInterface.clean 80 | # - https://github.com/ofek/hatch-mypyc/blob/master/hatch_mypyc/plugin.py 81 | # 82 | [tool.hatch.build.hooks.custom] 83 | path = "hatch_build.py" 84 | 85 | [tool.hatch.build] 86 | include = [ 87 | "**/*.py", 88 | "dask-gateway-proxy/**", 89 | ] 90 | -------------------------------------------------------------------------------- /dask-gateway/.dockerignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | *.pyc 4 | .eggs/ 5 | *.egg-info 6 | .cache/ 7 | .pytest_cache/ 8 | -------------------------------------------------------------------------------- /dask-gateway/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | -------------------------------------------------------------------------------- 
/dask-gateway/Dockerfile: -------------------------------------------------------------------------------- 1 | # PURPOSE: 2 | # 3 | # This Dockerfile and image, ghcr.io/dask/dask-gateway, is used by the 4 | # dask-gateway Helm chart. It acts as the sample image for scheduler and workers 5 | # in Dask Clusters created by end users. 6 | # 7 | # The admin installing the dask-gateway Helm chart or its end users are meant to 8 | # specify an image for the scheduler and worker pods to use that meets their 9 | # needs for the Dask clusters they startup. Please build your own according to 10 | # the documentation if this very limited image doesn't meet your needs. 11 | # 12 | # See https://gateway.dask.org/install-kube.html#using-a-custom-image. 13 | # 14 | 15 | 16 | # The build stage 17 | # --------------- 18 | # This stage is building Python wheels for use in later stages by using a base 19 | # image that has more pre-requisites to do so, such as a C++ compiler. 20 | # 21 | # psutils, a dependency of distributed, is currently the sole reason we have to 22 | # have this build stage. 23 | # 24 | FROM python:3.13-bullseye as build-stage 25 | 26 | # Build wheels 27 | # 28 | # We set pip's cache directory and expose it across build stages via an 29 | # ephemeral docker cache (--mount=type=cache,target=${PIP_CACHE_DIR}). 30 | # 31 | COPY . 
/opt/dask-gateway 32 | ARG PIP_CACHE_DIR=/tmp/pip-cache 33 | RUN --mount=type=cache,target=${PIP_CACHE_DIR} \ 34 | pip install build \ 35 | && pip wheel \ 36 | --wheel-dir=/tmp/wheels \ 37 | -r /opt/dask-gateway/Dockerfile.requirements.txt 38 | 39 | 40 | # The final stage 41 | # --------------- 42 | # 43 | FROM python:3.13-slim-bullseye as slim-stage 44 | 45 | # Set labels based on the Open Containers Initiative (OCI): 46 | # https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys 47 | # 48 | LABEL org.opencontainers.image.source="https://github.com/dask/dask-gateway" 49 | LABEL org.opencontainers.image.url="https://github.com/dask/dask-gateway/blob/HEAD/dask-gateway/Dockerfile" 50 | 51 | # Install tini and update linux packages to patch known vulnerabilities. 52 | RUN apt-get update \ 53 | && apt-get upgrade -y \ 54 | && apt-get install -y \ 55 | tini \ 56 | && rm -rf /var/lib/apt/lists/* 57 | 58 | # Create a non-root user to run as 59 | RUN useradd --create-home --user-group --uid 1000 dask 60 | USER dask:dask 61 | ENV PATH=/home/dask/.local/bin:$PATH 62 | WORKDIR /home/dask/ 63 | 64 | # Install dask-gateway 65 | COPY --chown=dask:dask . 
/opt/dask-gateway 66 | ARG PIP_CACHE_DIR=/tmp/pip-cache 67 | RUN --mount=type=cache,target=${PIP_CACHE_DIR} \ 68 | --mount=type=cache,from=build-stage,source=/tmp/wheels,target=/tmp/wheels \ 69 | pip install \ 70 | --find-links=/tmp/wheels/ \ 71 | -r /opt/dask-gateway/Dockerfile.requirements.txt 72 | 73 | # Only set ENTRYPOINT, CMD is configured at runtime by dask-gateway-server 74 | ENTRYPOINT ["tini", "-g", "--"] 75 | -------------------------------------------------------------------------------- /dask-gateway/Dockerfile.requirements.in: -------------------------------------------------------------------------------- 1 | # These are the requirements we know we want to install in the Dockerfile, and 2 | # then we freeze them ahead of time to provide a clear description of the 3 | # dependencies we have installed. 4 | # 5 | 6 | # Install dask-gateway, which is the only thing needed for our CI test suite. 7 | . 8 | 9 | # We also install the bare minimum to provide end users with a primitive 10 | # end-to-end demonstrative test doing work in the worker pods and accessing the 11 | # scheduler dashboard without changing the image. 12 | # 13 | # - bokeh is required by the scheduler pod to present dashbaords. 
14 | # - numpy is required for running a basic computation test: 15 | # https://gateway.dask.org/usage.html#run-computations-on-the-cluster 16 | # 17 | bokeh 18 | numpy 19 | -------------------------------------------------------------------------------- /dask-gateway/Dockerfile.requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.13 3 | # by the following command: 4 | # 5 | # Use "Run workflow" button at https://github.com/dask/dask-gateway/actions/workflows/refreeze-dockerfile-requirements-txt.yaml 6 | # 7 | aiohappyeyeballs==2.6.1 8 | # via aiohttp 9 | aiohttp==3.11.16 10 | # via dask-gateway 11 | aiosignal==1.3.2 12 | # via aiohttp 13 | attrs==25.3.0 14 | # via aiohttp 15 | bokeh==3.7.2 16 | # via -r Dockerfile.requirements.in 17 | click==8.1.8 18 | # via 19 | # dask 20 | # dask-gateway 21 | # distributed 22 | cloudpickle==3.1.1 23 | # via 24 | # dask 25 | # distributed 26 | contourpy==1.3.1 27 | # via bokeh 28 | dask==2025.3.0 29 | # via 30 | # dask-gateway 31 | # distributed 32 | dask-gateway @ file:///opt/dask-gateway 33 | # via -r Dockerfile.requirements.in 34 | distributed==2025.3.0 35 | # via dask-gateway 36 | frozenlist==1.5.0 37 | # via 38 | # aiohttp 39 | # aiosignal 40 | fsspec==2025.3.2 41 | # via dask 42 | idna==3.10 43 | # via yarl 44 | jinja2==3.1.6 45 | # via 46 | # bokeh 47 | # distributed 48 | locket==1.0.0 49 | # via 50 | # distributed 51 | # partd 52 | markupsafe==3.0.2 53 | # via jinja2 54 | msgpack==1.1.0 55 | # via distributed 56 | multidict==6.4.2 57 | # via 58 | # aiohttp 59 | # yarl 60 | narwhals==1.34.1 61 | # via bokeh 62 | numpy==2.2.4 63 | # via 64 | # -r Dockerfile.requirements.in 65 | # bokeh 66 | # contourpy 67 | # pandas 68 | packaging==24.2 69 | # via 70 | # bokeh 71 | # dask 72 | # distributed 73 | pandas==2.2.3 74 | # via bokeh 75 | partd==1.4.2 76 | # via dask 77 | pillow==11.1.0 78 | # via bokeh 79 | 
propcache==0.3.1 80 | # via 81 | # aiohttp 82 | # yarl 83 | psutil==7.0.0 84 | # via distributed 85 | python-dateutil==2.9.0.post0 86 | # via pandas 87 | pytz==2025.2 88 | # via pandas 89 | pyyaml==6.0.2 90 | # via 91 | # bokeh 92 | # dask 93 | # dask-gateway 94 | # distributed 95 | six==1.17.0 96 | # via python-dateutil 97 | sortedcontainers==2.4.0 98 | # via distributed 99 | tblib==3.1.0 100 | # via distributed 101 | toolz==1.0.0 102 | # via 103 | # dask 104 | # distributed 105 | # partd 106 | tornado==6.4.2 107 | # via 108 | # bokeh 109 | # dask-gateway 110 | # distributed 111 | tzdata==2025.2 112 | # via pandas 113 | urllib3==2.3.0 114 | # via distributed 115 | xyzservices==2025.1.0 116 | # via bokeh 117 | yarl==1.19.0 118 | # via aiohttp 119 | zict==3.0.0 120 | # via distributed 121 | -------------------------------------------------------------------------------- /dask-gateway/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019, Jim Crist-Harif 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /dask-gateway/README.rst: -------------------------------------------------------------------------------- 1 | dask-gateway 2 | ============ 3 | 4 | A client library for interacting with a dask-gateway server. See `the 5 | documentation `__ for more information. 6 | -------------------------------------------------------------------------------- /dask-gateway/dask_gateway/__init__.py: -------------------------------------------------------------------------------- 1 | # Load configuration 2 | from . 
import config 3 | from ._version import __version__ 4 | from .auth import BasicAuth, JupyterHubAuth, KerberosAuth 5 | from .client import ( 6 | Gateway, 7 | GatewayCluster, 8 | GatewayClusterError, 9 | GatewayServerError, 10 | GatewayWarning, 11 | ) 12 | from .options import Options 13 | 14 | del config 15 | -------------------------------------------------------------------------------- /dask-gateway/dask_gateway/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = "2025.4.1-0.dev" 2 | -------------------------------------------------------------------------------- /dask-gateway/dask_gateway/comm.py: -------------------------------------------------------------------------------- 1 | import ssl 2 | from concurrent.futures import ThreadPoolExecutor 3 | from urllib.parse import urlparse 4 | 5 | from distributed.comm.core import Connector 6 | from distributed.comm.registry import Backend, backends 7 | from distributed.comm.tcp import ( 8 | MAX_BUFFER_SIZE, 9 | TLS, 10 | convert_stream_closed_error, 11 | get_stream_address, 12 | ) 13 | from distributed.utils import ensure_ip, get_ip 14 | from tornado import netutil 15 | from tornado.iostream import StreamClosedError 16 | from tornado.tcpclient import TCPClient 17 | 18 | 19 | def parse_gateway_address(address): 20 | if not address.startswith("gateway://"): 21 | address = "gateway://" + address 22 | parsed = urlparse(address) 23 | if not parsed.path: 24 | raise ValueError("Gateway address %r missing path component" % address) 25 | path = parsed.path.strip("/") 26 | return parsed.hostname, parsed.port, path 27 | 28 | 29 | class GatewayConnector(Connector): 30 | _executor = ThreadPoolExecutor(2) 31 | _resolver = netutil.ExecutorResolver(close_executor=False, executor=_executor) 32 | client = TCPClient(resolver=_resolver) 33 | 34 | async def connect(self, address, deserialize=True, **connection_args): 35 | ip, port, path = parse_gateway_address(address) 36 | sni = 
"daskgateway-" + path 37 | ctx = connection_args.get("ssl_context") 38 | if not isinstance(ctx, ssl.SSLContext): 39 | raise TypeError( 40 | "Gateway expects a `ssl_context` argument of type " 41 | "ssl.SSLContext, instead got %s" % ctx 42 | ) 43 | 44 | try: 45 | plain_stream = await self.client.connect( 46 | ip, port, max_buffer_size=MAX_BUFFER_SIZE 47 | ) 48 | stream = await plain_stream.start_tls( 49 | False, ssl_options=ctx, server_hostname=sni 50 | ) 51 | if stream.closed() and stream.error: 52 | raise StreamClosedError(stream.error) 53 | 54 | except StreamClosedError as e: 55 | # The socket connect() call failed 56 | convert_stream_closed_error(self, e) 57 | 58 | local_address = "tls://" + get_stream_address(stream) 59 | peer_address = "gateway://" + address 60 | return TLS(stream, local_address, peer_address, deserialize) 61 | 62 | 63 | class GatewayBackend(Backend): 64 | # I/O 65 | def get_connector(self): 66 | return GatewayConnector() 67 | 68 | def get_listener(self, *args, **kwargs): 69 | raise NotImplementedError("Listening on a gateway connection") 70 | 71 | # Address handling 72 | def get_address_host(self, loc): 73 | return parse_gateway_address(loc)[0] 74 | 75 | def get_address_host_port(self, loc): 76 | return parse_gateway_address(loc)[:2] 77 | 78 | def resolve_address(self, loc): 79 | host, port, path = parse_gateway_address(loc) 80 | host = ensure_ip(host) 81 | return "%s:%d/%s" % (host, port, path) 82 | 83 | def get_local_address_for(self, loc): 84 | host, port, path = parse_gateway_address(loc) 85 | host = ensure_ip(host) 86 | host = get_ip(host) 87 | return "%s:%d/%s" % (host, port, path) 88 | 89 | 90 | backends["gateway"] = GatewayBackend() 91 | -------------------------------------------------------------------------------- /dask-gateway/dask_gateway/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import dask 4 | import yaml 5 | 6 | fn = os.path.join(os.path.dirname(__file__), 
"gateway.yaml") 7 | dask.config.ensure_file(source=fn) 8 | 9 | with open(fn) as f: 10 | defaults = yaml.safe_load(f) 11 | 12 | dask.config.update_defaults(defaults) 13 | -------------------------------------------------------------------------------- /dask-gateway/dask_gateway/gateway.yaml: -------------------------------------------------------------------------------- 1 | gateway: 2 | address: null # The full address to the dask-gateway server. 3 | # May also be a template string, which will be formatted 4 | # with any environment variables before usage. 5 | 6 | public-address: null # The address to the dask-gateway server, as accessible 7 | # from a web browser. This will be used as the root of 8 | # all browser-facing links (e.g. the dask dashboard). 9 | # If `None` (default), `gateway.address` will be used. 10 | # May be a template string. 11 | 12 | proxy-address: null # The full address or port to the dask-gateway 13 | # scheduler proxy. If a port, the host/ip is taken from 14 | # ``address``. If null, defaults to `address`. 15 | # May also be a template string. 16 | 17 | auth: 18 | type: basic # The authentication type to use. Options are basic, 19 | # kerberos, jupyterhub, or a full class path to a 20 | # custom class. 21 | 22 | kwargs: {} # Keyword arguments to use when instantiating the 23 | # authentication class above. Values may be template 24 | # strings. 25 | 26 | http-client: 27 | proxy: true # The http proxy configuration to use when contacting 28 | # the dask-gateway server. If `true` (default), this is 29 | # inferred from your environment (i.e. `HTTP(S)_PROXY` 30 | # environment variables). Set to `false` to disable 31 | # this inference. You may also specify a proxy address 32 | # explicitly (e.g. `http://user:password@host:port`). 33 | # May be a template string. 34 | 35 | cluster: 36 | options: {} # Default options to use when calling ``new_cluster`` or 37 | # ``cluster_options``. Values may be template strings. 
38 | -------------------------------------------------------------------------------- /dask-gateway/dask_gateway/utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | 5 | def format_template(x): 6 | if isinstance(x, str): 7 | return x.format(**os.environ) 8 | return x 9 | 10 | 11 | async def cancel_task(task): 12 | task.cancel() 13 | try: 14 | await task 15 | except asyncio.CancelledError: 16 | pass 17 | -------------------------------------------------------------------------------- /dask-gateway/pyproject.toml: -------------------------------------------------------------------------------- 1 | # build-system 2 | # - ref: https://peps.python.org/pep-0517/ 3 | # 4 | [build-system] 5 | requires = ["hatchling"] 6 | build-backend = "hatchling.build" 7 | 8 | # project 9 | # - ref 1: https://peps.python.org/pep-0621/ 10 | # - ref 2: https://hatch.pypa.io/latest/config/metadata/#project-metadata 11 | # 12 | [project] 13 | name = "dask-gateway" 14 | version = "2025.4.1-0.dev" 15 | description = "A client library for interacting with a dask-gateway server" 16 | readme = "README.rst" 17 | requires-python = ">=3.10" 18 | license = {file = "LICENSE"} 19 | keywords = ["dask", "hadoop", "kubernetes", "HPC", "distributed", "cluster"] 20 | authors = [ 21 | {name = "Jim Crist-Harif", email = "jcristharif@gmail.com"}, 22 | ] 23 | 24 | classifiers = [ 25 | "Development Status :: 5 - Production/Stable", 26 | "License :: OSI Approved :: BSD License", 27 | "Intended Audience :: Developers", 28 | "Intended Audience :: Science/Research", 29 | "Topic :: Scientific/Engineering", 30 | "Topic :: System :: Distributed Computing", 31 | "Programming Language :: Python :: 3", 32 | ] 33 | dependencies = [ 34 | "aiohttp", 35 | "click>=8.1.3", 36 | "dask>=2022.4.0", 37 | "distributed>=2022.4.0", 38 | "pyyaml", 39 | "tornado", 40 | ] 41 | 42 | [project.optional-dependencies] 43 | kerberos = [ 44 | 
'pykerberos;platform_system!="Windows"', 45 | 'winkerberos;platform_system=="Windows"', 46 | ] 47 | 48 | [project.urls] 49 | Documentation = "https://gateway.dask.org/" 50 | Source = "https://github.com/dask/dask-gateway/" 51 | Issues = "https://github.com/dask/dask-gateway/issues" 52 | 53 | [tool.hatch.build] 54 | include = [ 55 | "**/*.py", 56 | "**/*.yaml", 57 | ] 58 | -------------------------------------------------------------------------------- /dev-environment.yaml: -------------------------------------------------------------------------------- 1 | # A conda environment file to help setup dependencies to build and test 2 | # dask-gateway locally. 3 | # 4 | # Install: 5 | # 6 | # export DASK_GATEWAY_SERVER__NO_PROXY=true 7 | # 8 | # conda env create -f dev-environment.yaml 9 | # conda activate dask-gateway-dev 10 | # 11 | # unset DASK_GATEWAY_SERVER__NO_PROXY 12 | # 13 | # Cleanup: 14 | # 15 | # conda deactivate 16 | # conda env remove -n dask-gateway-dev 17 | # 18 | name: dask-gateway-dev 19 | channels: 20 | - conda-forge 21 | dependencies: 22 | - python=3.13 23 | - pip 24 | 25 | # Golang with compiler is required to compile dask-gateway-server's bundled 26 | # dask-gateway-proxy executable binary. 27 | - go 28 | - go-cgo 29 | 30 | - pip: 31 | - pre-commit 32 | - --requirement=docs/requirements.txt 33 | - --requirement=tests/requirements.txt 34 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation generated by sphinx-quickstart 2 | # ---------------------------------------------------------------------------- 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) 21 | 22 | 23 | # Manually added commands 24 | # ---------------------------------------------------------------------------- 25 | 26 | # For local development: 27 | # - builds and rebuilds html on changes to source 28 | # - starts a livereload enabled webserver and opens up a browser 29 | devenv: 30 | sphinx-autobuild -b html --open-browser "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) 31 | 32 | # For local development and CI: 33 | # - verifies that links are valid 34 | linkcheck: 35 | $(SPHINXBUILD) -b linkcheck "$(SOURCEDIR)" "$(BUILDDIR)/linkcheck" $(SPHINXOPTS) 36 | @echo 37 | @echo "Link check complete; look for any errors in the above output " \ 38 | "or in $(BUILDDIR)/linkcheck/output.txt." 39 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # This file describes the requirements to build the documentation, which you can 2 | # do by the following commands: 3 | # 4 | # pip install -r docs/requirements.txt 5 | # 6 | # cd docs 7 | # make html 8 | # 9 | dask-sphinx-theme>=3.0.5 10 | myst-parser 11 | 12 | # FIXME: This workaround is required until we have sphinx>=5, as enabled by 13 | # dask-sphinx-theme no longer pinning sphinx-book-theme==0.2.0. This is 14 | # tracked in https://github.com/dask/dask-sphinx-theme/issues/68. 
15 | # 16 | sphinxcontrib-applehelp<1.0.5 17 | sphinxcontrib-devhelp<1.0.6 18 | sphinxcontrib-htmlhelp<2.0.5 19 | sphinxcontrib-serializinghtml<1.1.10 20 | sphinxcontrib-qthelp<1.0.7 21 | 22 | # sphinx-autobuild enables the "make devenv" command defined in the Makefile to 23 | # automatically rebuild the documentation on changes and update live-reload a 24 | # browser. 25 | sphinx-autobuild 26 | 27 | # autodoc-traits will inspect the dask-gateway and dask-gateway-server's Python 28 | # code to generate reference documentation. It will omit files if ImportErrors 29 | # are thrown so we install these packages with all dependencies to avoid that. 30 | # 31 | # Note that we don't have to install pykerberos for autodoc-traits inspection of 32 | # dask-gateway. This is because .py files can be loaded without errors as 33 | # "import kerberos" statements only show up within functions. 34 | # 35 | autodoc-traits 36 | --editable="./dask-gateway" 37 | --editable="./dask-gateway-server[all_backends]" 38 | -------------------------------------------------------------------------------- /docs/source/_images/adapt-widget.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/docs/source/_images/adapt-widget.png -------------------------------------------------------------------------------- /docs/source/_images/options-widget.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/docs/source/_images/options-widget.png -------------------------------------------------------------------------------- /docs/source/_images/scale-widget.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/docs/source/_images/scale-widget.png -------------------------------------------------------------------------------- /docs/source/api-client.rst: -------------------------------------------------------------------------------- 1 | Client API 2 | ========== 3 | 4 | .. currentmodule:: dask_gateway 5 | 6 | 7 | Gateway 8 | ------- 9 | 10 | .. autoclass:: Gateway 11 | :members: 12 | 13 | 14 | GatewayCluster 15 | -------------- 16 | 17 | .. autoclass:: GatewayCluster 18 | :members: 19 | 20 | 21 | Options 22 | ------- 23 | 24 | .. autoclass:: dask_gateway.options.Options 25 | 26 | 27 | Authentication 28 | -------------- 29 | 30 | .. autoclass:: dask_gateway.auth.GatewayAuth 31 | 32 | .. autoclass:: dask_gateway.auth.BasicAuth 33 | 34 | .. autoclass:: dask_gateway.auth.KerberosAuth 35 | 36 | .. autoclass:: dask_gateway.auth.JupyterHubAuth 37 | 38 | 39 | Exceptions 40 | ---------- 41 | 42 | .. autoclass:: dask_gateway.GatewayClusterError 43 | 44 | .. autoclass:: dask_gateway.GatewayServerError 45 | -------------------------------------------------------------------------------- /docs/source/api-server.rst: -------------------------------------------------------------------------------- 1 | Configuration Reference 2 | ======================= 3 | 4 | Gateway Server 5 | -------------- 6 | 7 | .. autoconfigurable:: dask_gateway_server.app.DaskGateway 8 | 9 | 10 | Authentication 11 | -------------- 12 | 13 | .. _kerberos-auth-config: 14 | 15 | KerberosAuthenticator 16 | ^^^^^^^^^^^^^^^^^^^^^ 17 | 18 | .. autoconfigurable:: dask_gateway_server.auth.KerberosAuthenticator 19 | 20 | 21 | .. _jupyterhub-auth-config: 22 | 23 | JupyterHubAuthenticator 24 | ^^^^^^^^^^^^^^^^^^^^^^^ 25 | 26 | .. autoconfigurable:: dask_gateway_server.auth.JupyterHubAuthenticator 27 | 28 | 29 | .. _simple-auth-config: 30 | 31 | SimpleAuthenticator 32 | ^^^^^^^^^^^^^^^^^^^ 33 | 34 | .. 
autoconfigurable:: dask_gateway_server.auth.SimpleAuthenticator 35 | 36 | 37 | .. _cluster-backends-reference: 38 | 39 | Cluster Backends 40 | ---------------- 41 | 42 | Base Class 43 | ^^^^^^^^^^ 44 | 45 | .. _cluster-config: 46 | 47 | ClusterConfig 48 | ~~~~~~~~~~~~~ 49 | 50 | .. autoconfigurable:: dask_gateway_server.backends.base.ClusterConfig 51 | 52 | Backend 53 | ~~~~~~~ 54 | 55 | .. autoconfigurable:: dask_gateway_server.backends.base.Backend 56 | 57 | 58 | Local Processes 59 | ^^^^^^^^^^^^^^^ 60 | 61 | LocalClusterConfig 62 | ~~~~~~~~~~~~~~~~~~ 63 | 64 | .. autoconfigurable:: dask_gateway_server.backends.local.LocalClusterConfig 65 | 66 | LocalBackend 67 | ~~~~~~~~~~~~ 68 | 69 | .. autoconfigurable:: dask_gateway_server.backends.local.LocalBackend 70 | 71 | UnsafeLocalBackend 72 | ~~~~~~~~~~~~~~~~~~ 73 | 74 | .. autoconfigurable:: dask_gateway_server.backends.local.UnsafeLocalBackend 75 | 76 | 77 | YARN 78 | ^^^^ 79 | 80 | .. _yarn-config: 81 | 82 | YarnClusterConfig 83 | ~~~~~~~~~~~~~~~~~ 84 | 85 | .. autoconfigurable:: dask_gateway_server.backends.yarn.YarnClusterConfig 86 | 87 | YarnBackend 88 | ~~~~~~~~~~~ 89 | 90 | .. autoconfigurable:: dask_gateway_server.backends.yarn.YarnBackend 91 | 92 | 93 | Kubernetes 94 | ^^^^^^^^^^ 95 | 96 | .. _kube-cluster-config: 97 | 98 | KubeClusterConfig 99 | ~~~~~~~~~~~~~~~~~ 100 | 101 | .. autoconfigurable:: dask_gateway_server.backends.kubernetes.KubeClusterConfig 102 | 103 | KubeBackend 104 | ~~~~~~~~~~~ 105 | 106 | .. autoconfigurable:: dask_gateway_server.backends.kubernetes.KubeBackend 107 | 108 | KubeController 109 | ~~~~~~~~~~~~~~ 110 | 111 | .. autoconfigurable:: dask_gateway_server.backends.kubernetes.controller.KubeController 112 | 113 | 114 | .. _jobqueue-config: 115 | 116 | Job Queues 117 | ^^^^^^^^^^ 118 | 119 | PBSClusterConfig 120 | ~~~~~~~~~~~~~~~~ 121 | 122 | .. autoconfigurable:: dask_gateway_server.backends.jobqueue.pbs.PBSClusterConfig 123 | 124 | PBSBackend 125 | ~~~~~~~~~~ 126 | 127 | .. 
autoconfigurable:: dask_gateway_server.backends.jobqueue.pbs.PBSBackend 128 | 129 | SlurmClusterConfig 130 | ~~~~~~~~~~~~~~~~~~ 131 | 132 | .. autoconfigurable:: dask_gateway_server.backends.jobqueue.slurm.SlurmClusterConfig 133 | 134 | SlurmBackend 135 | ~~~~~~~~~~~~ 136 | 137 | .. autoconfigurable:: dask_gateway_server.backends.jobqueue.slurm.SlurmBackend 138 | 139 | 140 | Proxy 141 | ----- 142 | 143 | Proxy 144 | ^^^^^ 145 | 146 | .. autoconfigurable:: dask_gateway_server.proxy.Proxy 147 | 148 | 149 | Cluster Manager Options 150 | ----------------------- 151 | 152 | .. autoclass:: dask_gateway_server.options.Options 153 | 154 | .. autoclass:: dask_gateway_server.options.Integer 155 | 156 | .. autoclass:: dask_gateway_server.options.Float 157 | 158 | .. autoclass:: dask_gateway_server.options.String 159 | 160 | .. autoclass:: dask_gateway_server.options.Bool 161 | 162 | .. autoclass:: dask_gateway_server.options.Select 163 | 164 | .. autoclass:: dask_gateway_server.options.Mapping 165 | 166 | 167 | Models 168 | ------ 169 | 170 | User 171 | ^^^^ 172 | 173 | .. autoclass:: dask_gateway_server.models.User 174 | 175 | Cluster 176 | ^^^^^^^ 177 | 178 | .. 
autoclass:: dask_gateway_server.models.Cluster 179 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import dask_gateway_server 5 | 6 | # Project settings 7 | project = "Dask Gateway" 8 | copyright = "2021, Jim Crist-Harif" 9 | author = "Jim Crist-Harif" 10 | release = version = dask_gateway_server.__version__ 11 | 12 | source_suffix = [".rst", ".md"] 13 | root_doc = master_doc = "index" 14 | language = None 15 | # Commenting this out for now, if we register dask pygments, 16 | # then eventually this line can be: 17 | # pygments_style = "dask" 18 | exclude_patterns = [] 19 | 20 | # Sphinx Extensions 21 | docs = os.path.dirname(os.path.dirname(__file__)) 22 | sys.path.insert(0, os.path.join(docs, "sphinxext")) 23 | extensions = [ 24 | "autodoc_traits", 25 | "myst_parser", 26 | "sphinx.ext.autodoc", 27 | "sphinx.ext.autosummary", 28 | "sphinx.ext.extlinks", 29 | "sphinx.ext.napoleon", 30 | ] 31 | 32 | extlinks = { 33 | "issue": ("https://github.com/dask/dask-gateway/issues/%s", "Issue #"), 34 | "pr": ("https://github.com/dask/dask-gateway/pull/%s", "PR #"), 35 | } 36 | 37 | # Sphinx Theme 38 | html_theme = "dask_sphinx_theme" 39 | templates_path = ["_templates"] 40 | 41 | # -- Options for linkcheck builder ------------------------------------------- 42 | # http://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-the-linkcheck-builder 43 | # 44 | linkcheck_ignore = [ 45 | r"(.*)github\.com(.*)#", # javascript based anchors 46 | r"https://github.com/[^/]*$", # too many github usernames / searches in changelog 47 | "https://github.com/jupyterhub/oauthenticator/pull/", # too many PRs in changelog 48 | "https://github.com/jupyterhub/oauthenticator/compare/", # too many comparisons in changelog 49 | ] 50 | linkcheck_anchors_ignore = [ 51 | "/#!", 52 | "/#%21", 53 | ] 54 | 
-------------------------------------------------------------------------------- /docs/source/configuration-user.rst: -------------------------------------------------------------------------------- 1 | Configuration 2 | ============= 3 | 4 | .. currentmodule:: dask_gateway 5 | 6 | Specifying all parameters to the :class:`Gateway` or :class:`GatewayCluster` 7 | constructors every time may be error prone, especially when sharing this 8 | workflow with new users. To simplify things you can provide defaults in a 9 | configuration file, traditionally held in ``~/.config/dask/gateway.yaml`` or 10 | ``/etc/dask/gateway.yaml``. Note that this configuration is *optional*, and 11 | only changes the defaults when not specified in the constructors. You only need 12 | to set the fields you care about, unset fields will fall back to the `default 13 | configuration`_. 14 | 15 | We recommend administrators create a configuration file to share with their 16 | users, specifying the addresses and authentication necessary to connect to 17 | their ``dask-gateway-server``. For example: 18 | 19 | **Example:** 20 | 21 | .. code-block:: yaml 22 | 23 | # ~/.config/dask/gateway.yaml 24 | gateway: 25 | # The full address to the dask-gateway server. 26 | address: http://146.148.58.187 27 | 28 | # The full address to the dask-gateway scheduler proxy 29 | proxy-address: tls://35.202.68.87:8786 30 | 31 | auth: 32 | # Use kerberos for authentication 33 | type: kerberos 34 | 35 | 36 | Users can now create :class:`Gateway` or :class:`GatewayCluster` objects 37 | without specifying any additional information. 38 | 39 | .. code-block:: python 40 | 41 | from dask_gateway import GatewayCluster 42 | 43 | cluster = GatewayCluster() 44 | cluster.scale(20) 45 | 46 | For more information on Dask configuration see the `Dask configuration 47 | documentation `_. 48 | 49 | 50 | Default Configuration 51 | --------------------- 52 | 53 | The default configuration file is as follows 54 | 55 | .. 
literalinclude:: ../../dask-gateway/dask_gateway/gateway.yaml 56 | :language: yaml 57 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Dask Gateway 2 | ============ 3 | 4 | Dask Gateway provides a secure, multi-tenant server for managing Dask_ 5 | clusters. It allows users to launch and use Dask clusters in a shared, 6 | centrally managed cluster environment, without requiring users to have direct 7 | access to the underlying cluster backend (e.g. Kubernetes, Hadoop/YARN, HPC Job 8 | queues, etc...). 9 | 10 | Dask Gateway is one of many options for deploying Dask clusters, see `Deploying Dask`_ in the Dask documentation for an overview of additional options. 11 | 12 | 13 | Highlights 14 | ---------- 15 | 16 | - **Centrally Managed**: Administrators do the heavy lifting of configuring the 17 | Gateway, users simply connect to the Gateway to get a new cluster. Eases deployment, 18 | and allows enforcing consistent configuration across all users. 19 | 20 | - **Secure by Default**: Cluster communication is automatically encrypted with 21 | TLS. All operations are authenticated with a configurable protocol, allowing 22 | you to use what makes sense for your organization. 23 | 24 | - **Flexible**: The gateway is designed to support multiple backends, and runs 25 | equally well in the cloud as on-premise. Natively supports Kubernetes, 26 | Hadoop/YARN, and HPC Job Queueing systems. 27 | 28 | - **Robust to Failure**: The gateway can be restarted or experience failover 29 | without losing existing clusters. Allows for seamless upgrades and restarts 30 | without disrupting users. 
31 | 32 | 33 | Architecture Overview 34 | --------------------- 35 | 36 | Dask Gateway is divided into three separate components: 37 | 38 | - Multiple active **Dask Clusters** (potentially more than one per user) 39 | - A **Proxy** for proxying both the connection between the user's client 40 | and their respective scheduler, and the Dask Web UI for each cluster 41 | - A central **Gateway** that manages authentication and cluster startup/shutdown 42 | 43 | 44 | .. image:: /_images/architecture.svg 45 | :width: 90 % 46 | :align: center 47 | :alt: Dask-Gateway high-level architecture 48 | 49 | 50 | The gateway is designed to be flexible and pluggable, and makes heavy use of 51 | traitlets_ (the same technology used by the Jupyter_ ecosystem). In particular, 52 | both the cluster backend and the authentication protocol are pluggable. 53 | 54 | **Cluster Backends** 55 | 56 | - Kubernetes_ 57 | - `Hadoop/YARN`_ 58 | - Job Queue Systems (PBS_, Slurm_, ...) 59 | - Local Processes 60 | 61 | **Authentication Methods** 62 | 63 | - `Kerberos `__ 64 | - `JupyterHub service plugin `__ 65 | - HTTP Basic 66 | 67 | 68 | .. toctree:: 69 | :maxdepth: 1 70 | :hidden: 71 | :caption: For Users 72 | 73 | install-user 74 | usage 75 | configuration-user 76 | 77 | .. toctree:: 78 | :maxdepth: 1 79 | :hidden: 80 | :caption: Admin - Installation 81 | 82 | install-local 83 | install-hadoop 84 | install-kube 85 | install-jobqueue 86 | 87 | .. toctree:: 88 | :maxdepth: 1 89 | :hidden: 90 | :caption: Admin - Customization 91 | 92 | authentication 93 | security 94 | cluster-options 95 | resource-limits 96 | 97 | .. toctree:: 98 | :maxdepth: 1 99 | :hidden: 100 | :caption: Reference 101 | 102 | api-client 103 | api-server 104 | develop 105 | changelog 106 | 107 | 108 | .. _Dask: https://dask.org/ 109 | .. _traitlets: https://traitlets.readthedocs.io/en/stable/ 110 | .. _Jupyter: https://jupyter.org/ 111 | .. 
_Hadoop/YARN: https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html 112 | .. _PBS: ttps://www.openpbs.org/ 113 | .. _Slurm: https://slurm.schedmd.com/ 114 | .. _Kubernetes: https://kubernetes.io/ 115 | .. _Deploying Dask: https://docs.dask.org/en/stable/deploying.html 116 | -------------------------------------------------------------------------------- /docs/source/install-local.rst: -------------------------------------------------------------------------------- 1 | Install Locally (Quickstart) 2 | ============================ 3 | 4 | This page describes how to deploy and interact with a ``dask-gateway-server`` 5 | locally. This can be useful for testing, demos, and development purposes, but 6 | is not a normal method of deployment. 7 | 8 | If you're a user connecting to an existing ``dask-gateway-server`` instance, 9 | you may want to start at :doc:`usage` instead. 10 | 11 | .. currentmodule:: dask_gateway 12 | 13 | Installation 14 | ------------ 15 | 16 | Dask-Gateway can be installed with ``conda`` or ``pip``. It's composed of two packages: 17 | 18 | - ``dask-gateway-server``: the gateway server. Administrators usually install this once on a cluster. 19 | - ``dask-gateway``: the client library. Users only need this library to use a running Gateway. 20 | 21 | **Install with conda** 22 | 23 | .. code-block:: console 24 | 25 | $ conda install -c conda-forge dask-gateway dask-gateway-server-local 26 | 27 | **Install with pip** 28 | 29 | .. code-block:: console 30 | 31 | $ pip install dask-gateway dask-gateway-server[local] 32 | 33 | 34 | Start the gateway server 35 | ------------------------ 36 | 37 | To start the Gateway server, run: 38 | 39 | .. code-block:: console 40 | 41 | $ dask-gateway-server 42 | 43 | 44 | This starts ``dask-gateway`` locally with the default configuration. 
This uses: 45 | 46 | - ``UnsafeLocalBackend`` to manage local clusters without any process isolation 47 | - ``SimpleAuthenticator`` to authenticate users using a simple and insecure authentication scheme 48 | 49 | *Both of these options are insecure and not-advised for any real-world 50 | deployments.* They are perfectly fine for testing and experimentation though. 51 | 52 | 53 | Connect to the gateway server 54 | ----------------------------- 55 | 56 | To connect to the gateway, create a :class:`Gateway` client with the URL output 57 | above. By default this is ``http://127.0.0.1:8000``. 58 | 59 | .. code-block:: python 60 | 61 | >>> from dask_gateway import Gateway 62 | >>> gateway = Gateway("http://127.0.0.1:8000") 63 | >>> gateway 64 | Gateway 65 | 66 | To check that everything is setup properly, query the gateway server to see any 67 | existing clusters (should be an empty list). 68 | 69 | .. code-block:: python 70 | 71 | >>> gateway.list_clusters() 72 | [] 73 | 74 | 75 | Interact with the gateway server 76 | -------------------------------- 77 | 78 | At this point you can use the :class:`Gateway` client to interact with the 79 | gateway server. You can use the client to create new clusters and interact with 80 | existing clusters. We direct you to the :doc:`usage` documentation for more 81 | information, starting from the :ref:`usage-create-new-cluster` section. 82 | 83 | 84 | Shutdown the gateway server 85 | --------------------------- 86 | 87 | When you're done with local usage, you'll want to shutdown the Dask-Gateway 88 | server. To do this, ``Ctrl-C`` in the same terminal you started the process in. 89 | Note that any active clusters will also be shutdown. 
90 | -------------------------------------------------------------------------------- /docs/source/install-user.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Dask-Gateway is composed of two packages: 5 | 6 | - ``dask-gateway``: the client library, installed by *users*. 7 | - ``dask-gateway-server``: the gateway server, installed by *administrators*. 8 | 9 | Dask-Gateway *users* only need the ``dask-gateway`` client package to interact 10 | with the server. It can be installed with ``conda`` or ``pip``. 11 | 12 | **Install with conda** 13 | 14 | .. code-block:: console 15 | 16 | $ conda install -c conda-forge dask-gateway 17 | 18 | **Install with pip** 19 | 20 | .. code-block:: console 21 | 22 | $ pip install dask-gateway 23 | 24 | The version of the client library should match that of ``dask-gateway-server`` 25 | running on the server. If you don't know the version running on your server, 26 | contact your administrator. 27 | 28 | 29 | Kerberos Authentication Dependencies (Optional) 30 | ----------------------------------------------- 31 | 32 | If your Dask-Gateway server uses Kerberos_ for authentication, you'll also need 33 | to install the kerberos dependencies. This can be done with either ``conda`` or 34 | ``pip``: 35 | 36 | **Install with conda** 37 | 38 | .. code-block:: console 39 | 40 | $ conda install -c conda-forge dask-gateway-kerberos 41 | 42 | **Install with pip** 43 | 44 | .. code-block:: console 45 | 46 | $ pip install dask-gateway[kerberos] 47 | 48 | 49 | .. _Kerberos: https://en.wikipedia.org/wiki/Kerberos_(protocol) 50 | -------------------------------------------------------------------------------- /docs/source/resource-limits.rst: -------------------------------------------------------------------------------- 1 | Cluster Resource Limits 2 | ======================= 3 | 4 | By default users can create clusters with as many workers and resources as they 5 | want. 
In shared environments this may not always be desirable. To remedy this 6 | administrators can set per-cluster resource limits. 7 | 8 | A few limits are available: 9 | 10 | - :data:`c.ClusterConfig.cluster_max_cores`: Maximum number of cores per cluster 11 | - :data:`c.ClusterConfig.cluster_max_memory`: Maximum amount of memory per cluster 12 | - :data:`c.ClusterConfig.cluster_max_workers`: Maximum number of workers per cluster 13 | 14 | If a cluster is at capacity for any of these limits, requests for new workers 15 | or workers will warn with an informative message saying they're at capacity. 16 | 17 | Example 18 | ------- 19 | 20 | Here we limit each cluster to: 21 | 22 | - A max of 80 active cores 23 | - A max of 1 TiB of RAM 24 | 25 | .. code-block:: python 26 | 27 | c.ClusterConfig.cluster_max_cores = 80 28 | c.ClusterConfig.cluster_max_memory = "1 T" 29 | -------------------------------------------------------------------------------- /docs/source/security.rst: -------------------------------------------------------------------------------- 1 | Security settings 2 | ================= 3 | 4 | Here we present a few common security fields you'll likely want to configure in 5 | a production deployment. 6 | 7 | 8 | Enabling TLS 9 | ------------ 10 | 11 | As a web application, any production deployment of Dask-Gateway should be run 12 | with TLS encryption (HTTPS_) enabled. There are a few common options for 13 | enabling this. 14 | 15 | Using your own TLS certificate 16 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 17 | 18 | If you have your own TLS certificate/key pair, you can specify the file 19 | locations in your ``dask_gateway_config.py`` file. The relevant configuration 20 | fields are: 21 | 22 | - :data:`c.Proxy.tls_cert` 23 | - :data:`c.Proxy.tls_key` 24 | 25 | .. 
code-block:: python 26 | 27 | c.Proxy.tls_cert = "/path/to/my.cert" 28 | c.Proxy.tls_key = "/path/to/my.key" 29 | 30 | Note that the certificate and key *must* be stored in a secure location where 31 | they are readable only by admin users. 32 | 33 | Using letsencrypt 34 | ^^^^^^^^^^^^^^^^^ 35 | 36 | It is also possible to use letsencrypt_ to automatically obtain TLS 37 | certificates. If you have letsencrypt running using the default options, you 38 | can configure this by adding the following to your ``dask_gateway_config.py`` 39 | file: 40 | 41 | .. code-block:: python 42 | 43 | c.Proxy.tls_cert = "/etc/letsencrypt/live/{FQDN}/fullchain.pem" 44 | c.Proxy.tls_key = "/etc/letsencrypt/live/{FQDN}/privkey.pem" 45 | 46 | where ``FQDN`` is the `fully qualified domain name`_ for your server. 47 | 48 | Using external TLS termination 49 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 50 | 51 | If ``dask-gateway-server`` is running behind a proxy that does TLS termination 52 | (e.g. NGINX_), then no further configuration is needed. 53 | 54 | 55 | Proxy authentication tokens 56 | --------------------------- 57 | 58 | To secure communication between the proxy and the gateway server, a secret 59 | token is used. By default this token is generated automatically. It's necessary 60 | for an admin to configure this explicitly if the proxies are being externally 61 | managed (i.e. :data:`c.Proxy.externally_managed` is set to true). To do this 62 | you have two options: 63 | 64 | - Configure :data:`c.Proxy.api_token` in your ``dask_gateway_config.py`` file. 65 | Since the token should be kept secret, the config file *must* be readable 66 | only by admin users. 67 | - Set the ``DASK_GATEWAY_PROXY_TOKEN`` environment variable. For security 68 | reasons, this environment variable should only be visible by the gateway 69 | server and proxy. 70 | 71 | Either option takes a 32 byte random string, encoded as hex. One way 72 | to create one is through the ``openssl`` CLI: 73 | 74 | ..
code-block:: shell 75 | 76 | $ openssl rand -hex 32 77 | 78 | 79 | .. _HTTPS: https://en.wikipedia.org/wiki/HTTPS 80 | .. _letsencrypt: https://letsencrypt.org/ 81 | .. _fully qualified domain name: https://en.wikipedia.org/wiki/Fully_qualified_domain_name 82 | .. _NGINX: https://docs.nginx.com/nginx/admin-guide/security-controls/terminating-ssl-http/ 83 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # isort is used for autoformatting Python code 2 | # 3 | # ref: https://pycqa.github.io/isort/ 4 | # 5 | [tool.isort] 6 | profile = "black" 7 | 8 | 9 | # black is used for autoformatting Python code 10 | # 11 | # ref: https://black.readthedocs.io/en/stable/ 12 | # 13 | [tool.black] 14 | line-length = 88 15 | target_version = [ 16 | "py310", 17 | "py311", 18 | "py312", 19 | "py313", 20 | ] 21 | 22 | 23 | # pytest is used for running Python based tests 24 | # 25 | # ref: https://docs.pytest.org/en/stable/ 26 | # 27 | [tool.pytest.ini_options] 28 | addopts = "--verbose --color=yes --durations=10" 29 | testpaths = ["tests"] 30 | asyncio_mode = "auto" 31 | asyncio_default_fixture_loop_scope = "function" 32 | 33 | 34 | # pytest-cov / coverage is used to measure code coverage of tests 35 | # 36 | # ref: https://coverage.readthedocs.io/en/stable/config.html 37 | # 38 | [tool.coverage.run] 39 | omit = [ 40 | "tests/*.py", 41 | "*/_version.py", 42 | "dask-gateway-server/dask_gateway_server/managers/jobqueue/launcher.py", 43 | ] 44 | source = [ 45 | "dask-gateway-server/dask_gateway_server", 46 | "dask-gateway/dask_gateway", 47 | ] 48 | 49 | 50 | # tbump is used to simplify and standardize the release process when updating 51 | # the version, making a git commit and tag, and pushing changes. 
52 | # 53 | # ref: https://github.com/your-tools/tbump#readme 54 | # 55 | [tool.tbump] 56 | github_url = "https://github.com/dask/dask-gateway" 57 | 58 | [tool.tbump.version] 59 | current = "2025.4.1-0.dev" 60 | regex = ''' 61 | (?P<major>\d+) 62 | \. 63 | (?P<minor>\d+) 64 | \. 65 | (?P<patch>\d+) 66 | (\- 67 | (?P<pre>((alpha|beta|rc)\.\d+|0\.dev)) 68 | )? 69 | ''' 70 | 71 | [tool.tbump.git] 72 | message_template = "Release {new_version}" 73 | tag_template = "{new_version}" 74 | 75 | [[tool.tbump.file]] 76 | src = "dask-gateway/pyproject.toml" 77 | search = 'version = "{current_version}"' 78 | 79 | [[tool.tbump.file]] 80 | src = "dask-gateway/dask_gateway/_version.py" 81 | search = '__version__ = "{current_version}"' 82 | 83 | [[tool.tbump.file]] 84 | src = "dask-gateway-server/pyproject.toml" 85 | search = 'version = "{current_version}"' 86 | 87 | [[tool.tbump.file]] 88 | src = "dask-gateway-server/dask_gateway_server/_version.py" 89 | search = '__version__ = "{current_version}"' 90 | 91 | [[tool.tbump.file]] 92 | src = "resources/helm/dask-gateway/Chart.yaml" 93 | search = 'appVersion: "{current_version}"' 94 | -------------------------------------------------------------------------------- /resources/README.rst: -------------------------------------------------------------------------------- 1 | Resources 2 | ========= 3 | 4 | This directory contains extra resources for deploying dask gateway. 5 | -------------------------------------------------------------------------------- /resources/helm/README.rst: -------------------------------------------------------------------------------- 1 | Helm Chart 2 | ========== 3 | 4 | A helm chart for deploying Dask Gateway on Kubernetes. 5 | -------------------------------------------------------------------------------- /resources/helm/chartpress.yaml: -------------------------------------------------------------------------------- 1 | # This is configuration for chartpress, a CLI for Helm chart management.
2 | # 3 | # chartpress is used to test, package, and publish the dask-gateway Helm chart 4 | # to the gh-pages based Helm chart repository at https://helm.dask.org and 5 | # https://github.com/dask/helm-chart respectively. Note that a Helm chart 6 | # repository is just a website that can serve a "index.yaml" file pointing to 7 | # packaged Helm charts that can be downloaded. 8 | # 9 | # chartpress is used to: 10 | # - Build images for multiple CPU architectures 11 | # - Update Chart.yaml (version) and values.yaml (image tags) 12 | # - Package and publish Helm charts to a GitHub based Helm chart repository 13 | # 14 | # Configuration reference: 15 | # https://github.com/jupyterhub/chartpress#configuration 16 | # 17 | charts: 18 | - name: dask-gateway 19 | imagePrefix: ghcr.io/dask/ 20 | repo: 21 | git: dask/helm-chart 22 | published: https://helm.dask.org 23 | images: 24 | # Used for clusters' scheduler and workers pods by default 25 | dask-gateway: 26 | imageName: ghcr.io/dask/dask-gateway 27 | contextPath: ../../dask-gateway 28 | valuesPath: 29 | - gateway.backend.image 30 | # Used for the api and controller pods 31 | dask-gateway-server: 32 | imageName: ghcr.io/dask/dask-gateway-server 33 | contextPath: ../../dask-gateway-server 34 | valuesPath: 35 | - gateway.image 36 | - controller.image 37 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .gitkeep 9 | .bzr/ 10 | .bzrignore 11 | .hg/ 12 | .hgignore 13 | .svn/ 14 | # Common backup files 15 | *.swp 16 | *.bak 17 | *.tmp 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | 25 | # Manually added entries 26 | *.rst 27 | .gitkeep 28 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/Chart.yaml: -------------------------------------------------------------------------------- 1 | # Chart.yaml v2 reference: https://helm.sh/docs/topics/charts/#the-chartyaml-file 2 | apiVersion: v2 3 | name: dask-gateway 4 | version: 0.0.1-set.by.chartpress 5 | appVersion: "2025.4.1-0.dev" 6 | description: A multi-tenant server for deploying and managing Dask clusters 7 | home: https://gateway.dask.org/ 8 | sources: 9 | - https://github.com/dask/dask-gateway/ 10 | icon: https://avatars3.githubusercontent.com/u/17131925?v=3&s=200 11 | kubeVersion: ">=1.30.0-0" 12 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/crds/daskclusters.yaml: -------------------------------------------------------------------------------- 1 | # Any change to these resources should be followed by an entry in the changelog 2 | # about needing to manually patch them, read more about this in 3 | # https://github.com/dask/dask-gateway/issues/553. 
4 | # 5 | apiVersion: apiextensions.k8s.io/v1 6 | kind: CustomResourceDefinition 7 | metadata: 8 | name: daskclusters.gateway.dask.org 9 | creationTimestamp: null 10 | spec: 11 | group: gateway.dask.org 12 | names: 13 | kind: DaskCluster 14 | listKind: DaskClusterList 15 | plural: daskclusters 16 | singular: daskcluster 17 | scope: Namespaced 18 | versions: 19 | - name: v1alpha1 20 | served: true 21 | storage: true 22 | subresources: 23 | status: {} 24 | schema: 25 | # NOTE: While we define a schema, it is a dummy schema that doesn't 26 | # validate anything. We just have it to comply with the schema of 27 | # a CustomResourceDefinition that requires it. 28 | # 29 | # A decision has been made to not implement an actual schema at 30 | # this point in time due to the additional maintenance work it 31 | # would require. 32 | # 33 | # Reference: https://github.com/dask/dask-gateway/issues/434 34 | # 35 | openAPIV3Schema: 36 | type: object 37 | x-kubernetes-preserve-unknown-fields: true 38 | status: 39 | acceptedNames: 40 | kind: "" 41 | plural: "" 42 | conditions: [] 43 | storedVersions: [] 44 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/extensions/README.rst: -------------------------------------------------------------------------------- 1 | Extensions 2 | ========== 3 | 4 | Some Dask Gateway deployments will require non-trivial configuration (e.g. a 5 | new ``Authenticator`` class). You have a few options to add such "extensions": 6 | 7 | 1. Add all extension code in the ``gateway.extraConfig`` of your Helm values 8 | file. For simple extensions this is the recommended approach. 9 | 2. Package your code as part of a custom image, and configure the Dask Gateway 10 | api server to use that image via ``gateway.image``. Recommended if your 11 | extension is large enough (Helm charts have a size limit of 1 MiB) or 12 | requires additional dependencies. 13 | 3. 
Clone the helm chart locally, and make use of the ``extensions`` directory. 14 | This approach prevents using the published Helm chart, but may be useful in 15 | some cases. 16 | 17 | To use the ``extensions`` directory, clone the Helm chart locally, and copy 18 | whatever extra files you require into the ``extensions/gateway`` directory. 19 | All files in ``extensions/gateway`` will be copied into ``/etc/dask-gateway`` 20 | in the deployed Dask Gateway API server pods. This directory is added to 21 | ``PYTHONPATH``, so any Python code will be importable. You can then import what 22 | functionality you need in a smaller section in ``gateway.extraConfig`` to 23 | configure the Dask Gateway server as needed. 24 | 25 | Example 26 | ------- 27 | 28 | For example, say ``myauthenticator.py`` contains a custom ``Authenticator`` 29 | class: 30 | 31 | .. code-block:: python 32 | 33 | from dask_gateway_server.auth import Authenticator 34 | 35 | class MyAuthenticator(Authenticator): 36 | """My custom authenticator""" 37 | ... 38 | 39 | After adding ``myauthenticator.py`` to ``extensions/gateway``, you can 40 | configure the Dask Gateway API server to use your authenticator via the proper 41 | fields in ``values.yaml``. For an authenticator, you can make use of 42 | ``gateway.auth``: 43 | 44 | .. code-block:: yaml 45 | 46 | gateway: 47 | auth: 48 | type: custom 49 | custom: 50 | class: myauthenticator.MyAuthenticator 51 | 52 | For other types of extensions (say ``c.KubeBackend.cluster_options``) you'd 53 | need to import and configure things in ``gateway.extraConfig``: 54 | 55 | .. 
code-block:: yaml 56 | 57 | gateway: 58 | extraConfig: 59 | my-extension: | 60 | # import your extension and configure appropriately 61 | from myextension import my_cluster_options 62 | c.KubeBackend.cluster_options = my_cluster_options 63 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/extensions/gateway/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/resources/helm/dask-gateway/extensions/gateway/.gitkeep -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | You've installed Dask-Gateway version {{ .Chart.AppVersion }}, from chart 2 | version {{ .Chart.Version }}! 3 | 4 | Your release is named {{ .Release.Name | quote }} and installed into the 5 | namespace {{ .Release.Namespace | quote }}. 6 | 7 | You can find the public address(es) at: 8 | 9 | $ kubectl --namespace={{ .Release.Namespace }} get service {{ include "dask-gateway.traefikName" . }} 10 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "dask-gateway.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 
13 | */}} 14 | {{- define "dask-gateway.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "dask-gateway.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "dask-gateway.labels" -}} 38 | app.kubernetes.io/name: {{ include "dask-gateway.name" . }} 39 | helm.sh/chart: {{ include "dask-gateway.chart" . }} 40 | app.kubernetes.io/instance: {{ .Release.Name }} 41 | {{- if .Chart.AppVersion }} 42 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 43 | {{- end }} 44 | app.kubernetes.io/managed-by: {{ .Release.Service }} 45 | gateway.dask.org/instance: {{ include "dask-gateway.fullname" . }} 46 | {{- end -}} 47 | 48 | {{/* 49 | Match labels 50 | */}} 51 | {{- define "dask-gateway.matchLabels" -}} 52 | app.kubernetes.io/name: {{ include "dask-gateway.name" . }} 53 | app.kubernetes.io/instance: {{ .Release.Name }} 54 | {{- end -}} 55 | 56 | {{/* 57 | API Server name 58 | */}} 59 | {{- define "dask-gateway.apiName" -}} 60 | {{ include "dask-gateway.fullname" . | printf "api-%s" | trunc 63 | trimSuffix "-" }} 61 | {{- end -}} 62 | 63 | {{/* 64 | Traefik name 65 | */}} 66 | {{- define "dask-gateway.traefikName" -}} 67 | {{ include "dask-gateway.fullname" . 
| printf "traefik-%s" | trunc 63 | trimSuffix "-" }} 68 | {{- end -}} 69 | 70 | {{/* 71 | Controller name 72 | */}} 73 | {{- define "dask-gateway.controllerName" -}} 74 | {{ include "dask-gateway.fullname" . | printf "controller-%s" | trunc 63 | trimSuffix "-" }} 75 | {{- end -}} 76 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/controller/configmap.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.controller.enabled -}} 2 | kind: ConfigMap 3 | apiVersion: v1 4 | metadata: 5 | name: {{ include "dask-gateway.controllerName" . }} 6 | labels: 7 | {{- include "dask-gateway.labels" . | nindent 4 }} 8 | data: 9 | dask_gateway_config.py: |- 10 | # Configure addresses 11 | c.KubeController.address = ":8000" 12 | c.KubeController.api_url = 'http://{{ include "dask-gateway.apiName" . }}.{{ .Release.Namespace }}:8000/api' 13 | c.KubeController.gateway_instance = '{{ include "dask-gateway.fullname" . }}' 14 | c.KubeController.proxy_prefix = "{{ .Values.gateway.prefix }}" 15 | c.KubeController.proxy_web_middlewares = [ 16 | {"name": '{{ include "dask-gateway.fullname" . 
| printf "clusters-prefix-%s" | trunc 63 | trimSuffix "-" }}', 17 | "namespace": '{{ .Release.Namespace }}'} 18 | ] 19 | c.KubeController.log_level = "{{ .Values.controller.loglevel }}" 20 | c.KubeController.completed_cluster_max_age = {{ .Values.controller.completedClusterMaxAge }} 21 | c.KubeController.completed_cluster_cleanup_period = {{ .Values.controller.completedClusterCleanupPeriod }} 22 | c.KubeController.backoff_base_delay = {{ .Values.controller.backoffBaseDelay }} 23 | c.KubeController.backoff_max_delay = {{ .Values.controller.backoffMaxDelay }} 24 | c.KubeController.k8s_api_rate_limit = {{ .Values.controller.k8sApiRateLimit }} 25 | c.KubeController.k8s_api_rate_limit_burst = {{ .Values.controller.k8sApiRateLimitBurst }} 26 | {{- if eq (toString .Values.traefik.service.ports.tcp.port) "web" }} 27 | c.KubeController.proxy_tcp_entrypoint = "web" 28 | {{- end }} 29 | {{- end }} 30 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/controller/deployment.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.controller.enabled -}} 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: {{ include "dask-gateway.controllerName" . }} 6 | labels: 7 | {{- include "dask-gateway.labels" . | nindent 4 }} 8 | spec: 9 | replicas: 1 10 | strategy: 11 | type: Recreate 12 | selector: 13 | matchLabels: 14 | {{- include "dask-gateway.matchLabels" . | nindent 6 }} 15 | app.kubernetes.io/component: controller 16 | template: 17 | metadata: 18 | labels: 19 | {{- include "dask-gateway.labels" . | nindent 8 }} 20 | app.kubernetes.io/component: controller 21 | annotations: 22 | checksum/configmap: {{ include (print .Template.BasePath "/controller/configmap.yaml") . | sha256sum }} 23 | {{- with .Values.controller.annotations }} 24 | {{- . 
| toYaml | nindent 8 }} 25 | {{- end }} 26 | spec: 27 | {{- if .Values.rbac.enabled }} 28 | {{- if .Values.rbac.controller.serviceAccountName }} 29 | serviceAccountName: {{ .Values.rbac.controller.serviceAccountName }} 30 | {{- else }} 31 | serviceAccountName: {{ include "dask-gateway.controllerName" . }} 32 | {{- end }} 33 | {{- end }} 34 | volumes: 35 | - name: configmap 36 | configMap: 37 | name: {{ include "dask-gateway.controllerName" . }} 38 | {{- with .Values.controller.imagePullSecrets }} 39 | imagePullSecrets: 40 | {{- . | toYaml | nindent 8 }} 41 | {{- end }} 42 | containers: 43 | - name: controller 44 | image: {{ .Values.controller.image.name }}:{{ .Values.controller.image.tag }} 45 | imagePullPolicy: {{ .Values.controller.image.pullPolicy }} 46 | args: 47 | - dask-gateway-server 48 | - kube-controller 49 | - --config 50 | - /etc/dask-gateway/dask_gateway_config.py 51 | {{- with .Values.controller.resources }} 52 | resources: 53 | {{- . | toYaml | nindent 12 }} 54 | {{- end }} 55 | volumeMounts: 56 | - mountPath: /etc/dask-gateway/ 57 | name: configmap 58 | ports: 59 | - containerPort: 8000 60 | name: api 61 | {{- with .Values.controller.affinity }} 62 | affinity: 63 | {{- . | toYaml | nindent 8 }} 64 | {{- end }} 65 | {{- with .Values.controller.tolerations }} 66 | tolerations: 67 | {{- . | toYaml | nindent 8 }} 68 | {{- end }} 69 | {{- with .Values.controller.nodeSelector }} 70 | nodeSelector: 71 | {{- . | toYaml | nindent 8 }} 72 | {{- end }} 73 | {{- end }} 74 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/controller/rbac.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.controller.enabled -}} 2 | {{- if .Values.rbac.enabled -}} 3 | {{- if not .Values.rbac.controller.serviceAccountName -}} 4 | apiVersion: v1 5 | kind: ServiceAccount 6 | metadata: 7 | name: {{ include "dask-gateway.controllerName" . 
}} 8 | labels: 9 | {{- include "dask-gateway.labels" . | nindent 4 }} 10 | --- 11 | apiVersion: rbac.authorization.k8s.io/v1 12 | kind: ClusterRole 13 | metadata: 14 | name: {{ include "dask-gateway.controllerName" . }} 15 | labels: 16 | {{- include "dask-gateway.labels" . | nindent 4 }} 17 | rules: 18 | - apiGroups: ["gateway.dask.org"] 19 | resources: ["daskclusters", "daskclusters/status"] 20 | verbs: ["*"] 21 | - apiGroups: ["traefik.io"] 22 | resources: ["ingressroutes", "ingressroutetcps"] 23 | verbs: ["get", "create", "delete"] 24 | - apiGroups: [""] 25 | resources: ["pods"] 26 | verbs: ["get", "list", "watch", "create", "delete"] 27 | - apiGroups: [""] 28 | resources: ["endpoints"] 29 | verbs: ["get", "list", "watch"] 30 | - apiGroups: [""] 31 | resources: ["secrets", "services"] 32 | verbs: ["create", "delete"] 33 | --- 34 | kind: ClusterRoleBinding 35 | apiVersion: rbac.authorization.k8s.io/v1 36 | metadata: 37 | name: {{ include "dask-gateway.controllerName" . }} 38 | labels: 39 | {{- include "dask-gateway.labels" . | nindent 4 }} 40 | subjects: 41 | - kind: ServiceAccount 42 | name: {{ include "dask-gateway.controllerName" . }} 43 | namespace: {{ .Release.Namespace }} 44 | roleRef: 45 | kind: ClusterRole 46 | name: {{ include "dask-gateway.controllerName" . }} 47 | apiGroup: rbac.authorization.k8s.io 48 | {{- end }} 49 | {{- end }} 50 | {{- end }} 51 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/gateway/ingressroute.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: traefik.io/v1alpha1 2 | kind: IngressRoute 3 | metadata: 4 | name: {{ include "dask-gateway.apiName" . }} 5 | labels: 6 | {{- include "dask-gateway.labels" . | nindent 4 }} 7 | spec: 8 | entryPoints: 9 | - web 10 | routes: 11 | - match: PathPrefix(`{{ .Values.gateway.prefix }}`) 12 | kind: Rule 13 | services: 14 | - name: {{ include "dask-gateway.apiName" . 
}} 15 | port: 8000 16 | {{- if ne .Values.gateway.prefix "/"}} 17 | middlewares: 18 | - name: '{{ include "dask-gateway.fullname" . | printf "api-prefix-%s" | trunc 63 | trimSuffix "-" }}' 19 | {{- end }} 20 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/gateway/middleware.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: traefik.io/v1alpha1 2 | kind: Middleware 3 | metadata: 4 | name: {{ include "dask-gateway.fullname" . | printf "clusters-prefix-%s" | trunc 63 | trimSuffix "-" }} 5 | labels: 6 | {{- include "dask-gateway.labels" . | nindent 4 }} 7 | spec: 8 | stripPrefixRegex: 9 | regex: 10 | - '{{ .Values.gateway.prefix | trimSuffix "/" }}/clusters/[a-zA-Z0-9.-]+' 11 | {{- if ne .Values.gateway.prefix "/" }} 12 | --- 13 | apiVersion: traefik.io/v1alpha1 14 | kind: Middleware 15 | metadata: 16 | name: {{ include "dask-gateway.fullname" . | printf "api-prefix-%s" | trunc 63 | trimSuffix "-" }} 17 | labels: 18 | {{- include "dask-gateway.labels" . | nindent 4 }} 19 | spec: 20 | stripPrefix: 21 | prefixes: 22 | - '{{ .Values.gateway.prefix | trimSuffix "/" }}' 23 | {{- end }} 24 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/gateway/rbac.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.rbac.enabled -}} 2 | {{- if not .Values.rbac.gateway.serviceAccountName -}} 3 | apiVersion: v1 4 | kind: ServiceAccount 5 | metadata: 6 | name: {{ include "dask-gateway.apiName" . }} 7 | labels: 8 | {{- include "dask-gateway.labels" . | nindent 4 }} 9 | --- 10 | apiVersion: rbac.authorization.k8s.io/v1 11 | kind: ClusterRole 12 | metadata: 13 | name: {{ include "dask-gateway.apiName" . }} 14 | labels: 15 | {{- include "dask-gateway.labels" . 
| nindent 4 }} 16 | rules: 17 | - apiGroups: [""] 18 | resources: ["secrets"] 19 | verbs: ["get"] 20 | - apiGroups: ["gateway.dask.org"] 21 | resources: ["daskclusters"] 22 | verbs: ["*"] 23 | --- 24 | kind: ClusterRoleBinding 25 | apiVersion: rbac.authorization.k8s.io/v1 26 | metadata: 27 | name: {{ include "dask-gateway.apiName" . }} 28 | labels: 29 | {{- include "dask-gateway.labels" . | nindent 4 }} 30 | subjects: 31 | - kind: ServiceAccount 32 | name: {{ include "dask-gateway.apiName" . }} 33 | namespace: {{ .Release.Namespace }} 34 | roleRef: 35 | kind: ClusterRole 36 | name: {{ include "dask-gateway.apiName" . }} 37 | apiGroup: rbac.authorization.k8s.io 38 | {{- end }} 39 | {{- end }} 40 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/gateway/secret.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (eq .Values.gateway.auth.type "jupyterhub") .Values.gateway.auth.jupyterhub.apiToken -}} 2 | kind: Secret 3 | apiVersion: v1 4 | metadata: 5 | name: {{ include "dask-gateway.apiName" . }} 6 | labels: 7 | {{- include "dask-gateway.labels" . | nindent 4 }} 8 | type: Opaque 9 | data: 10 | jupyterhub-api-token: {{ .Values.gateway.auth.jupyterhub.apiToken | b64enc | quote }} 11 | {{- end }} 12 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/gateway/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "dask-gateway.apiName" . }} 5 | labels: 6 | {{- include "dask-gateway.labels" . | nindent 4 }} 7 | {{- with .Values.gateway.service.annotations }} 8 | annotations: 9 | {{- . | toYaml | nindent 4 }} 10 | {{- end }} 11 | spec: 12 | type: ClusterIP 13 | selector: 14 | {{- include "dask-gateway.matchLabels" . 
| nindent 4 }} 15 | app.kubernetes.io/component: gateway 16 | ports: 17 | - protocol: TCP 18 | port: 8000 19 | targetPort: 8000 20 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/traefik/dashboard.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.traefik.dashboard -}} 2 | {{- if .Values.traefik.installTraefik -}} 3 | apiVersion: traefik.io/v1alpha1 4 | kind: IngressRoute 5 | metadata: 6 | name: {{ include "dask-gateway.fullname" . | printf "traefik-dashboard-%s" | trunc 63 | trimSuffix "-" }} 7 | labels: 8 | {{- include "dask-gateway.labels" . | nindent 4 }} 9 | spec: 10 | entryPoints: 11 | - traefik 12 | routes: 13 | - match: PathPrefix(`/dashboard`) || PathPrefix(`/api`) 14 | kind: Rule 15 | services: 16 | - name: api@internal 17 | kind: TraefikService 18 | {{- end }} 19 | {{- end }} 20 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/traefik/deployment.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.traefik.installTraefik -}} 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: {{ include "dask-gateway.traefikName" . }} 6 | labels: 7 | {{- include "dask-gateway.labels" . | nindent 4 }} 8 | spec: 9 | replicas: {{ .Values.traefik.replicas }} 10 | selector: 11 | matchLabels: 12 | {{- include "dask-gateway.matchLabels" . | nindent 6 }} 13 | app.kubernetes.io/component: traefik 14 | template: 15 | metadata: 16 | labels: 17 | {{- include "dask-gateway.labels" . | nindent 8 }} 18 | app.kubernetes.io/component: traefik 19 | {{- with .Values.traefik.annotations }} 20 | annotations: 21 | {{- . 
| toYaml | nindent 8 }} 22 | {{- end }} 23 | spec: 24 | {{- if .Values.rbac.enabled }} 25 | {{- if .Values.rbac.traefik.serviceAccountName }} 26 | serviceAccountName: {{ .Values.rbac.traefik.serviceAccountName }} 27 | {{- else }} 28 | serviceAccountName: {{ include "dask-gateway.traefikName" . }} 29 | {{- end }} 30 | {{- end }} 31 | terminationGracePeriodSeconds: 60 32 | containers: 33 | - name: traefik 34 | image: {{ .Values.traefik.image.name }}:{{ .Values.traefik.image.tag }} 35 | imagePullPolicy: {{ .Values.gateway.image.pullPolicy }} 36 | securityContext: 37 | runAsUser: 1000 38 | runAsGroup: 1000 39 | {{- with .Values.traefik.resources }} 40 | resources: 41 | {{- . | toYaml | nindent 12 }} 42 | {{- end }} 43 | # The Dockerfile's entrypoint is traefik the CLI, and we provide args 44 | # to it as documented here: 45 | # https://doc.traefik.io/traefik/reference/static-configuration/cli/ 46 | # 47 | args: 48 | - "--global.checknewversion=false" 49 | - "--global.sendanonymoususage=false" 50 | - "--ping=true" 51 | - "--providers.kubernetescrd" 52 | - "--providers.kubernetescrd.allowcrossnamespace=true" 53 | - '--providers.kubernetescrd.labelselector=gateway.dask.org/instance={{ include "dask-gateway.fullname" . }}' 54 | - "--providers.kubernetescrd.throttleduration=2" 55 | - "--log.level={{ .Values.traefik.loglevel }}" 56 | - "--entrypoints.traefik.address=:9000" 57 | - "--entrypoints.web.address=:8000" 58 | {{- if ne (toString .Values.traefik.service.ports.tcp.port) "web" }} 59 | - "--entrypoints.tcp.address=:8786" 60 | {{- end }} 61 | {{- if .Values.traefik.dashboard }} 62 | - "--api.dashboard=true" 63 | - "--api.insecure=true" 64 | {{- end }} 65 | {{- range .Values.traefik.additionalArguments }} 66 | - {{ . 
| quote }} 67 | {{- end }} 68 | ports: 69 | - name: traefik 70 | containerPort: 9000 71 | - name: web 72 | containerPort: 8000 73 | {{- if ne (toString .Values.traefik.service.ports.tcp.port) "web" }} 74 | - name: tcp 75 | containerPort: 8786 76 | {{- end }} 77 | readinessProbe: 78 | httpGet: 79 | path: /ping 80 | port: 9000 81 | failureThreshold: 1 82 | initialDelaySeconds: 10 83 | periodSeconds: 10 84 | successThreshold: 1 85 | timeoutSeconds: 2 86 | livenessProbe: 87 | httpGet: 88 | path: /ping 89 | port: 9000 90 | failureThreshold: 3 91 | initialDelaySeconds: 10 92 | periodSeconds: 10 93 | successThreshold: 1 94 | timeoutSeconds: 2 95 | {{- with .Values.traefik.affinity }} 96 | affinity: 97 | {{- . | toYaml | nindent 8 }} 98 | {{- end }} 99 | {{- with .Values.traefik.tolerations }} 100 | tolerations: 101 | {{- . | toYaml | nindent 8 }} 102 | {{- end }} 103 | {{- with .Values.traefik.nodeSelector }} 104 | nodeSelector: 105 | {{- . | toYaml | nindent 8 }} 106 | {{- end }} 107 | {{- with .Values.traefik.imagePullSecrets }} 108 | imagePullSecrets: 109 | {{- . | toYaml | nindent 8 }} 110 | {{- end }} 111 | {{- end }} 112 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/traefik/rbac.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.rbac.enabled -}} 2 | {{- if .Values.traefik.installTraefik -}} 3 | {{- if not .Values.rbac.traefik.serviceAccountName -}} 4 | kind: ServiceAccount 5 | apiVersion: v1 6 | metadata: 7 | name: {{ include "dask-gateway.traefikName" . }} 8 | --- 9 | kind: ClusterRole 10 | apiVersion: rbac.authorization.k8s.io/v1 11 | metadata: 12 | name: {{ include "dask-gateway.traefikName" . }} 13 | # The rules below are from Traefik's Helm chart, most recently 21 Jan 2025 from 14 | # commit 4e15c7c for use with Traefik v3.2.0. 
15 | # 16 | # To update them again, you can do: 17 | # 18 | # git clone https://github.com/traefik/traefik-helm-chart 19 | # cd traefik-helm-chart 20 | # helm template traefik --show-only templates/rbac/clusterrole.yaml --set providers.kubernetesIngress.enabled=false 21 | # 22 | rules: 23 | - apiGroups: 24 | - "" 25 | resources: 26 | - nodes 27 | - secrets 28 | - services 29 | verbs: 30 | - get 31 | - list 32 | - watch 33 | - apiGroups: 34 | - discovery.k8s.io 35 | resources: 36 | - endpointslices 37 | verbs: 38 | - list 39 | - watch 40 | - apiGroups: 41 | - extensions 42 | - networking.k8s.io 43 | resources: 44 | - ingressclasses 45 | verbs: 46 | - get 47 | - list 48 | - watch 49 | - apiGroups: 50 | - traefik.io 51 | resources: 52 | - ingressroutes 53 | - ingressroutetcps 54 | - ingressrouteudps 55 | - middlewares 56 | - middlewaretcps 57 | - serverstransports 58 | - serverstransporttcps 59 | - tlsoptions 60 | - tlsstores 61 | - traefikservices 62 | verbs: 63 | - get 64 | - list 65 | - watch 66 | --- 67 | kind: ClusterRoleBinding 68 | apiVersion: rbac.authorization.k8s.io/v1 69 | metadata: 70 | name: {{ include "dask-gateway.traefikName" . }} 71 | roleRef: 72 | apiGroup: rbac.authorization.k8s.io 73 | kind: ClusterRole 74 | name: {{ include "dask-gateway.traefikName" . }} 75 | subjects: 76 | - kind: ServiceAccount 77 | name: {{ include "dask-gateway.traefikName" . }} 78 | namespace: {{ .Release.Namespace }} 79 | {{- end }} 80 | {{- end }} 81 | {{- end }} 82 | -------------------------------------------------------------------------------- /resources/helm/dask-gateway/templates/traefik/service.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.traefik.installTraefik -}} 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ include "dask-gateway.traefikName" . }} 6 | labels: 7 | {{- include "dask-gateway.labels" . 
| nindent 4 }} 8 | {{- with .Values.traefik.service.annotations }} 9 | annotations: 10 | {{- . | toYaml | nindent 4 }} 11 | {{- end }} 12 | spec: 13 | type: {{ .Values.traefik.service.type }} 14 | selector: 15 | {{- include "dask-gateway.matchLabels" . | nindent 4 }} 16 | app.kubernetes.io/component: traefik 17 | {{- with .Values.traefik.service.spec }} 18 | {{- . | toYaml | nindent 2 }} 19 | {{- end }} 20 | ports: 21 | - name: web 22 | targetPort: 8000 23 | port: {{ .Values.traefik.service.ports.web.port }} 24 | {{- with .Values.traefik.service.ports.web.nodePort }} 25 | nodePort: {{ . }} 26 | {{- end }} 27 | {{- if ne (toString .Values.traefik.service.ports.tcp.port) "web" }} 28 | - name: tcp 29 | targetPort: 8786 30 | port: {{ .Values.traefik.service.ports.tcp.port }} 31 | {{- with .Values.traefik.service.ports.tcp.nodePort }} 32 | nodePort: {{ . }} 33 | {{- end }} 34 | {{- end }} 35 | {{- if .Values.traefik.dashboard }} 36 | - name: traefik 37 | targetPort: 9000 38 | port: 9000 39 | {{- end }} 40 | {{- end }} 41 | -------------------------------------------------------------------------------- /resources/helm/testing/chart-install-values.yaml: -------------------------------------------------------------------------------- 1 | gateway: 2 | loglevel: DEBUG 3 | prefix: /services/dask-gateway 4 | backend: 5 | scheduler: 6 | cores: 7 | request: 0.1 8 | memory: 9 | request: 256M 10 | worker: 11 | cores: 12 | request: 0.1 13 | memory: 14 | request: 256M 15 | 16 | controller: 17 | loglevel: DEBUG 18 | completedClusterMaxAge: 60 19 | completedClusterCleanupPeriod: 30 20 | 21 | traefik: 22 | loglevel: INFO 23 | service: 24 | ports: 25 | web: 26 | nodePort: 30200 27 | -------------------------------------------------------------------------------- /resources/helm/testing/skaffold.yaml: -------------------------------------------------------------------------------- 1 | gateway: 2 | loglevel: DEBUG 3 | backend: 4 | scheduler: 5 | cores: 6 | request: 0.1 7 | memory: 8 | 
request: 256M 9 | 10 | worker: 11 | cores: 12 | request: 0.1 13 | memory: 14 | request: 256M 15 | 16 | controller: 17 | loglevel: DEBUG 18 | completedClusterMaxAge: 60 19 | completedClusterCleanupPeriod: 30 20 | 21 | traefik: 22 | loglevel: INFO 23 | -------------------------------------------------------------------------------- /resources/helm/tools/compare-values-schema-content.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | This script is meant to assist in a manual validation that the content of 4 | values.schema.yaml covers values.yaml, and vice versa. 5 | 6 | FIXME: It would be nice to run this as part of our CI pipeline to report if 7 | values.schema.yaml and values.yaml gets out of sync, but first we need to 8 | address what it means to be out of sync. 9 | 10 | Consider if values.schema.yaml describes extraLabels, and we in this helm chart 11 | have an extra label set in values, how should our comparison realize that 12 | its nothing to bother about? 13 | 14 | That kind of complexity is an issue for labels, resources, 15 | containerSecurityContext, readiness- and livenessProbe's for example. 16 | 17 | This script originated from the jupyterhub/zero-to-jupyterhub-k8s project. It is 18 | not yet extracted to be a standalone package, but may be in the future. 
19 | """ 20 | 21 | import os 22 | from collections.abc import MutableMapping 23 | 24 | import yaml 25 | 26 | here_dir = os.path.abspath(os.path.dirname(__file__)) 27 | schema_yaml = os.path.join(here_dir, os.pardir, "dask-gateway", "values.schema.yaml") 28 | values_yaml = os.path.join(here_dir, os.pardir, "dask-gateway", "values.yaml") 29 | lint_and_validate_values_yaml = os.path.join( 30 | here_dir, os.pardir, "testing", "chart-install-values.yaml" 31 | ) 32 | 33 | 34 | def reduce_schema(d): 35 | """ 36 | Takes a jsonschema loaded as a dictionary and return a reduced structure 37 | ignoring everything apart from the structure it describes. 38 | """ 39 | r = {} 40 | CONTAINS_KEYS = "properties" 41 | if CONTAINS_KEYS in d: 42 | for k, v in d[CONTAINS_KEYS].items(): 43 | if isinstance(v, MutableMapping) and v.get(CONTAINS_KEYS): 44 | r[k] = reduce_schema(v) 45 | else: 46 | r[k] = None 47 | return r 48 | 49 | 50 | def flatten(d, parent_key="", sep="."): 51 | """ 52 | Takes a nested dictionary and return all keys flattened using a separator, 53 | so one element returned would for example be "gateway.image.tag". 54 | """ 55 | items = [] 56 | for k, v in d.items(): 57 | new_key = parent_key + sep + k if parent_key else k 58 | if isinstance(v, MutableMapping): 59 | if v: 60 | items.extend(flatten(v, parent_key=new_key, sep=sep)) 61 | else: 62 | items.append(new_key) 63 | else: 64 | items.append(new_key) 65 | if not parent_key: 66 | return set(items) 67 | else: 68 | return items 69 | 70 | 71 | def run(): 72 | # Using these sets, we can validate further manually by printing the results 73 | # of set operations. 
74 | with open(schema_yaml) as f: 75 | schema = yaml.safe_load(f) 76 | with open(values_yaml) as f: 77 | values = yaml.safe_load(f) 78 | # with open(lint_and_validate_values_yaml) as f: 79 | # lint_and_validate_values = yaml.safe_load(f) 80 | 81 | schema = flatten(reduce_schema(schema)) 82 | values = flatten(values) 83 | # lint_and_validate_values = flatten(lint_and_validate_values) 84 | 85 | print( 86 | "The keys from values.yaml minus those from values.schema.yaml:\n", 87 | "\n".join(sorted(values - schema)), 88 | "\n\n", 89 | sep="\n", 90 | ) 91 | print( 92 | "The keys from values.schema.yaml minus those from values.yaml:\n", 93 | "\n".join(sorted(schema - values)), 94 | "\n\n", 95 | sep="\n", 96 | ) 97 | 98 | 99 | run() 100 | -------------------------------------------------------------------------------- /resources/helm/tools/generate-json-schema.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | This script reads values.schema.yaml and generates a values.schema.json that we 4 | can package with the Helm chart. This is allowing the helm CLI to perform 5 | validation of the passed configuration values. 6 | 7 | While we can directly generate a values.schema.json from values.schema.yaml, it 8 | contains a lot of description text we use to generate our configuration 9 | reference that isn't helpful to ship along the validation schema. Due to that, 10 | we trim away everything that isn't needed. 11 | 12 | This script originated from the jupyterhub/zero-to-jupyterhub-k8s project. It is 13 | not yet extracted to be a standalone package, but may be in the future. 
14 | """ 15 | 16 | import json 17 | import os 18 | from collections.abc import MutableMapping 19 | 20 | import yaml 21 | 22 | here_dir = os.path.abspath(os.path.dirname(__file__)) 23 | schema_yaml = os.path.join(here_dir, os.pardir, "dask-gateway", "values.schema.yaml") 24 | values_schema_json = os.path.join( 25 | here_dir, os.pardir, "dask-gateway", "values.schema.json" 26 | ) 27 | 28 | 29 | def clean_jsonschema(d, parent_key=""): 30 | """ 31 | Modifies a dictionary representing a jsonschema in place to not contain 32 | jsonschema keys not relevant for a values.schema.json file solely for use by 33 | the helm CLI. 34 | """ 35 | JSONSCHEMA_KEYS_TO_REMOVE = {"description"} 36 | 37 | # start by cleaning up the current level 38 | for k in set.intersection(JSONSCHEMA_KEYS_TO_REMOVE, set(d.keys())): 39 | del d[k] 40 | 41 | # Recursively cleanup nested levels, bypassing one level where there could 42 | # be a valid Helm chart configuration named just like the jsonschema 43 | # specific key to remove. 44 | if "properties" in d: 45 | for k, v in d["properties"].items(): 46 | if isinstance(v, MutableMapping): 47 | clean_jsonschema(v, k) 48 | 49 | 50 | def run(): 51 | # Using these sets, we can validate further manually by printing the results 52 | # of set operations. 53 | with open(schema_yaml) as f: 54 | schema = yaml.safe_load(f) 55 | 56 | # Drop what isn't relevant for a values.schema.json file packaged with the 57 | # Helm chart, such as the description keys only relevant for our 58 | # configuration reference. 
59 | clean_jsonschema(schema) 60 | 61 | # dump schema to values.schema.json 62 | with open(values_schema_json, "w") as f: 63 | json.dump(schema, f) 64 | 65 | print("dask-gateway/values.schema.json created") 66 | 67 | 68 | run() 69 | -------------------------------------------------------------------------------- /resources/helm/tools/validate-against-schema.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | This scripts validates the charts default values against the values.schema.yaml 4 | file, and optionally also another file against the values.schema.yaml. 5 | 6 | This script originated from the jupyterhub/zero-to-jupyterhub-k8s project. It is 7 | not yet extracted to be a standalone package, but may be in the future. 8 | """ 9 | 10 | import os 11 | 12 | import jsonschema 13 | import yaml 14 | 15 | here_dir = os.path.abspath(os.path.dirname(__file__)) 16 | schema_yaml = os.path.join(here_dir, os.pardir, "dask-gateway", "values.schema.yaml") 17 | values_yaml = os.path.join(here_dir, os.pardir, "dask-gateway", "values.yaml") 18 | lint_and_validate_values_yaml = os.path.join( 19 | here_dir, os.pardir, "testing", "chart-install-values.yaml" 20 | ) 21 | 22 | with open(schema_yaml) as f: 23 | schema = yaml.safe_load(f) 24 | with open(values_yaml) as f: 25 | values = yaml.safe_load(f) 26 | with open(lint_and_validate_values_yaml) as f: 27 | lint_and_validate_values = yaml.safe_load(f) 28 | 29 | # Validate values.yaml against schema 30 | print("Validating values.yaml against values.schema.yaml...") 31 | jsonschema.validate(values, schema) 32 | print("OK!") 33 | print() 34 | 35 | # FIXME: Create a lint-and-validate-values.yaml file that covers all kinds of 36 | # configuration properly and let it be tested to function with the schema 37 | # and successfully render valid k8s templates. 
38 | # 39 | # # Validate chart-install-values.yaml against schema 40 | # print("Validating chart-install-values.yaml against values.schema.yaml...") 41 | # jsonschema.validate(lint_and_validate_values, schema) 42 | # print("OK!") 43 | -------------------------------------------------------------------------------- /skaffold.yaml: -------------------------------------------------------------------------------- 1 | # "skaffold" is a command line tool we can use to rebuild images for a Helm 2 | # chart and even do some "live reload" of parts of an installation of a Helm 3 | # chart. 4 | # 5 | # skaffolds purpose in this project is to be of assistance for local 6 | # development, while we use the tool "chartpress" for testing and publishing of 7 | # the Helm chart in our GitHub Workflows. 8 | # 9 | # Skaffold configuration reference: https://skaffold.dev/docs/references/yaml/ 10 | # 11 | # FIXME: 12 | # - Add notes on how to use skaffold 13 | # - Update the skaffold/v2alpha3 configuration to a modern one 14 | # 15 | apiVersion: skaffold/v2alpha3 16 | kind: Config 17 | 18 | build: 19 | local: 20 | push: false 21 | useBuildkit: true 22 | artifacts: 23 | - image: ghcr.io/dask/dask-gateway-server 24 | context: ./dask-gateway-server 25 | docker: 26 | dockerfile: Dockerfile 27 | - image: ghcr.io/dask/dask-gateway 28 | context: ./dask-gateway 29 | docker: 30 | dockerfile: Dockerfile 31 | 32 | deploy: 33 | helm: 34 | releases: 35 | - name: dask-gateway 36 | chartPath: resources/helm/dask-gateway/ 37 | namespace: default 38 | imageStrategy: 39 | helm: {} 40 | values: 41 | gateway.image: ghcr.io/dask/dask-gateway-server 42 | gateway.backend.image: ghcr.io/dask/dask-gateway 43 | controller.image: ghcr.io/dask/dask-gateway-server 44 | valuesFiles: 45 | - resources/helm/testing/skaffold.yaml 46 | flags: 47 | upgrade: 48 | - --install 49 | 50 | profiles: 51 | - name: local-controller 52 | patches: 53 | - op: add 54 | path: /deploy/helm/releases/0/setValues 55 | value: 56 | 
controller.enabled: false 57 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytest 4 | 5 | 6 | @pytest.fixture(autouse=True) 7 | def reset_logs(): 8 | # PDB's stdout/stderr capture can close fds that our loggers are configured 9 | # to write to. To prevent this, reset the log handlers before every test. 10 | logging.getLogger("DaskGateway").handlers.clear() 11 | 12 | 13 | def pytest_configure(config): 14 | # Adds a marker here, rather than setup.cfg, since the repository has two packages. 15 | config.addinivalue_line("markers", "kubernetes: marks a test as kubernetes-related") 16 | -------------------------------------------------------------------------------- /tests/kubernetes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/tests/kubernetes/__init__.py -------------------------------------------------------------------------------- /tests/kubernetes/test_helm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/tests/kubernetes/test_helm.py -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | # This file describes the requirements to test the Python code in dask-gateway 2 | # and dask-gateway server. 
3 | # 4 | # This is how you would install and run most tests: 5 | # 6 | # pip install -r tests/requirements.txt 7 | # pytest 8 | # 9 | # 10 | # FIXME: 11 | # - Make kubernetes test like other backend tests, something you opt into rather 12 | # than opt out of. 13 | # 14 | 15 | # chartpress helps update the Helm chart's Chart.yaml and values.yaml with 16 | # tagged images etc. 17 | chartpress 18 | 19 | # pyyaml is used by our generate-json-schema.py script. 20 | pyyaml 21 | 22 | pytest 23 | pytest-asyncio 24 | pytest-timeout 25 | 26 | # dask-gateway and dask-gateway-server and all their dependencies are assumed to 27 | # be installed. 28 | --editable="./dask-gateway" 29 | --editable="./dask-gateway-server[all_backends]" 30 | 31 | # ipython and ipywidgets are optional integrations allowing for fancy rendering of 32 | # end user provided configuration options. Tests in test_options.py will be 33 | # skipped without this installed. 34 | ipython 35 | ipywidgets 36 | 37 | # bokeh needs to be installed for test_dashboard_link_from_public_address to not 38 | # be skipped. 39 | # 40 | # FIXME: clarify why bokeh is needed for this test. 41 | # 42 | bokeh 43 | 44 | # trustme is a utility used in the code of the test ca_and_tls_proxy in 45 | # test_proxies.py. 46 | trustme 47 | 48 | # IMPORTANT: These environment variables indicate that tests should be run with 49 | # integration against external dask cluster providers (backends). 50 | # 51 | # For this to work, there needs to be various things running in the 52 | # background. 53 | # 54 | # TEST_DASK_GATEWAY_YARN - test_yarn_backend.py, and test_kerberos_auth in test_auth.py 55 | # TEST_DASK_GATEWAY_PBS - test_pbs_backend.py 56 | # TEST_DASK_GATEWAY_SLURM - test_slurm_backend.py 57 | # TEST_DASK_GATEWAY_KUBE - kubernetes/test_integration.py 58 | # 59 | # TEST_DASK_GATEWAY_KUBE_ADDRESS is also used to describe how to reach the 60 | # traefik pod used as a proxy to access dask-gateway-server running in the api 61 | # pod.
#
#
# IMPORTANT: Not installed Python packages with system dependencies
#
# - To run tests related to KerberosAuthenticator, you need to install
#   pykerberos which is tricky to install with pip but easy with conda. For
#   example, to install pykerberos with pip on ubuntu, you need to first
#   install the apt package libkrb5-dev.
# - To run tests related to JupyterHubAuthenticator, you need to install
#   jupyterhub and the Node npm package configurable-http-proxy that JupyterHub
#   depends on to route traffic.
# - To run tests related to the Helm chart, you need to install the helm CLI.
#

# ---------------------------------------------------------------------------
# tests/test_cli.py
# ---------------------------------------------------------------------------
import os

import pytest
from dask_gateway_server.app import DaskGateway
from dask_gateway_server.proxy.core import _PROXY_EXE, ProxyApp


def test_generate_config(tmpdir, capfd):
    """The generate-config subcommand refuses to clobber an existing file
    unless --force is passed, and the generated config mentions the key
    configurable entry points."""
    cfg_file = str(tmpdir.join("dask_gateway_config.py"))
    seed_text = "c.foo = 'bar'"

    with open(cfg_file, "w") as f:
        f.write(seed_text)

    # Without --force, hitting an existing file exits with an error message
    # (carried in the SystemExit code) and prints nothing.
    with pytest.raises(SystemExit) as exc:
        DaskGateway.launch_instance(["generate-config", "--output", cfg_file])
    DaskGateway.clear_instance()
    assert "already exists" in exc.value.code
    out, err = capfd.readouterr()
    assert not (out or err)

    # The pre-existing file must be left untouched.
    assert os.path.exists(cfg_file)
    with open(cfg_file) as f:
        assert f.read() == seed_text

    # With --force, the file is overwritten and the path is echoed to stdout.
    DaskGateway.launch_instance(["generate-config", "--force", "--output", cfg_file])
    DaskGateway.clear_instance()
    out, err = capfd.readouterr()
    assert cfg_file in out
    assert not err

    with open(cfg_file) as f:
        generated = f.read()

    assert "DaskGateway.backend_class" in generated
    assert "Backend.cluster_options" in generated

| 40 | 41 | def test_proxy_cli(tmpdir, monkeypatch): 42 | cfg_file = str(tmpdir.join("dask_gateway_config.py")) 43 | 44 | text = ( 45 | "c.DaskGateway.address = '127.0.0.1:8888'\n" 46 | "c.Proxy.address = '127.0.0.1:8866'\n" 47 | "c.Proxy.tcp_address = '127.0.0.1:8867'\n" 48 | "c.Proxy.log_level = 'debug'\n" 49 | "c.Proxy.api_token = 'abcde'" 50 | ) 51 | with open(cfg_file, "w") as f: 52 | f.write(text) 53 | 54 | called_with = [] 55 | 56 | def mock_execle(*args): 57 | called_with.extend(args) 58 | 59 | monkeypatch.setattr(os, "execle", mock_execle) 60 | DaskGateway.launch_instance(["proxy", "-f", cfg_file, "--log-level", "warn"]) 61 | DaskGateway.clear_instance() 62 | ProxyApp.clear_instance() 63 | 64 | assert called_with 65 | env = called_with.pop() 66 | 67 | assert called_with == [ 68 | _PROXY_EXE, 69 | "dask-gateway-proxy", 70 | "-address", 71 | "127.0.0.1:8866", 72 | "-tcp-address", 73 | "127.0.0.1:8867", 74 | "-api-url", 75 | "http://127.0.0.1:8888/api/v1/routes", 76 | "-log-level", 77 | "warn", 78 | ] 79 | 80 | assert "DASK_GATEWAY_PROXY_TOKEN" in env 81 | -------------------------------------------------------------------------------- /tests/test_local_backend.py: -------------------------------------------------------------------------------- 1 | from .utils_test import ( 2 | LocalTestingBackend, 3 | temp_gateway, 4 | wait_for_workers, 5 | with_retries, 6 | ) 7 | 8 | 9 | async def test_local_cluster_backend(): 10 | async with temp_gateway(backend_class=LocalTestingBackend) as g: 11 | async with g.gateway_client() as gateway: 12 | async with gateway.new_cluster() as cluster: 13 | db_cluster = g.gateway.backend.db.get_cluster(cluster.name) 14 | 15 | res = await g.gateway.backend.do_check_clusters([db_cluster]) 16 | assert res == [True] 17 | 18 | await cluster.scale(3) 19 | await wait_for_workers(cluster, exact=3) 20 | await cluster.scale(1) 21 | await wait_for_workers(cluster, exact=1) 22 | 23 | db_workers = list(db_cluster.workers.values()) 24 | 25 | async 
def test(): 26 | res = await g.gateway.backend.do_check_workers(db_workers) 27 | assert sum(res) == 1 28 | 29 | await with_retries(test, 20, 0.5) 30 | 31 | async with cluster.get_client(set_as_default=False) as client: 32 | res = await client.submit(lambda x: x + 1, 1) 33 | assert res == 2 34 | 35 | await cluster.scale(0) 36 | await wait_for_workers(cluster, exact=0) 37 | 38 | async def test(): 39 | res = await g.gateway.backend.do_check_workers(db_workers) 40 | assert sum(res) == 0 41 | 42 | await with_retries(test, 20, 0.5) 43 | 44 | # No-op for shutdown of already shutdown worker 45 | db_worker = db_workers[0] 46 | res = await g.gateway.backend.do_stop_worker(db_worker) 47 | 48 | async def test(): 49 | res = await g.gateway.backend.do_check_clusters([db_cluster]) 50 | assert res == [False] 51 | 52 | await with_retries(test, 20, 0.5) 53 | -------------------------------------------------------------------------------- /tests/test_traitlets.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dask_gateway_server.traitlets import Command, Type 3 | from traitlets import HasTraits, TraitError 4 | 5 | 6 | def test_Type_traitlet(): 7 | class Foo(HasTraits): 8 | typ = Type(klass="dask_gateway_server.auth.Authenticator") 9 | 10 | with pytest.raises(TraitError) as exc: 11 | Foo(typ="dask_gateway_server.auth.not_a_real_path") 12 | assert "Failed to import" in str(exc.value) 13 | 14 | Foo(typ="dask_gateway_server.auth.SimpleAuthenticator") 15 | 16 | 17 | def test_Command_traitlet(): 18 | class C(HasTraits): 19 | cmd = Command("default command") 20 | cmd2 = Command(["default_cmd"]) 21 | 22 | c = C() 23 | assert c.cmd == ["default command"] 24 | assert c.cmd2 == ["default_cmd"] 25 | c.cmd = "foo bar" 26 | assert c.cmd == ["foo bar"] 27 | 28 | 29 | def test_worker_threads_kube_cluster(): 30 | kube_backend = pytest.importorskip("dask_gateway_server.backends.kubernetes") 31 | # KubeClusterConfig allows floats, so 
determining worker_threads is more complex 32 | assert kube_backend.KubeClusterConfig().worker_threads == 1 33 | assert kube_backend.KubeClusterConfig(worker_threads=None).worker_threads == 1 34 | assert kube_backend.KubeClusterConfig(worker_cores=0.1).worker_threads == 1 35 | assert kube_backend.KubeClusterConfig(worker_cores_limit=0.1).worker_threads == 1 36 | 37 | assert kube_backend.KubeClusterConfig(worker_cores=2.1).worker_threads == 2 38 | assert kube_backend.KubeClusterConfig(worker_cores_limit=2.1).worker_threads == 1 39 | assert ( 40 | kube_backend.KubeClusterConfig( 41 | worker_cores=2.1, worker_threads=None 42 | ).worker_threads 43 | == 2 44 | ) 45 | assert ( 46 | kube_backend.KubeClusterConfig( 47 | worker_cores_limit=2.1, worker_threads=None 48 | ).worker_threads 49 | == 1 50 | ) 51 | assert ( 52 | kube_backend.KubeClusterConfig( 53 | worker_cores=2.1, worker_threads=1 54 | ).worker_threads 55 | == 1 56 | ) 57 | assert ( 58 | kube_backend.KubeClusterConfig( 59 | worker_cores_limit=2.1, worker_threads=1 60 | ).worker_threads 61 | == 1 62 | ) 63 | -------------------------------------------------------------------------------- /tests/test_yarn_backend.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from traitlets.config import Config 5 | 6 | skein = pytest.importorskip("skein") 7 | 8 | if not os.environ.get("TEST_DASK_GATEWAY_YARN"): 9 | pytest.skip("Not running YARN tests", allow_module_level=True) 10 | 11 | from dask_gateway.auth import BasicAuth 12 | from dask_gateway_server.backends.yarn import YarnBackend 13 | 14 | from .utils_test import temp_gateway, wait_for_workers, with_retries 15 | 16 | pytestmark = pytest.mark.usefixtures("cleanup_applications") 17 | 18 | 19 | _APPIDS = set() 20 | 21 | 22 | @pytest.fixture(scope="module") 23 | def cleanup_applications(): 24 | yield 25 | 26 | if not _APPIDS: 27 | return 28 | 29 | with skein.Client(principal="dask", 
keytab="/home/dask/dask.keytab") as client: 30 | for appid in _APPIDS: 31 | try: 32 | client.kill_application(appid) 33 | except OSError: 34 | pass 35 | print("-- Stopped %d lost clusters --" % len(_APPIDS)) 36 | 37 | 38 | class YarnTestingBackend(YarnBackend): 39 | async def do_start_cluster(self, cluster): 40 | async for state in super().do_start_cluster(cluster): 41 | _APPIDS.add(state["app_id"]) 42 | yield state 43 | 44 | async def do_stop_cluster(self, cluster): 45 | appid = cluster.state.get("app_id") 46 | await super().do_stop_cluster(cluster) 47 | _APPIDS.discard(appid) 48 | 49 | 50 | @pytest.mark.timeout(90) 51 | async def test_yarn_backend(): 52 | c = Config() 53 | c.YarnClusterConfig.scheduler_cmd = "/opt/python/bin/dask-scheduler" 54 | c.YarnClusterConfig.worker_cmd = "/opt/python/bin/dask-worker" 55 | c.YarnClusterConfig.scheduler_memory = "256M" 56 | c.YarnClusterConfig.worker_memory = "256M" 57 | c.YarnClusterConfig.scheduler_cores = 1 58 | c.YarnClusterConfig.worker_cores = 1 59 | 60 | c.YarnBackend.keytab = "/home/dask/dask.keytab" 61 | c.YarnBackend.principal = "dask" 62 | 63 | c.DaskGateway.backend_class = YarnTestingBackend 64 | 65 | async with temp_gateway(config=c) as g: 66 | auth = BasicAuth(username="alice") 67 | async with g.gateway_client(auth=auth) as gateway: 68 | async with gateway.new_cluster() as cluster: 69 | db_cluster = g.gateway.backend.db.get_cluster(cluster.name) 70 | 71 | res = await g.gateway.backend.do_check_clusters([db_cluster]) 72 | assert res == [True] 73 | 74 | await cluster.scale(2) 75 | await wait_for_workers(cluster, exact=2) 76 | await cluster.scale(1) 77 | await wait_for_workers(cluster, exact=1) 78 | 79 | db_workers = list(db_cluster.workers.values()) 80 | 81 | async def test(): 82 | res = await g.gateway.backend.do_check_workers(db_workers) 83 | assert sum(res) == 1 84 | 85 | await with_retries(test, 30, 0.25) 86 | 87 | async with cluster.get_client(set_as_default=False) as client: 88 | res = await 
client.submit(lambda x: x + 1, 1) 89 | assert res == 2 90 | 91 | await cluster.scale(0) 92 | await wait_for_workers(cluster, exact=0) 93 | 94 | async def test(): 95 | res = await g.gateway.backend.do_check_workers(db_workers) 96 | assert sum(res) == 0 97 | 98 | await with_retries(test, 30, 0.25) 99 | 100 | # No-op for shutdown of already shutdown worker 101 | async def test(): 102 | res = await g.gateway.backend.do_check_clusters([db_cluster]) 103 | assert res == [False] 104 | 105 | await with_retries(test, 30, 0.25) 106 | --------------------------------------------------------------------------------