├── .flake8
├── .gitattributes
├── .github
├── dependabot.yml
└── workflows
│ ├── build-publish-docs.yaml
│ ├── build-publish-helm-chart.yaml
│ ├── build-publish-python-packages.yaml
│ ├── refreeze-dockerfile-requirements-txt.yaml
│ └── test.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── LICENSE
├── README.rst
├── RELEASE.md
├── continuous_integration
├── docker
│ ├── README.md
│ ├── base
│ │ ├── Dockerfile
│ │ └── files
│ │ │ └── etc
│ │ │ └── sudoers.d
│ │ │ └── preserve_path
│ ├── hadoop
│ │ ├── Dockerfile
│ │ ├── _install.sh
│ │ ├── _print_logs.sh
│ │ ├── _test.sh
│ │ ├── files
│ │ │ ├── etc
│ │ │ │ ├── hadoop
│ │ │ │ │ ├── conf.kerberos
│ │ │ │ │ │ ├── capacity-scheduler.xml
│ │ │ │ │ │ ├── container-executor.cfg
│ │ │ │ │ │ ├── core-site.xml
│ │ │ │ │ │ ├── hdfs-site.xml
│ │ │ │ │ │ └── yarn-site.xml
│ │ │ │ │ └── conf.simple
│ │ │ │ │ │ ├── core-site.xml
│ │ │ │ │ │ └── hdfs-site.xml
│ │ │ │ ├── krb5.conf
│ │ │ │ ├── supervisord.conf
│ │ │ │ └── supervisord.d
│ │ │ │ │ ├── hdfs-datanode.conf
│ │ │ │ │ ├── hdfs-namenode.conf
│ │ │ │ │ ├── kerberos.conf
│ │ │ │ │ ├── yarn-nodemanager.conf
│ │ │ │ │ └── yarn-resourcemanager.conf
│ │ │ ├── scripts
│ │ │ │ ├── init-hdfs.sh
│ │ │ │ ├── setup-hadoop.sh
│ │ │ │ └── setup-kerb.sh
│ │ │ └── var
│ │ │ │ └── kerberos
│ │ │ │ └── krb5kdc
│ │ │ │ ├── kadm5.acl
│ │ │ │ └── kdc.conf
│ │ ├── install.sh
│ │ ├── print_logs.sh
│ │ ├── start.sh
│ │ └── test.sh
│ ├── pbs
│ │ ├── Dockerfile
│ │ ├── _install.sh
│ │ ├── _test.sh
│ │ ├── files
│ │ │ ├── etc
│ │ │ │ └── sudoers.d
│ │ │ │ │ └── dask
│ │ │ └── scripts
│ │ │ │ └── start.sh
│ │ ├── install.sh
│ │ ├── print_logs.sh
│ │ ├── start.sh
│ │ └── test.sh
│ └── slurm
│ │ ├── Dockerfile
│ │ ├── _install.sh
│ │ ├── _print_logs.sh
│ │ ├── _test.sh
│ │ ├── files
│ │ ├── etc
│ │ │ ├── slurm
│ │ │ │ ├── cgroup.conf
│ │ │ │ ├── slurm.conf
│ │ │ │ └── slurmdbd.conf
│ │ │ ├── sudoers.d
│ │ │ │ └── dask
│ │ │ ├── supervisord.conf
│ │ │ └── supervisord.d
│ │ │ │ └── slurm.conf
│ │ └── scripts
│ │ │ └── init-mysql.sh
│ │ ├── install.sh
│ │ ├── print_logs.sh
│ │ ├── start.sh
│ │ └── test.sh
└── kubernetes
│ ├── build-publish-helm-chart.sh
│ └── k3d-create.sh
├── dask-gateway-server
├── .dockerignore
├── .gitignore
├── Dockerfile
├── Dockerfile.requirements.in
├── Dockerfile.requirements.txt
├── LICENSE
├── README.rst
├── dask-gateway-proxy
│ ├── .gitignore
│ ├── README.md
│ ├── cmd
│ │ └── dask-gateway-proxy
│ │ │ └── main.go
│ ├── go.mod
│ ├── go.sum
│ ├── internal
│ │ └── logging
│ │ │ └── logging.go
│ └── pkg
│ │ ├── router
│ │ ├── router.go
│ │ └── router_test.go
│ │ └── sni
│ │ └── sni.go
├── dask_gateway_server
│ ├── __init__.py
│ ├── __main__.py
│ ├── _version.py
│ ├── app.py
│ ├── auth.py
│ ├── backends
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── db_base.py
│ │ ├── inprocess.py
│ │ ├── jobqueue
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── launcher.py
│ │ │ ├── pbs.py
│ │ │ └── slurm.py
│ │ ├── kubernetes
│ │ │ ├── __init__.py
│ │ │ ├── backend.py
│ │ │ ├── controller.py
│ │ │ └── utils.py
│ │ ├── local.py
│ │ └── yarn.py
│ ├── models.py
│ ├── options.py
│ ├── proxy
│ │ ├── __init__.py
│ │ └── core.py
│ ├── routes.py
│ ├── tls.py
│ ├── traitlets.py
│ ├── utils.py
│ └── workqueue.py
├── hatch_build.py
└── pyproject.toml
├── dask-gateway
├── .dockerignore
├── .gitignore
├── Dockerfile
├── Dockerfile.requirements.in
├── Dockerfile.requirements.txt
├── LICENSE
├── README.rst
├── dask_gateway
│ ├── __init__.py
│ ├── _version.py
│ ├── auth.py
│ ├── client.py
│ ├── comm.py
│ ├── config.py
│ ├── gateway.yaml
│ ├── options.py
│ ├── scheduler_preload.py
│ └── utils.py
└── pyproject.toml
├── dev-environment.yaml
├── docs
├── Makefile
├── requirements.txt
└── source
│ ├── _images
│ ├── adapt-widget.png
│ ├── architecture-k8s.svg
│ ├── architecture.svg
│ ├── options-widget.png
│ └── scale-widget.png
│ ├── api-client.rst
│ ├── api-server.rst
│ ├── authentication.rst
│ ├── changelog.md
│ ├── cluster-options.rst
│ ├── conf.py
│ ├── configuration-user.rst
│ ├── develop.rst
│ ├── index.rst
│ ├── install-hadoop.rst
│ ├── install-jobqueue.rst
│ ├── install-kube.rst
│ ├── install-local.rst
│ ├── install-user.rst
│ ├── resource-limits.rst
│ ├── security.rst
│ └── usage.rst
├── pyproject.toml
├── resources
├── README.rst
└── helm
│ ├── README.rst
│ ├── chartpress.yaml
│ ├── dask-gateway
│ ├── .helmignore
│ ├── Chart.yaml
│ ├── crds
│ │ ├── daskclusters.yaml
│ │ └── traefik.yaml
│ ├── extensions
│ │ ├── README.rst
│ │ └── gateway
│ │ │ └── .gitkeep
│ ├── templates
│ │ ├── NOTES.txt
│ │ ├── _helpers.tpl
│ │ ├── controller
│ │ │ ├── configmap.yaml
│ │ │ ├── deployment.yaml
│ │ │ └── rbac.yaml
│ │ ├── gateway
│ │ │ ├── configmap.yaml
│ │ │ ├── deployment.yaml
│ │ │ ├── ingressroute.yaml
│ │ │ ├── middleware.yaml
│ │ │ ├── rbac.yaml
│ │ │ ├── secret.yaml
│ │ │ └── service.yaml
│ │ └── traefik
│ │ │ ├── dashboard.yaml
│ │ │ ├── deployment.yaml
│ │ │ ├── rbac.yaml
│ │ │ └── service.yaml
│ ├── values.schema.yaml
│ └── values.yaml
│ ├── testing
│ ├── chart-install-values.yaml
│ └── skaffold.yaml
│ └── tools
│ ├── compare-values-schema-content.py
│ ├── generate-json-schema.py
│ └── validate-against-schema.py
├── skaffold.yaml
└── tests
├── __init__.py
├── conftest.py
├── kubernetes
├── __init__.py
├── test_helm.py
├── test_integration.py
└── test_methods.py
├── requirements.txt
├── test_auth.py
├── test_cli.py
├── test_client.py
├── test_db_backend.py
├── test_local_backend.py
├── test_options.py
├── test_pbs_backend.py
├── test_proxies.py
├── test_slurm_backend.py
├── test_traitlets.py
├── test_utils.py
├── test_workqueue.py
├── test_yarn_backend.py
└── utils_test.py
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude =
3 | __init__.py,
4 | _version.py
5 | ignore =
6 | # Import formatting
7 | E4,
8 | # Space before :
9 | E203,
10 | # Comparing types instead of isinstance
11 | E721,
12 | # Assign a lambda
13 | E731,
14 | # Ambiguous variable names
15 | E741,
16 | # Allow breaks before/after binary operators
17 | W503,
18 | W504
19 |
20 | # black is set to 88, but isn't a strict limit so we add some wiggle room for
21 | # flake8 testing.
22 | max-line-length = 100
23 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | dask-gateway/dask_gateway/_version.py export-subst
2 | dask-gateway-server/dask_gateway_server/_version.py export-subst
3 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # dependabot.yml reference: https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/configuration-options-for-dependency-updates
2 | #
3 | # Notes:
4 | # - Status and logs from dependabot are provided at
5 | # https://github.com/dask/dask-gateway/network/updates.
6 | # - YAML anchors are not supported here or in GitHub Workflows.
7 | #
8 | version: 2
9 | updates:
10 | # Update actions in our workflows to their latest releases
11 | - package-ecosystem: github-actions
12 | directory: /
13 | schedule:
14 | interval: monthly
15 | time: "05:00"
16 | timezone: Etc/UTC
17 | labels:
18 | - ci
19 |
--------------------------------------------------------------------------------
/.github/workflows/build-publish-docs.yaml:
--------------------------------------------------------------------------------
1 | # This is a GitHub workflow defining a set of jobs with a set of steps.
2 | # ref: https://docs.github.com/en/actions/learn-github-actions/workflow-syntax-for-github-actions
3 | #
4 | name: Build and publish documentation
5 |
6 | on:
7 | pull_request:
8 | paths:
9 | - "docs/**"
10 | - "dask-gateway/**"
11 | - "dask-gateway-server/**"
12 | - ".github/workflows/build-publish-docs.yaml"
13 | push:
14 | paths:
15 | - "docs/**"
16 | - "dask-gateway/**"
17 | - "dask-gateway-server/**"
18 | - ".github/workflows/build-publish-docs.yaml"
19 | branches: ["main"]
20 | tags: ["**"]
21 | workflow_dispatch:
22 |
23 | env:
24 | commit_msg: ${{ github.event.head_commit.message }}
25 |
26 | jobs:
27 | build-and-publish-docs:
28 | name: Build and publish documentation
29 | runs-on: ubuntu-24.04
30 |
31 | # permissions requested for secrets.github_token in order to push to the
32 | # gh-pages branch, available for push and workflow_dispatch triggers.
33 | permissions:
34 | contents: write
35 |
36 | steps:
37 | - uses: actions/checkout@v4
38 | - uses: actions/setup-python@v5
39 | with:
40 | python-version: "3.11"
41 | # ref https://github.com/dask/dask-sphinx-theme/issues/68
42 |
43 | - name: Install Python docs requirements
44 | run: |
45 | DASK_GATEWAY_SERVER__NO_PROXY=true pip install -r docs/requirements.txt
46 |
47 | - name: Build docs (make html)
48 | run: |
49 | cd docs
50 | make html SPHINXOPTS='--color -W --keep-going'
51 |
52 | - name: Push built docs to gh-pages branch
53 | uses: JamesIves/github-pages-deploy-action@releases/v4
54 | if: github.event_name == 'push' && github.ref == 'refs/heads/main'
55 | with:
56 | branch: gh-pages
57 | folder: docs/_build/html/
58 |
59 | linkcheck-docs:
60 | name: Test links in docs
61 | runs-on: ubuntu-24.04
62 |
63 | steps:
64 | - uses: actions/checkout@v4
65 | - uses: actions/setup-python@v5
66 | with:
67 | python-version: "3.11"
68 | # ref https://github.com/dask/dask-sphinx-theme/issues/68
69 | - name: Install Python docs requirements
70 | run: |
71 | DASK_GATEWAY_SERVER__NO_PROXY=true pip install -r docs/requirements.txt
72 |
73 | - name: Linkcheck docs (make linkcheck)
74 | run: |
75 | cd docs
76 | make linkcheck SPHINXOPTS='--color -W --keep-going'
77 |
--------------------------------------------------------------------------------
/.github/workflows/build-publish-helm-chart.yaml:
--------------------------------------------------------------------------------
1 | # This is a GitHub workflow defining a set of jobs with a set of steps.
2 | # ref: https://docs.github.com/en/actions/learn-github-actions/workflow-syntax-for-github-actions
3 | #
4 | name: Build and publish Helm chart
5 |
6 | on:
7 | push:
8 | tags: ["**"]
9 | workflow_dispatch:
10 |
11 | jobs:
12 | build-publish-helm-chart:
13 | name: "Build and publish Helm chart"
14 | runs-on: ubuntu-24.04
15 |
16 | # permissions requested for secrets.github_token in order to push to the
17 | # container registry, available for push and workflow_dispatch triggers.
18 | permissions:
19 | contents: read
20 | packages: write
21 |
22 | steps:
23 | - uses: actions/checkout@v4
24 | with:
25 | # chartpress requires git history to set chart version and image tags
26 | # correctly
27 | fetch-depth: 0
28 | - uses: actions/setup-python@v5
29 | with:
30 | python-version: "3.13"
31 |
32 | - name: Install chart publishing dependencies (chartpress, pyyaml, helm)
33 | run: |
34 | pip install chartpress pyyaml
35 | pip list
36 |
37 | echo "Helm is already installed"
38 | helm version
39 |
40 | - name: Set up QEMU (for docker buildx)
41 | uses: docker/setup-qemu-action@v3
42 |
43 | - name: Set up Docker Buildx (for multi-arch builds)
44 | uses: docker/setup-buildx-action@v3
45 |
46 | - name: Login to container registry
47 | run: echo "${{ secrets.github_token }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
48 |
49 | # chartpress pushes a packages Helm chart to dask/helm-chart's gh-pages
50 | # branch, so we need to have a git user.email and user.name configured
51 | - name: Configure a git user
52 | run: |
53 | git config --global user.email "github-actions@example.local"
54 | git config --global user.name "GitHub Actions user"
55 |
56 | - name: Generate values.schema.json from YAML equivalent
57 | run: resources/helm/tools/generate-json-schema.py
58 |
59 | - name: Build and publish Helm chart with chartpress
60 | env:
61 | # chartpress can make use of a personal access token by setting these
62 | # environment variables like this, for details see:
63 | # https://github.com/jupyterhub/chartpress/blob/d4e2346d50f0724f6bee387f4f8aebc108afb648/chartpress.py#L118-L128
64 | #
65 | GITHUB_ACTOR: ""
66 | GITHUB_TOKEN: "${{ secrets.dask_bot_token }}"
67 | # DOCKER_BUILDKIT is required for building images with --mount flags,
68 | # as used in dask-gateway/Dockerfile.
69 | DOCKER_BUILDKIT: "1"
70 | run: continuous_integration/kubernetes/build-publish-helm-chart.sh
71 |
--------------------------------------------------------------------------------
/.github/workflows/refreeze-dockerfile-requirements-txt.yaml:
--------------------------------------------------------------------------------
1 | # This is a GitHub workflow defining a set of jobs with a set of steps.
2 | # ref: https://docs.github.com/en/actions/learn-github-actions/workflow-syntax-for-github-actions
3 | #
4 | name: Refreeze Dockerfile.requirements.txt
5 |
6 | on:
7 | push:
8 | paths:
9 | - "**/Dockerfile"
10 | - "**/Dockerfile.requirements.in"
11 | - "**/Dockerfile.requirements.txt"
12 | - ".github/workflows/refreeze-dockerfile-requirements-txt.yaml"
13 | branches: ["main"]
14 | workflow_dispatch:
15 |
16 | jobs:
17 | refreeze-dockerfile-requirements-txt:
18 | name: Refreeze Dockerfile.requirements.txt
19 |
20 | # Don't run this job on forks
21 | if: github.repository == 'dask/dask-gateway'
22 | runs-on: ubuntu-24.04
23 |
24 | strategy:
25 | fail-fast: false
26 | matrix:
27 | include:
28 | - image: dask-gateway
29 | - image: dask-gateway-server
30 |
31 | steps:
32 | - uses: actions/checkout@v4
33 |
34 | - name: Refreeze Dockerfile.requirements.txt based on Dockerfile.requirements.in
35 | run: |
36 | cd ${{ matrix.image }}
37 | docker run --rm \
38 | --env=CUSTOM_COMPILE_COMMAND='Use "Run workflow" button at https://github.com/dask/dask-gateway/actions/workflows/refreeze-dockerfile-requirements-txt.yaml' \
39 | --env=DASK_GATEWAY_SERVER__NO_PROXY=1 \
40 | --volume=$PWD:/opt/${{ matrix.image }} \
41 | --workdir=/opt/${{ matrix.image }} \
42 | --user=root \
43 | python:3.13-slim-bullseye \
44 | sh -c 'pip install pip-tools==7.* && pip-compile --allow-unsafe --strip-extras --upgrade --output-file=Dockerfile.requirements.txt Dockerfile.requirements.in'
45 |
46 | - name: git diff
47 | run: git --no-pager diff --color=always
48 |
49 | # ref: https://github.com/peter-evans/create-pull-request
50 | - name: Create a PR
51 | uses: peter-evans/create-pull-request@v7
52 | with:
53 | token: "${{ secrets.dask_bot_token }}"
54 | author: Dask Bot Account <65357765+dask-bot@users.noreply.github.com>
55 | committer: Dask Bot Account <65357765+dask-bot@users.noreply.github.com>
56 | branch: update-image-${{ matrix.image }}
57 | labels: dependencies
58 | commit-message: Refreeze ${{ matrix.image}}/Dockerfile.requirements.txt
59 | title: Refreeze ${{ matrix.image}}/Dockerfile.requirements.txt
60 | body: >-
61 | ${{ matrix.image}}/Dockerfile.requirements.txt has been refrozen
62 | based on ${{ matrix.image}}/Dockerfile.requirements.in.
63 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # pre-commit is a tool to perform a predefined set of tasks manually and/or
2 | # automatically before git commits are made.
3 | #
4 | # Config reference: https://pre-commit.com/#pre-commit-configyaml---top-level
5 | #
6 | # Common tasks
7 | #
8 | # - Run on all files: pre-commit run --all-files
9 | # - Register git hooks: pre-commit install --install-hooks
10 | #
11 | # About pre-commit.ci
12 | #
13 | # pre-commit.ci is a service that is enabled for this repo via
14 | # https://github.com/organizations/dask/settings/installations to do the
15 | # following:
16 | #
17 | # 1. Automatically keep the pinned versions in this file updated by opening PRs.
18 | # 2. Automatically run a pre-commit test like a GitHub workflow also could do.
19 | # 3. Automatically add a commit with autoformatting changes to PRs if they have
20 | #    forgotten to run configured autoformatters.
21 | #
22 | repos:
23 | # Autoformat: Python code, syntax patterns are modernized
24 | - repo: https://github.com/asottile/pyupgrade
25 | rev: v3.19.1
26 | hooks:
27 | - id: pyupgrade
28 | args:
29 | - --py310-plus
30 |
31 | # Autoformat: Python code
32 | - repo: https://github.com/PyCQA/isort
33 | rev: "6.0.1"
34 | hooks:
35 | - id: isort
36 |
37 | # Autoformat: Python code
38 | - repo: https://github.com/psf/black
39 | rev: "25.1.0"
40 | hooks:
41 | - id: black
42 |
43 | # Autoformat: general small fixes
44 | - repo: https://github.com/pre-commit/pre-commit-hooks
45 | rev: v5.0.0
46 | hooks:
47 | - id: end-of-file-fixer
48 | exclude_types: ["svg"]
49 | - id: trailing-whitespace
50 |
51 | # Lint: Python code
52 | - repo: https://github.com/PyCQA/flake8
53 | rev: "7.2.0"
54 | hooks:
55 | - id: flake8
56 |
57 | # pre-commit.ci config reference: https://pre-commit.ci/#configuration
58 | ci:
59 | autoupdate_schedule: monthly
60 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | Dask is a community maintained project. We welcome contributions in the form of bug reports, documentation, code, design proposals, and more.
2 |
3 | Please see https://gateway.dask.org/develop.html for more information.
4 |
5 | Also for general information on how to contribute to Dask projects see https://docs.dask.org/en/latest/develop.html.
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2019, Jim Crist-Harif
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | 2. Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | 3. Neither the name of the copyright holder nor the names of its contributors
15 | may be used to endorse or promote products derived from this software
16 | without specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | dask-gateway
2 | ============
3 |
4 | |github-actions-tests| |github-actions-docs| |pypi-dask-gateway| |pypi-dask-gateway-server| |conda-dask-gateway| |conda-dask-gateway-server|
5 |
6 | A multi-tenant server for securely deploying and managing Dask clusters. See
7 | `the documentation <https://gateway.dask.org/>`__ for more information.
8 |
9 | LICENSE
10 | -------
11 |
12 | New BSD. See the `License File
13 | <https://github.com/dask/dask-gateway/blob/main/LICENSE>`_.
14 |
15 | .. |github-actions-tests| image:: https://github.com/dask/dask-gateway/actions/workflows/test.yaml/badge.svg
16 | :target: https://github.com/dask/dask-gateway/actions/workflows/test.yaml
17 | .. |github-actions-docs| image:: https://github.com/dask/dask-gateway/actions/workflows/build-publish-docs.yaml/badge.svg
18 | :target: https://gateway.dask.org/
19 | .. |pypi-dask-gateway| image:: https://img.shields.io/pypi/v/dask-gateway.svg?label=dask-gateway
20 | :target: https://pypi.org/project/dask-gateway/
21 | .. |pypi-dask-gateway-server| image:: https://img.shields.io/pypi/v/dask-gateway-server.svg?label=dask-gateway-server
22 | :target: https://pypi.org/project/dask-gateway-server/
23 | .. |conda-dask-gateway| image:: https://img.shields.io/conda/v/conda-forge/dask-gateway?color=blue&label=dask-gateway
24 | :target: https://anaconda.org/conda-forge/dask-gateway
25 | .. |conda-dask-gateway-server| image:: https://img.shields.io/conda/v/conda-forge/dask-gateway-server?color=blue&label=dask-gateway-server
26 | :target: https://anaconda.org/conda-forge/dask-gateway-server
27 |
--------------------------------------------------------------------------------
/RELEASE.md:
--------------------------------------------------------------------------------
1 | # How to make a release
2 |
3 | `dask-gateway` and `dask-gateway-server` are packages available on [PyPI] and
4 | [conda-forge], and `dask-gateway` is a Helm chart available at [helm.dask.org]
5 | which is both a user facing website and a Helm chart repository by having an
6 | [index.yaml] file read by `helm` the CLI linking to packaged Helm charts.
7 |
8 | These are instructions on how to make a release.
9 |
10 | ## Pre-requisites
11 |
12 | - Push rights to [dask/dask-gateway]
13 | - Push rights to [conda-forge/dask-gateway-feedstock]
14 |
15 | ## Steps to make a release
16 |
17 | 1. Refreeze Dockerfile.requirements.txt files by running the [refreeze workflow]
18 | and merging created PRs.
19 |
20 | [refreeze workflow]: https://github.com/dask/dask-gateway/actions/workflows/refreeze-dockerfile-requirements-txt.yaml
21 |
22 | 1. Create a PR updating `docs/source/changelog.md` with [github-activity] and
23 | continue only when it's merged.
24 |
25 | ```shell
26 | pip install github-activity
27 |
28 | github-activity --heading-level=2 dask/dask-gateway
29 | ```
30 |
31 | - Visit and label all uncategorized PRs appropriately with: `maintenance`,
32 | `enhancement`, `new`, `breaking`, `bug`, or `documentation`.
33 | - Generate a list of PRs again and add it to the changelog
34 | - Highlight breaking changes
35 | - Summarize the release changes
36 |
37 | 2. Checkout main and make sure it is up to date.
38 |
39 | ```shell
40 | git checkout main
41 | git fetch origin main
42 | git reset --hard origin/main
43 | git clean -xfd
44 | ```
45 |
46 | 3. Update the version, make commits, and push a git tag with `tbump`.
47 |
48 | ```shell
49 | pip install tbump
50 | tbump --dry-run ${VERSION}
51 |
52 | tbump ${VERSION}
53 | ```
54 |
55 | Following this, the [CI system] will build and publish the PyPI packages and
56 | Helm chart.
57 |
58 | 4. Following the release to PyPI, an automated PR should arrive to
59 | [conda-forge/dask-gateway-feedstock] with instructions.
60 |
61 | [pypi]: https://pypi.org/project/dask-gateway/
62 | [conda-forge]: https://anaconda.org/conda-forge/dask-gateway
63 | [helm.dask.org]: https://helm.dask.org/
64 | [index.yaml]: https://helm.dask.org/index.yaml
65 | [dask/dask-gateway]: https://github.com/dask/dask-gateway
66 | [conda-forge/dask-gateway-feedstock]: https://github.com/conda-forge/dask-gateway-feedstock
67 | [github-activity]: https://github.com/executablebooks/github-activity
68 | [ci system]: https://github.com/dask/dask-gateway/actions
69 |
--------------------------------------------------------------------------------
/continuous_integration/docker/base/Dockerfile:
--------------------------------------------------------------------------------
1 | # See continuous_integration/docker/README.md for details about this and other
2 | # Dockerfiles under the continuous_integration/docker folder on their purpose
3 | # and how to work with them.
4 | #
5 | # centos:8 reached end-of-life 31 Dec 2021
6 | # centos:7 reached end-of-life 30 Jun 2024
7 | #
8 | FROM centos:7
9 |
10 | ARG python_version="3.11"
11 | # go_version was 1.19 until it was updated to 1.23.6 (2025-02-07) by adding a
12 | # layer on top of the previous image, as it is no longer able to build and it
13 | # was an easy way to update the golang version.
14 | ARG go_version="1.23.6"
15 |
16 | # Set labels based on the Open Containers Initiative (OCI):
17 | # https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys
18 | #
19 | LABEL org.opencontainers.image.source="https://github.com/dask/dask-gateway"
20 | LABEL org.opencontainers.image.url="https://github.com/dask/dask-gateway/blob/HEAD/continuous_integration/docker/base/Dockerfile"
21 |
22 | # Configure yum to error on missing packages
23 | RUN echo "skip_missing_names_on_install=False" >> /etc/yum.conf
24 |
25 | # Install common yum packages
26 | RUN yum install -y \
27 | sudo \
28 | # sudo is used to run commands as various other users
29 | git \
30 | # git is a requirement for golang to fetch dependencies during
31 | # compilation of the golang code we have in
32 | # dask-gateway-server/dask-gateway-proxy.
33 | && yum clean all \
34 | && rm -rf /var/cache/yum
35 |
36 | # Install python and the following utilities:
37 | #
38 | # - tini: can wrap a container entrypoint to avoid misc issues, see
39 | # https://github.com/krallin/tini#readme
40 | # - psutil: provides misc tools of relevance for debugging, see
41 | # https://psutil.readthedocs.io/en/latest/#about
42 | #
43 | # NOTE: micromamba is a slimmed mamba/conda executable functioning without a
44 | # pre-installed Python environment we use to install a Python version of
45 | # choice to not first need to install a full Python environment to then
46 | # install another Python environment.
47 | #
48 | # See https://github.com/mamba-org/mamba#micromamba.
49 | #
50 | RUN yum install -y bzip2 \
51 | \
52 | && curl -sL https://micromamba.snakepit.net/api/micromamba/linux-64/latest \
53 | | tar --extract --verbose --bzip2 bin/micromamba --strip-components=1 \
54 | && ./micromamba install \
55 | --channel=conda-forge \
56 | --root-prefix="/opt/python" \
57 | --prefix="/opt/python" \
58 | python="${python_version}" \
59 | mamba \
60 | psutil \
61 | tini \
62 | && rm ./micromamba \
63 | && /opt/python/bin/mamba clean -af \
64 | && find /opt/python/ -type f -name '*.a' -delete \
65 | && find /opt/python/ -type f -name '*.pyc' -delete \
66 | \
67 | && yum remove -y bzip2 \
68 | && yum clean all \
69 | && rm -rf /var/cache/yum
70 |
71 | # Install go
72 | RUN curl -sL https://go.dev/dl/go${go_version}.linux-amd64.tar.gz \
73 | | tar --extract --verbose --gzip --directory=/opt/
74 |
75 | # Put Python and Go environments on PATH
76 | #
77 | # NOTE: This PATH environment will be preserved if sudo is used to switch to
78 | # other users thanks to changes to /etc/sudoers.d/preserve_path.
79 | #
80 | ENV PATH=/opt/python/bin:/opt/go/bin:$PATH
81 | COPY ./files/etc /etc/
82 |
83 | # Make a few user accounts and a user group for later use
84 | RUN useradd --create-home dask \
85 | && useradd --create-home alice \
86 | && useradd --create-home bob \
87 | && groupadd dask_users \
88 | && usermod --append --groups dask_users alice \
89 | && usermod --append --groups dask_users bob
90 |
--------------------------------------------------------------------------------
/continuous_integration/docker/base/files/etc/sudoers.d/preserve_path:
--------------------------------------------------------------------------------
1 | # This config ensures that the PATH environment variable this only-for-testing
2 | # container is started with is preserved when changing to other users with sudo.
3 | #
4 | # NOTES:
5 | #
6 | # - `sudo` is used to execute commands as other users. What then happens to the
7 | # environment will be determined by configuration in /etc/sudoers and
8 | # /etc/sudoers.d/* as well as flags we pass to the sudo command. The behavior
9 | # can be inspected with `sudo -V` run as root.
10 | #
11 | # ref: `man sudo` https://linux.die.net/man/8/sudo
12 | # ref: `man sudoers` https://www.sudo.ws/man/1.8.15/sudoers.man.html
13 | #
14 | # - We disable the `secure_path` which is set by default in /etc/sudoers as it
15 | # would override the PATH variable.
16 | Defaults !secure_path
17 | #
18 | # - We can use the `-E` or `--preserve-env` flag to pass through most
19 | # environment variables, but understand that exceptions are caused by the
20 | # sudoers configuration: `env_delete`, `env_check`, and `secure_path`.
21 | #
22 | # - We reduce the `env_delete` list of default variables to be deleted. It has
23 | # higher priority than the `--preserve-env` flag and `env_keep` configuration.
24 | Defaults env_delete -= "PATH"
25 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/_install.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -xe
3 |
4 | cd /working
5 |
6 | # FIXME: pip should be installed to a modern version in the base image instead
7 | # of being upgraded here. It isn't because of
8 | # https://github.com/dask/dask-gateway/issues/837.
9 | pip install "pip==24.*"
10 |
11 | # pykerberos needs to compile c++ code that depends on system libraries, by
12 | # installing it from conda-forge, we avoid such hassle.
13 | #
14 | mamba install -c conda-forge pykerberos
15 |
16 | # This installs everything else we need for tests
17 | pip install -r tests/requirements.txt
18 |
19 | pip list
20 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/_print_logs.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Bold high intensity green
4 | G='\033[1;92m'
5 | # No color
6 | NC='\033[0m'
7 |
8 | printf "\n${G}supervisorctl status${NC}\n"
9 | supervisorctl status
10 |
11 | printf "\n${G}cat /var/log/supervisor/krb5kdc.log${NC}\n"
12 | cat /var/log/supervisor/krb5kdc.log
13 | printf "\n${G}cat /var/log/supervisor/kadmind.log${NC}\n"
14 | cat /var/log/supervisor/kadmind.log
15 | printf "\n${G}cat /var/log/supervisor/yarn-nodemanager.log${NC}\n"
16 | cat /var/log/supervisor/yarn-nodemanager.log
17 | printf "\n${G}cat /var/log/supervisor/yarn-resourcemanager.log${NC}\n"
18 | cat /var/log/supervisor/yarn-resourcemanager.log
19 | printf "\n${G}cat /var/log/supervisor/hdfs-namenode.log${NC}\n"
20 | cat /var/log/supervisor/hdfs-namenode.log
21 | printf "\n${G}cat /var/log/supervisor/hdfs-datanode.log${NC}\n"
22 | cat /var/log/supervisor/hdfs-datanode.log
23 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -xe
3 |
4 | cd /working
5 | pytest -v \
6 | tests/test_yarn_backend.py \
7 | tests/test_auth.py \
8 | -k "yarn or kerberos"
9 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/hadoop/conf.kerberos/capacity-scheduler.xml:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 | yarn.scheduler.capacity.root.queues
13 | default,fruit
14 |
15 |
16 |
17 | yarn.scheduler.capacity.root.fruit.queues
18 | apples,bananas,oranges
19 |
20 |
21 |
22 | yarn.scheduler.capacity.maximum-am-resource-percent
23 | 0.75
24 |
25 |
26 |
27 |
28 | yarn.scheduler.capacity.root.default.capacity
29 | 60.0
30 |
31 |
32 |
33 | yarn.scheduler.capacity.root.default.maximum-capacity
34 | 100.0
35 |
36 |
37 |
38 |
39 | yarn.scheduler.capacity.root.fruit.capacity
40 | 40.0
41 |
42 |
43 |
44 | yarn.scheduler.capacity.root.fruit.maximum-capacity
45 | 50.0
46 |
47 |
48 |
49 |
50 | yarn.scheduler.capacity.root.fruit.apples.capacity
51 | 50.0
52 |
53 |
54 |
55 | yarn.scheduler.capacity.root.fruit.apples.maximum-capacity
56 | 100.0
57 |
58 |
59 |
60 |
61 | yarn.scheduler.capacity.root.fruit.bananas.capacity
62 | 25.0
63 |
64 |
65 |
66 | yarn.scheduler.capacity.root.fruit.bananas.maximum-capacity
67 | 50.0
68 |
69 |
70 |
71 |
72 | yarn.scheduler.capacity.root.fruit.oranges.capacity
73 | 25.0
74 |
75 |
76 |
77 | yarn.scheduler.capacity.root.fruit.oranges.maximum-capacity
78 | 60.0
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/hadoop/conf.kerberos/container-executor.cfg:
--------------------------------------------------------------------------------
1 | # Some configuration of yarn.nodemanager is duplicated to yarn-site.xml, as
2 | # documented it needs to be:
3 | # https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/SecureContainer.html#Configuration
4 | #
5 | yarn.nodemanager.delete.debug-delay-sec=3600
6 | yarn.nodemanager.local-dirs=/var/tmp/hadoop-yarn/local
7 | yarn.nodemanager.log-dirs=/var/tmp/hadoop-yarn/log
8 | yarn.nodemanager.linux-container-executor.group=yarn
9 |
10 | banned.users=hdfs,yarn,mapred,bin
11 | min.user.id=1000
12 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/hadoop/conf.kerberos/core-site.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | hadoop.tmp.dir
5 | /var/tmp/
6 |
7 |
8 |
9 | fs.defaultFS
10 | hdfs://master.example.com:9000
11 |
12 |
13 |
14 | hadoop.proxyuser.dask.hosts
15 | *
16 |
17 |
18 |
19 | hadoop.proxyuser.dask.users
20 | *
21 |
22 |
23 |
24 | hadoop.security.authentication
25 | kerberos
26 |
27 |
28 |
29 | hadoop.security.authorization
30 | true
31 |
32 |
33 |
37 |
38 | hadoop.http.filter.initializers
39 | org.apache.hadoop.security.AuthenticationFilterInitializer
40 |
41 |
42 |
43 | hadoop.http.authentication.type
44 | simple
45 |
46 |
47 |
48 | hadoop.http.authentication.signature.secret.file
49 | /opt/hadoop/etc/hadoop/http-secret-file
50 |
51 |
52 |
53 | hadoop.http.authentication.cookie.domain
54 | .example.com
55 |
56 |
57 |
58 | hadoop.http.authentication.simple.anonymous.allowed
59 | true
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/hadoop/conf.kerberos/hdfs-site.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | dfs.replication
6 | 1
7 |
8 |
9 |
10 | dfs.permissions.enabled
11 | true
12 |
13 |
14 |
15 | dfs.webhdfs.enabled
16 | true
17 |
18 |
19 |
20 | dfs.block.access.token.enable
21 | true
22 |
23 |
24 |
25 |
26 | dfs.namenode.keytab.file
27 | /opt/hadoop/etc/hadoop/master-keytabs/hdfs.keytab
28 |
29 |
30 |
31 | dfs.namenode.kerberos.principal
32 | hdfs/master.example.com@EXAMPLE.COM
33 |
34 |
35 |
36 | dfs.namenode.kerberos.internal.spnego.principal
37 | HTTP/master.example.com@EXAMPLE.COM
38 |
39 |
40 |
41 | dfs.datanode.keytab.file
42 | /opt/hadoop/etc/hadoop/master-keytabs/hdfs.keytab
43 |
44 |
45 |
46 | dfs.datanode.kerberos.principal
47 | hdfs/master.example.com@EXAMPLE.COM
48 |
49 |
50 |
51 | dfs.web.authentication.kerberos.principal
52 | HTTP/master.example.com@EXAMPLE.COM
53 |
54 |
55 |
56 | dfs.web.authentication.kerberos.keytab
57 | /opt/hadoop/etc/hadoop/master-keytabs/HTTP.keytab
58 |
59 |
60 |
61 |
62 | ignore.secure.ports.for.testing
63 | true
64 |
65 |
66 |
67 | dfs.http.policy
68 | HTTP_ONLY
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/hadoop/conf.kerberos/yarn-site.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | yarn.resourcemanager.hostname
6 | master.example.com
7 |
8 |
9 |
14 |
15 | yarn.application.classpath
16 |
17 | $HADOOP_CONF_DIR,
18 | $HADOOP_COMMON_HOME/share/hadoop/common/*,
19 | $HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
20 | $HADOOP_HDFS_HOME/share/hadoop/hdfs/*,
21 | $HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,
22 | $HADOOP_YARN_HOME/share/hadoop/yarn/*,
23 | $HADOOP_YARN_HOME/share/hadoop/yarn/lib/*
24 |
25 |
26 |
27 |
28 |
29 | yarn.nodemanager.local-dirs
30 | file:///var/tmp/hadoop-yarn/local
31 |
32 |
33 |
34 | yarn.nodemanager.log-dirs
35 | file:///var/tmp/hadoop-yarn/log
36 |
37 |
38 |
39 | yarn.log-aggregation-enable
40 | true
41 |
42 |
43 |
44 | yarn.nodemanager.remote-app-log-dir
45 | hdfs://master.example.com:9000/var/log/hadoop-yarn/apps
46 |
47 |
48 |
49 |
50 | yarn.resourcemanager.scheduler.class
51 | org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler
52 |
53 |
54 |
55 | yarn.scheduler.minimum-allocation-mb
56 | 32
57 |
58 |
59 |
60 | yarn.resource-types.memory-mb.increment-allocation
61 | ${yarn.scheduler.minimum-allocation-mb}
62 |
63 |
64 |
65 | yarn.scheduler.increment-allocation-mb
66 | ${yarn.scheduler.minimum-allocation-mb}
67 |
68 |
69 |
70 |
71 | yarn.resourcemanager.keytab
72 | /opt/hadoop/etc/hadoop/master-keytabs/yarn.keytab
73 |
74 |
75 |
76 | yarn.resourcemanager.principal
77 | yarn/master.example.com@EXAMPLE.COM
78 |
79 |
80 |
81 | yarn.nodemanager.keytab
82 | /opt/hadoop/etc/hadoop/master-keytabs/yarn.keytab
83 |
84 |
85 |
86 | yarn.nodemanager.principal
87 | yarn/master.example.com@EXAMPLE.COM
88 |
89 |
90 |
91 |
92 | yarn.nodemanager.container-executor.class
93 | org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor
94 |
95 |
96 |
97 | yarn.nodemanager.linux-container-executor.path
98 | /opt/hadoop/bin/container-executor
99 |
100 |
101 |
102 | yarn.nodemanager.linux-container-executor.group
103 | yarn
104 |
105 |
106 |
107 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/hadoop/conf.simple/core-site.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | hadoop.tmp.dir
5 | /var/tmp/
6 |
7 |
8 |
9 | fs.defaultFS
10 | hdfs://master.example.com:9000
11 |
12 |
13 |
14 | hadoop.security.authentication
15 | simple
16 |
17 |
18 |
19 | hadoop.security.authorization
20 | true
21 |
22 |
23 |
24 | hadoop.http.filter.initializers
25 | org.apache.hadoop.security.AuthenticationFilterInitializer
26 |
27 |
28 |
29 | hadoop.http.authentication.type
30 | simple
31 |
32 |
33 |
34 | hadoop.http.authentication.signature.secret.file
35 | /opt/hadoop/etc/hadoop/http-secret-file
36 |
37 |
38 |
39 | hadoop.http.authentication.cookie.domain
40 | .example.com
41 |
42 |
43 |
44 | hadoop.http.authentication.simple.anonymous.allowed
45 | false
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/hadoop/conf.simple/hdfs-site.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | dfs.replication
5 | 1
6 |
7 |
8 |
9 | dfs.permissions.enabled
10 | true
11 |
12 |
13 |
14 | dfs.webhdfs.enabled
15 | true
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/krb5.conf:
--------------------------------------------------------------------------------
1 | # krb5.conf is a configuration for Kerberos. supervisord is configured to start
2 | # a Kerberos Key Distribution Center (KDC) influenced by this configuration.
3 | #
4 | # krb5.conf reference:
5 | # https://web.mit.edu/kerberos/krb5-1.19/doc/admin/conf_files/krb5_conf.html
6 | #
7 | [logging]
8 | default = FILE:/var/log/supervisor/krb5libs.log
9 | kdc = FILE:/var/log/supervisor/krb5kdc.log
10 | admin_server = FILE:/var/log/supervisor/kadmind.log
11 |
12 | [libdefaults]
13 | default_realm = EXAMPLE.COM
14 | dns_lookup_realm = false
15 | dns_lookup_kdc = false
16 | ticket_lifetime = 24h
17 | forwardable = true
18 |
19 | [realms]
20 | EXAMPLE.COM = {
21 | kdc = master.example.com
22 | admin_server = master.example.com
23 | }
24 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/supervisord.conf:
--------------------------------------------------------------------------------
1 | # supervisord starts other "programs" declared in the additional configuration
2 | # files found in the /etc/supervisor.d folder.
3 | #
4 | # supervisord configuration reference:
5 | # http://supervisord.org/configuration.html#configuration-file
6 | #
7 | [supervisord]
8 | strip_ansi = true
9 | nodaemon = true
10 | logfile = /var/log/supervisord.log
11 | pidfile = /var/run/supervisord.pid
12 |
13 | [unix_http_server]
14 | file = /tmp/supervisor.sock
15 |
16 | [rpcinterface:supervisor]
17 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
18 |
19 | [supervisorctl]
20 | serverurl = unix:///tmp/supervisor.sock
21 | prompt = hadoop
22 |
23 | [include]
24 | files = /etc/supervisord.d/*.conf
25 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/supervisord.d/hdfs-datanode.conf:
--------------------------------------------------------------------------------
1 | # Configuration reference:
2 | # http://supervisord.org/configuration.html#program-x-section-settings
3 | #
4 | [program:hdfs-datanode]
5 | user = hdfs
6 | command = hdfs datanode
7 | stdout_logfile = /var/log/supervisor/hdfs-datanode.log
8 | redirect_stderr = true
9 | autostart = true
10 | autorestart = false
11 | startsecs = 3
12 | stopwaitsecs = 10
13 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/supervisord.d/hdfs-namenode.conf:
--------------------------------------------------------------------------------
1 | # Configuration reference:
2 | # http://supervisord.org/configuration.html#program-x-section-settings
3 | #
4 | [program:hdfs-namenode]
5 | user = hdfs
6 | command = hdfs namenode
7 | stdout_logfile = /var/log/supervisor/hdfs-namenode.log
8 | redirect_stderr = true
9 | autostart = true
10 | autorestart = false
11 | startsecs = 3
12 | stopwaitsecs = 10
13 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/supervisord.d/kerberos.conf:
--------------------------------------------------------------------------------
1 | # Configuration reference:
2 | # http://supervisord.org/configuration.html#program-x-section-settings
3 | #
4 | # krb5kdc or kadmind aren't emitting logs to stdout but writing logs directly to
5 | # files as configured in /etc/krb5.conf
6 | #
7 | [program:krb5kdc]
8 | user = root
9 | command = /usr/sbin/krb5kdc -r EXAMPLE.COM -P /var/run/krb5kdc.pid -n
10 | stdout_logfile = /dev/stdout
11 | stdout_logfile_maxbytes = 0
12 | autostart = true
13 | autorestart = true
14 |
15 | [program:kadmind]
16 | user = root
17 | command = /usr/sbin/kadmind -r EXAMPLE.COM -P /var/run/kadmind.pid -nofork
18 | stdout_logfile = /dev/stdout
19 | stdout_logfile_maxbytes = 0
20 | autostart = true
21 | autorestart = true
22 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/supervisord.d/yarn-nodemanager.conf:
--------------------------------------------------------------------------------
1 | # Configuration reference:
2 | # http://supervisord.org/configuration.html#program-x-section-settings
3 | #
4 | [program:yarn-nodemanager]
5 | user = yarn
6 | command = yarn nodemanager
7 | stdout_logfile = /var/log/supervisor/yarn-nodemanager.log
8 | redirect_stderr = true
9 | autostart = true
10 | autorestart = false
11 | startsecs = 3
12 | stopwaitsecs = 10
13 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/etc/supervisord.d/yarn-resourcemanager.conf:
--------------------------------------------------------------------------------
1 | # Configuration reference:
2 | # http://supervisord.org/configuration.html#program-x-section-settings
3 | #
4 | [program:yarn-resourcemanager]
5 | user = yarn
6 | command = yarn resourcemanager
7 | stdout_logfile = /var/log/supervisor/yarn-resourcemanager.log
8 | redirect_stderr = true
9 | autostart = true
10 | autorestart = false
11 | startsecs = 3
12 | stopwaitsecs = 10
13 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/scripts/init-hdfs.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Wait for HDFS to accept requests, then create the directory structure and
# per-user home directories the test suite relies on. Run as the hdfs user
# (see setup-hadoop.sh).
set -x

# Exponential backoff on testing hdfs status, then run init script
echo "Waiting to connect to HDFS"
timeout=2
exit_code=0
for attempt in {1..5}; do
    hdfs dfs -ls /
    exit_code=$?

    # Numeric comparison (-eq) is the correct test for an exit code.
    if [[ $exit_code -eq 0 ]]; then
        break
    fi

    echo "Retrying in $timeout.." 1>&2
    sleep "$timeout"
    # Double the delay each attempt (2, 4, 8, 16 seconds). $(( )) is the
    # standard arithmetic expansion; the original $[ ] form is deprecated.
    timeout=$((timeout * 2))
done

if [[ $exit_code -ne 0 ]]; then
    echo "Failed to connect to HDFS"
    exit $exit_code
fi
echo "HDFS connected, initializing directory structure"

hdfs dfs -mkdir -p /tmp \
    && hdfs dfs -chmod -R 1777 /tmp \
    && hdfs dfs -mkdir -p /var/log \
    && hdfs dfs -chmod -R 1775 /var/log \
    && hdfs dfs -chown yarn:hadoop /var/log \
    && hdfs dfs -mkdir -p /tmp/hadoop-yarn \
    && hdfs dfs -chown -R mapred:hadoop /tmp/hadoop-yarn \
    && hdfs dfs -mkdir -p /tmp/hadoop-yarn/staging/history/done_intermediate \
    && hdfs dfs -chown -R mapred:hadoop /tmp/hadoop-yarn/staging \
    && hdfs dfs -chmod -R 1777 /tmp \
    && hdfs dfs -mkdir -p /var/log/hadoop-yarn/apps \
    && hdfs dfs -chmod -R 1777 /var/log/hadoop-yarn/apps \
    && hdfs dfs -chown yarn:hadoop /var/log/hadoop-yarn/apps \
    && hdfs dfs -mkdir -p /user \
    && hdfs dfs -mkdir -p /user/root \
    && hdfs dfs -chmod -R 777 /user/root \
    && hdfs dfs -chown root /user/root \
    && hdfs dfs -mkdir -p /user/history \
    && hdfs dfs -chmod -R 1777 /user/history \
    && hdfs dfs -chown mapred:hadoop /user/history \
    && hdfs dfs -mkdir -p /user/dask \
    && hdfs dfs -chown dask /user/dask \
    && hdfs dfs -mkdir -p /user/alice \
    && hdfs dfs -chown alice /user/alice \
    && hdfs dfs -mkdir -p /user/bob \
    && hdfs dfs -chown bob /user/bob

exit_code=$?
if [[ $exit_code -ne 0 ]]; then
    echo "Failed to initialize HDFS"
    exit $exit_code
fi
echo "Initialized HDFS"
60 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/scripts/setup-hadoop.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -ex
3 |
4 | # Tweak hadoop configuration and permissions:
5 | #
6 | # - hadoop is unpacked with default configuration in etc/hadoop, we relocate
7 | # that to /etc/hadoop/conf.empty.
8 | #
9 | mv /opt/hadoop/etc/hadoop /etc/hadoop/conf.empty
10 | #
11 | # - log4j.properties is a requirement to have in the hadoop configuration
12 | # directory that we don't wan't to redefine, so we copy it from the default
13 | # configuration to our configurations.
14 | #
15 | cp /etc/hadoop/conf.empty/log4j.properties /etc/hadoop/conf.simple/
16 | cp /etc/hadoop/conf.empty/log4j.properties /etc/hadoop/conf.kerberos/
17 | #
18 | # - Create /opt/hadoop/logs directory with high group permissions to ensure it
19 | # isn't created with narrow permissions later when running "hdfs namenode".
20 | #
21 | mkdir -p /opt/hadoop/logs
22 | chmod g+w /opt/hadoop/logs
23 | #
24 | # - Create /var/tmp directory with permissions to ensure the hadoop group is
25 | # propegated and have right to create new directories. Note that the hdfs user
26 | # will later create /var/tmp/dfs but then get to own it even though it will be
27 | # owned also by the hadoop group due to the 2xxx part of these permissions.
28 | #
29 | chown -R root:hadoop /var/tmp
30 | chmod -R 2770 /var/tmp
31 | #
32 | # - Generate a key to authenticate web access during the brief time we use the
33 | # /etc/hadoop/conf.simple configuration as part of building the docker image.
34 | #
35 | dd if=/dev/urandom bs=64 count=1 > /etc/hadoop/conf.simple/http-secret-file
36 | chown root:hadoop /etc/hadoop/conf.simple/http-secret-file
37 | chmod 440 /etc/hadoop/conf.simple/http-secret-file
38 | #
39 | # - Declare HDFS configuration to use temporarily, let /opt/hadoop/etc/hadoop
40 | # point to /etc/hadoop/conf.simple.
41 | #
42 | alternatives --install /opt/hadoop/etc/hadoop hadoop-conf /etc/hadoop/conf.simple 50
43 | alternatives --set hadoop-conf /etc/hadoop/conf.simple
44 |
45 |
46 |
47 |
48 | # Initialize HDFS filesystem with content to test against
49 | #
50 | # 1. Delete all hdfs files and start with a clean slate.
51 | #
52 | sudo --preserve-env --user hdfs \
53 | hdfs namenode -format -force
54 | #
55 | # 2. Add to hosts to resolve a domain name, /etc/hosts will be cleared when the
56 | # container starts though, see https://stackoverflow.com/a/25613983. This
57 | # container is supposed to start with "--hostname master.example.com".
58 | #
59 | echo "127.0.0.1 master.example.com" >> /etc/hosts
60 | #
61 | # 3. Start "hdfs namenode" and "hdfs datanode" but detach with "&" to continue
62 | # doing other things.
63 | #
64 | sudo --preserve-env --user hdfs \
65 | hdfs namenode &
66 | sudo --preserve-env --user hdfs \
67 | hdfs datanode &
68 | #
69 | # 4. Run a script to bootstrap the HDFS filesystem with content for testing.
70 | #
71 | sudo --preserve-env --user hdfs \
72 | /scripts/init-hdfs.sh
73 | #
74 | # 5. Shut down started "hdfs namenode" and "hdfs datanode" processes.
75 | #
76 | pkill java
77 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/var/kerberos/krb5kdc/kadm5.acl:
--------------------------------------------------------------------------------
1 | root/admin@EXAMPLE.COM ex
2 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/files/var/kerberos/krb5kdc/kdc.conf:
--------------------------------------------------------------------------------
1 | [kdcdefaults]
2 | kdc_ports = 88
3 | kdc_tcp_ports = 88
4 |
5 | [realms]
6 | EXAMPLE.COM = {
7 | acl_file = /var/kerberos/krb5kdc/kadm5.acl
8 | dict_file = /usr/share/dict/words
9 | admin_keytab = /var/kerberos/krb5kdc/kadm5.keytab
10 | supported_enctypes = des3-hmac-sha1:normal arcfour-hmac:normal des-hmac-sha1:normal des-cbc-md5:normal des-cbc-crc:normal
11 | }
12 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/install.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | docker exec hadoop /working/continuous_integration/docker/hadoop/_install.sh
3 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/print_logs.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | docker exec hadoop /working/continuous_integration/docker/hadoop/_print_logs.sh
3 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/start.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -xe

ci_docker_hadoop="$(dirname "${BASH_SOURCE[0]}")"
full_path_ci_docker_hadoop="$(cd "${ci_docker_hadoop}" && pwd)"
git_root="$(cd "${full_path_ci_docker_hadoop}/../../.." && pwd)"

docker run --rm -d \
    --name hadoop \
    -h master.example.com \
    -v "$git_root":/working \
    -p 8000:8000 \
    -p 8786:8786 \
    -p 8088:8088 \
    ghcr.io/dask/dask-gateway-ci-hadoop

# The hadoop container's supervisord process emits logs about the progress of
# starting up declared services that we will await.
#
# We do it to avoid getting OOMKilled by peaking memory needs during startup,
# which is prone to happen if we run pip install at the same time.
#
# Practically, we await "entered RUNNING state" to be observed 6 times, which
# represents our 6 supervisord services:
#
# INFO success: kadmind entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
# INFO success: krb5kdc entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
# INFO success: hdfs-namenode entered RUNNING state, process has stayed up for > than 3 seconds (startsecs)
# INFO success: hdfs-datanode entered RUNNING state, process has stayed up for > than 3 seconds (startsecs)
# INFO success: yarn-resourcemanager entered RUNNING state, process has stayed up for > than 3 seconds (startsecs)
# INFO success: yarn-nodemanager entered RUNNING state, process has stayed up for > than 3 seconds (startsecs)
#
set +x
await_startup() {
    local i=0 running_count=0
    while [ "$i" -lt 30 ]; do
        # grep -c counts matching lines; silencing docker's stderr (not wc's,
        # as the previous version did) keeps transient errors out of the
        # output. A numeric >= 6 comparison avoids the substring pitfall of
        # `grep "6"`, which would also match counts like 16 or 60.
        running_count=$(docker logs hadoop 2>/dev/null | grep -c "entered RUNNING state")
        if [ "$running_count" -ge 6 ]; then
            echo "hadoop container started!"
            return 0
        fi
        sleep 1
        i=$((i + 1))
        echo "Waiting for hadoop container startup ($i seconds)"
    done
    echo "WARNING: /script/start.sh was slow to finish!"
    exit 1
}
await_startup
48 |
--------------------------------------------------------------------------------
/continuous_integration/docker/hadoop/test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | docker exec hadoop /working/continuous_integration/docker/hadoop/_test.sh
3 |
--------------------------------------------------------------------------------
/continuous_integration/docker/pbs/Dockerfile:
--------------------------------------------------------------------------------
# See continuous_integration/docker/README.md for details about this and other
# Dockerfiles under the continuous_integration/docker folder on their purpose
# and how to work with them.
#
FROM ghcr.io/dask/dask-gateway-ci-base:latest

# Set labels based on the Open Containers Initiative (OCI):
# https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys
#
LABEL org.opencontainers.image.url="https://github.com/dask/dask-gateway/blob/HEAD/continuous_integration/docker/pbs/Dockerfile"

# Notify dask-gateway tests that PBS is available
ENV TEST_DASK_GATEWAY_PBS true

# Install openpbs
#
# 1. Download and install .rpm
#
#    OpenPBS versions: https://github.com/openpbs/openpbs/releases
#
#    We use an old version because there isn't a modern one pre-built for
#    centos:7 as used in the base image. The old version was called pbspro, so
#    there is a change needed in the download url related to that if switching
#    to a newer version.
#
RUN INSTALL_OPENPBS_VERSION=19.1.3 \
 && yum install -y unzip \
    \
 && curl -sL -o /tmp/openpbs.zip https://github.com/openpbs/openpbs/releases/download/v${INSTALL_OPENPBS_VERSION}/pbspro_${INSTALL_OPENPBS_VERSION}.centos_7.zip \
 && unzip /tmp/openpbs.zip -d /opt/openpbs \
 && rm /tmp/openpbs.zip \
 && yum install -y \
        /opt/openpbs/*pbs*/*-server-*.rpm \
    \
 && yum remove -y unzip \
 && yum clean all \
 && rm -rf /var/cache/yum
#
# 2. Update PATH environment variable
#
#    Note that this PATH environment will be preserved when sudo is used to
#    switch to other users thanks to changes to /etc/sudoers.d/preserve_path,
#    which is configured in the base Dockerfile.
#
ENV PATH=/opt/pbs/bin:$PATH

# Copy over files
COPY ./files /

ENTRYPOINT ["/opt/python/bin/tini", "-g", "--"]
CMD ["/scripts/start.sh"]
52 |
--------------------------------------------------------------------------------
/continuous_integration/docker/pbs/_install.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -xe
3 |
4 | cd /working
5 |
6 | # FIXME: pip should be installed to a modern version in the base image instead
7 | # of being upgraded here. It isn't because of
8 | # https://github.com/dask/dask-gateway/issues/837.
9 | pip install "pip==24.*"
10 |
11 | # This installs everything we need for tests
12 | pip install -r tests/requirements.txt
13 |
14 | pip list
15 |
--------------------------------------------------------------------------------
/continuous_integration/docker/pbs/_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -xe
3 |
4 | cd /working
5 | pytest -v tests/test_pbs_backend.py
6 |
--------------------------------------------------------------------------------
/continuous_integration/docker/pbs/files/etc/sudoers.d/dask:
--------------------------------------------------------------------------------
1 | Cmnd_Alias DASK_GATEWAY_JOBQUEUE_LAUNCHER = /opt/python/bin/dask-gateway-jobqueue-launcher
2 |
3 | %dask_users ALL=(dask) /usr/bin/sudo
4 | dask ALL=(%dask_users) NOPASSWD:DASK_GATEWAY_JOBQUEUE_LAUNCHER
5 |
--------------------------------------------------------------------------------
/continuous_integration/docker/pbs/files/scripts/start.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -ex
3 |
4 | PBS_CONF_FILE=/etc/pbs.conf
5 | MOM_CONF_FILE=/var/spool/pbs/mom_priv/config
6 | HOSTNAME=$(hostname)
7 |
8 | # Configure PBS to run all on one node
9 | #
10 | # Configuration references:
11 | # - https://github.com/openpbs/openpbs/blob/master/doc/man8/pbs.conf.8B
12 | # - https://github.com/openpbs/openpbs/blob/HEAD/doc/man8/pbs_comm.8B
13 | # - https://github.com/openpbs/openpbs/blob/master/doc/man8/pbs_mom.8B
14 | #
15 | sed -i "s/PBS_SERVER=.*/PBS_SERVER=$HOSTNAME/" $PBS_CONF_FILE
16 | sed -i "s/PBS_START_MOM=.*/PBS_START_MOM=1/" $PBS_CONF_FILE
17 | sed -i "s/\$clienthost .*/\$clienthost $HOSTNAME/" $MOM_CONF_FILE
18 | echo "\$usecp *:/ /" >> $MOM_CONF_FILE
19 |
20 | # Reduce the memory footprint by using less threads to avoid the OOMKiller in
21 | # GitHub Actions as observed with exit code 137.
22 | #
23 | echo "PBS_COMM_THREADS=2" >> $PBS_CONF_FILE
24 |
25 | # Start PBS
26 | /etc/init.d/pbs start
27 |
28 | # Reduce time between PBS scheduling and add history
29 | /opt/pbs/bin/qmgr -c "set server scheduler_iteration = 20"
30 | /opt/pbs/bin/qmgr -c "set server job_history_enable = True"
31 | /opt/pbs/bin/qmgr -c "set server job_history_duration = 24:00:00"
32 | /opt/pbs/bin/qmgr -c "set node pbs queue=workq"
33 | /opt/pbs/bin/qmgr -c "set server operators += dask@pbs"
34 |
35 | # "Entering sleep" can be used as a signal in logs that we have passed the
36 | # initialization phase where the memory needs may peak and expose us to the
37 | # OOMKiller and 137 exit codes.
38 | #
39 | echo "Entering sleep"
40 | sleep infinity
41 |
--------------------------------------------------------------------------------
/continuous_integration/docker/pbs/install.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | docker exec pbs /working/continuous_integration/docker/pbs/_install.sh
3 |
--------------------------------------------------------------------------------
/continuous_integration/docker/pbs/print_logs.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | docker logs pbs
3 |
--------------------------------------------------------------------------------
/continuous_integration/docker/pbs/start.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -xe

this_dir="$(dirname "${BASH_SOURCE[0]}")"
full_path_this_dir="$(cd "${this_dir}" && pwd)"
git_root="$(cd "${full_path_this_dir}/../../.." && pwd)"

docker run --rm -d \
    --name pbs \
    -h pbs \
    -v "$git_root":/working \
    -p 8000:8000 \
    -p 8786:8786 \
    -p 8088:8088 \
    --cap-add=SYS_RESOURCE \
    ghcr.io/dask/dask-gateway-ci-pbs

# The pbs container's entrypoint, files/scripts/start.sh, emits a log message
# that we will await.
#
# We do it to avoid getting OOMKilled by peaking memory needs during startup,
# which is prone to happen if we run pip install at the same time.
#
set +x
await_startup() {
    local i=0
    while [ "$i" -lt 30 ]; do
        # A plain if avoids the `A && B || C` chain, which is not if-then-else
        # (the || branch also runs when a && command fails).
        if docker logs pbs 2>/dev/null | grep --silent "Entering sleep"; then
            echo "pbs container started!"

            # We add some seconds of precautionary sleep to avoid unknown and
            # hard to debug issues.
            sleep 3
            return 0
        fi
        sleep 1
        i=$((i + 1))
        echo "Waiting for pbs container startup ($i seconds)"
    done
    echo "WARNING: /script/start.sh was slow to finish!"
    exit 1
}
await_startup
42 |
--------------------------------------------------------------------------------
/continuous_integration/docker/pbs/test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | docker exec -u dask pbs /working/continuous_integration/docker/pbs/_test.sh
3 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/Dockerfile:
--------------------------------------------------------------------------------
# See continuous_integration/docker/README.md for details about this and other
# Dockerfiles under the continuous_integration/docker folder on their purpose
# and how to work with them.
#
FROM ghcr.io/dask/dask-gateway-ci-base:latest

# Set labels based on the Open Containers Initiative (OCI):
# https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys
#
LABEL org.opencontainers.image.url="https://github.com/dask/dask-gateway/blob/HEAD/continuous_integration/docker/slurm/Dockerfile"

# Notify dask-gateway tests that Slurm is available
ENV TEST_DASK_GATEWAY_SLURM true

# Install Slurm
#
# 1. Download and compile slurm
#
#    Slurm versions: https://download.schedmd.com/slurm/
#    Slurm release notes: https://github.com/SchedMD/slurm/blame/HEAD/RELEASE_NOTES
#
RUN INSTALL_SLURM_VERSION=22.05.5 \
 && yum install -y \
        # required to install supervisor (and more?)
        epel-release \
 && yum install -y \
        # temporary installation dependencies later uninstalled
        bzip2 \
        gcc \
        mariadb-devel \
        munge-devel \
        ncurses-devel \
        openssl-devel \
        readline-devel \
        # persistent installation dependencies
        man2html \
        mariadb-server \
        munge \
        openssl \
        perl \
        supervisor \
    \
 && curl -sL https://download.schedmd.com/slurm/slurm-${INSTALL_SLURM_VERSION}.tar.bz2 \
  | tar --extract --verbose --bzip2 --directory=/tmp \
 && cd /tmp/slurm-* \
 && ./configure \
        --sysconfdir=/etc/slurm \
        --with-mysql_config=/usr/bin \
        --libdir=/usr/lib64 \
 && make install \
 && rm -rf /tmp/slurm-* \
    \
 && yum remove -y \
        bzip2 \
        gcc \
        mariadb-devel \
        munge-devel \
        ncurses-devel \
        openssl-devel \
        readline-devel \
 && yum clean all \
 && rm -rf /var/cache/yum
#
# 2. Setup Slurm
#
RUN groupadd --system slurm \
 && useradd --system --gid slurm slurm \
 && mkdir \
        /etc/sysconfig/slurm \
        /var/lib/slurmd \
        /var/log/slurm \
        /var/run/slurmd \
        /var/spool/slurmd \
 && chown slurm:slurm \
        /var/lib/slurmd \
        /var/log/slurm \
        /var/run/slurmd \
        /var/spool/slurmd \
 && /sbin/create-munge-key
#
# 3. Copy misc configuration files
#
COPY --chown=slurm:slurm ./files/etc/slurm /etc/slurm/
COPY ./files/etc/sudoers.d /etc/sudoers.d/
COPY ./files/etc/supervisord.conf /etc/
COPY ./files/etc/supervisord.d /etc/supervisord.d/
RUN chmod 644 /etc/slurm/slurm.conf \
 && chmod 600 /etc/slurm/slurmdbd.conf \
 && chmod 440 /etc/sudoers.d/dask \
 && chmod 644 /etc/supervisord.conf \
 && chmod 644 /etc/supervisord.d/*
#
# 4. Initialize a Slurm database
#
COPY ./files/scripts /scripts/
RUN /scripts/init-mysql.sh

# Run supervisord in the foreground as the container entrypoint; it starts the
# programs declared under /etc/supervisord.d (copied in above).
ENTRYPOINT ["/usr/bin/supervisord", "--configuration", "/etc/supervisord.conf"]
99 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/_install.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Install the Python test dependencies inside the slurm container.
# Runs inside the container; /working is the repo root bind-mounted by start.sh.
set -xe

cd /working

# FIXME: pip should be installed to a modern version in the base image instead
#        of being upgraded here. It isn't because of
#        https://github.com/dask/dask-gateway/issues/837.
pip install "pip==24.*"

# This installs everything we need for tests
pip install -r tests/requirements.txt

# Log the resolved environment for debugging CI failures.
pip list
15 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/_print_logs.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Dump supervisord status and the relevant service logs, each prefixed with a
# colored header so the sections are easy to spot in CI output.

# Bold high intensity green
G='\033[1;92m'
# No color
NC='\033[0m'

printf "\n${G}supervisorctl status${NC}\n"
supervisorctl status

# Print each log file under a matching "cat <path>" header.
for log_file in \
    /var/log/supervisord.log \
    /var/log/supervisor/slurmdbd.log \
    /var/log/supervisor/slurmctld.log
do
    printf "\n${G}cat ${log_file}${NC}\n"
    cat "${log_file}"
done
17 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/_test.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Run the Slurm backend test suite. Runs inside the slurm container;
# /working is the repo root bind-mounted by start.sh.
set -xe

cd /working
pytest -v tests/test_slurm_backend.py
6 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/files/etc/slurm/cgroup.conf:
--------------------------------------------------------------------------------
1 | # Configuration reference: https://slurm.schedmd.com/cgroup.conf.html
2 | #
# This file was added as a workaround when upgrading from slurm
# 21.08.6 to 22.05.5, where slurmd failed to start with an error message
5 | # logged in /var/log/slurm/slurmd.log saying:
6 | #
7 | # error: Couldn't find the specified plugin name for cgroup/v2 looking at all files
8 | # error: cannot find cgroup plugin for cgroup/v2
9 | # error: cannot create cgroup context for cgroup/v2
10 | # error: Unable to initialize cgroup plugin
11 | # error: slurmd initialization failed
12 | #
13 | CgroupPlugin=cgroup/v1
14 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/files/etc/slurm/slurm.conf:
--------------------------------------------------------------------------------
1 | # Configuration reference: https://slurm.schedmd.com/slurm.conf.html
2 | #
3 | ClusterName=linux
4 | ControlMachine=slurm
5 | SlurmUser=slurm
6 | SlurmctldPort=6817
7 | SlurmdPort=6818
8 | AuthType=auth/munge
9 | StateSaveLocation=/var/lib/slurmd
10 | SlurmdSpoolDir=/var/spool/slurmd
11 | SwitchType=switch/none
12 | MpiDefault=none
13 | SlurmctldPidFile=/var/run/slurmd/slurmctld.pid
14 | SlurmdPidFile=/var/run/slurmd/slurmd.pid
15 | ProctrackType=proctrack/pgid
16 | ReturnToService=0
17 | SlurmctldTimeout=300
18 | SlurmdTimeout=300
19 | InactiveLimit=0
20 | MinJobAge=300
21 | KillWait=30
22 | Waittime=0
23 | SchedulerType=sched/backfill
24 | SelectType=select/cons_res
25 | SelectTypeParameters=CR_CPU_Memory
26 | SlurmctldDebug=3
27 | SlurmctldLogFile=/var/log/slurm/slurmctld.log
28 | SlurmdDebug=3
29 | SlurmdLogFile=/var/log/slurm/slurmd.log
30 | JobCompType=jobcomp/none
31 | AccountingStorageType=accounting_storage/slurmdbd
32 | # Nodes
33 | SlurmdParameters=config_overrides
34 | NodeName=slurm RealMemory=4096 Sockets=4 CoresPerSocket=4 ThreadsPerCore=4
35 | # Partitions
36 | PartitionName=DEFAULT Nodes=ALL OverSubscribe=FORCE:8 MaxTime=INFINITE State=UP
37 | PartitionName=dev Priority=10 Default=YES
38 | PartitionName=prod Priority=20 Default=NO
39 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/files/etc/slurm/slurmdbd.conf:
--------------------------------------------------------------------------------
1 | #
2 | # Example slurmdbd.conf file.
3 | #
4 | # See the slurmdbd.conf man page for more information.
5 | #
6 | # Archive info
7 | #ArchiveJobs=yes
8 | #ArchiveDir="/tmp"
9 | #ArchiveSteps=yes
10 | #ArchiveScript=
11 | #JobPurge=12
12 | #StepPurge=1
13 | #
14 | # Authentication info
15 | AuthType=auth/munge
16 | #AuthInfo=/var/run/munge/munge.socket.2
17 | #
18 | # slurmDBD info
19 | DbdAddr=localhost
20 | DbdHost=localhost
21 | #DbdPort=7031
22 | SlurmUser=slurm
23 | #MessageTimeout=300
24 | DebugLevel=4
25 | #DefaultQOS=normal,standby
26 | LogFile=/var/log/slurm/slurmdbd.log
27 | PidFile=/var/run/slurmdbd.pid
28 | #PluginDir=/usr/lib/slurm
29 | #PrivateData=accounts,users,usage,jobs
30 | #TrackWCKey=yes
31 | #
32 | # Database info
33 | StorageType=accounting_storage/mysql
34 | StorageHost=localhost
35 | StoragePass=password
36 | StorageUser=slurm
37 | StorageLoc=slurm_acct_db
38 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/files/etc/sudoers.d/dask:
--------------------------------------------------------------------------------
1 | Cmnd_Alias DASK_GATEWAY_JOBQUEUE_LAUNCHER = /opt/python/bin/dask-gateway-jobqueue-launcher
2 |
3 | %dask_users ALL=(dask) /usr/bin/sudo
4 | dask ALL=(%dask_users) NOPASSWD:DASK_GATEWAY_JOBQUEUE_LAUNCHER
5 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/files/etc/supervisord.conf:
--------------------------------------------------------------------------------
1 | # supervisord starts other "programs" declared in the additional configuration
2 | # files found in the /etc/supervisor.d folder.
3 | #
4 | # supervisord configuration reference:
5 | # http://supervisord.org/configuration.html#configuration-file
6 | #
7 | [supervisord]
8 | strip_ansi = true
9 | nodaemon = true
10 | logfile = /var/log/supervisord.log
11 | pidfile = /var/run/supervisord.pid
12 |
13 | [unix_http_server]
14 | file = /tmp/supervisor.sock
15 |
16 | [rpcinterface:supervisor]
17 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
18 |
19 | [supervisorctl]
20 | serverurl = unix:///tmp/supervisor.sock
21 | prompt = slurm
22 |
23 | [include]
24 | files = /etc/supervisord.d/*.conf
25 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/files/etc/supervisord.d/slurm.conf:
--------------------------------------------------------------------------------
1 | # Configuration reference:
2 | # http://supervisord.org/configuration.html#program-x-section-settings
3 | #
4 | [program:munged]
5 | user=munge
6 | command=/usr/sbin/munged -F
7 | autostart=true
8 | autorestart=true
9 | startsecs=5
10 | startretries=2
11 | exitcodes=0,1,2
12 | stdout_logfile=/var/log/supervisor/munged.log
13 | stdout_logfile_maxbytes=1MB
14 | stdout_logfile_backups=5
15 | stderr_logfile=/var/log/supervisor/munged.log
16 | stderr_logfile_maxbytes=1MB
17 | stderr_logfile_backups=5
18 | priority=1
19 |
20 | [program:mysqld]
21 | command=/usr/bin/pidproxy /var/run/mariadb/mariadb.pid /usr/bin/mysqld_safe
22 | stdout_logfile=/var/log/supervisor/mysqld.log
23 | stdout_logfile_maxbytes=1MB
24 | stdout_logfile_backups=5
25 | stderr_logfile=/var/log/supervisor/mysqld.log
26 | stderr_logfile_maxbytes=1MB
27 | stderr_logfile_backups=5
28 | exitcodes=0,1,2
29 | autostart=true
30 | autorestart=false
31 | priority=2
32 |
33 | [program:slurmdbd]
34 | user=root
35 | command=/bin/bash -c "until echo 'SELECT 1' | mysql -h localhost -uslurm -ppassword &> /dev/null; do sleep 1; done && /usr/local/sbin/slurmdbd -Dvvv"
36 | autostart=true
37 | autorestart=false
38 | exitcodes=0,1,2
39 | stdout_logfile=/var/log/supervisor/slurmdbd.log
40 | stdout_logfile_maxbytes=1MB
41 | stdout_logfile_backups=5
42 | stderr_logfile=/var/log/supervisor/slurmdbd.log
43 | stderr_logfile_maxbytes=1MB
44 | stderr_logfile_backups=5
45 | priority=10
46 |
47 | [program:slurmctld]
48 | user=root
49 | command=/bin/bash -c "until 2>/dev/null >/dev/tcp/localhost/6819; do sleep 1; done && /usr/local/sbin/slurmctld -Dvvv"
50 | autostart=true
51 | autorestart=false
52 | startsecs=3
53 | exitcodes=0,1,2
54 | stdout_logfile=/var/log/supervisor/slurmctld.log
55 | stdout_logfile_maxbytes=1MB
56 | stdout_logfile_backups=5
57 | stderr_logfile=/var/log/supervisor/slurmctld.log
58 | stderr_logfile_maxbytes=1MB
59 | stderr_logfile_backups=5
60 | priority=50
61 |
62 | [program:slurmd]
63 | user=root
64 | command=/bin/bash -c "until 2>/dev/null >/dev/tcp/localhost/6817; do sleep 1; done && /usr/local/sbin/slurmd -Dvvv"
65 | autostart=true
66 | autorestart=false
67 | exitcodes=0,1,2
68 | stdout_logfile=/var/log/supervisor/slurmd.log
69 | stdout_logfile_maxbytes=1MB
70 | stdout_logfile_backups=5
71 | stderr_logfile=/var/log/supervisor/slurmd.log
72 | stderr_logfile_maxbytes=1MB
73 | stderr_logfile_backups=5
74 | priority=100
75 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/files/scripts/init-mysql.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | if [ ! -f "/var/lib/mysql/ibdata1" ]; then
4 | echo "- Initializing database"
5 | /usr/bin/mysql_install_db &> /dev/null
6 | echo "- Database initialized"
7 | echo "- Updating MySQL directory permissions"
8 | chown -R mysql:mysql /var/lib/mysql
9 | chown -R mysql:mysql /var/run/mariadb
10 | fi
11 |
12 | if [ ! -d "/var/lib/mysql/slurm_acct_db" ]; then
13 | /usr/bin/mysqld_safe --datadir='/var/lib/mysql' &
14 |
15 | for count in {30..0}; do
16 | if echo "SELECT 1" | mysql &> /dev/null; then
17 | break
18 | fi
19 | echo "- Starting MariaDB to create Slurm account database"
20 | sleep 1
21 | done
22 |
23 | if [[ "$count" -eq 0 ]]; then
24 | echo >&2 "MariaDB did not start"
25 | exit 1
26 | fi
27 |
28 | echo "- Creating Slurm acct database"
29 | mysql -NBe "CREATE DATABASE slurm_acct_db"
30 | mysql -NBe "CREATE USER 'slurm'@'localhost'"
31 | mysql -NBe "SET PASSWORD for 'slurm'@'localhost' = password('password')"
32 | mysql -NBe "GRANT USAGE ON *.* to 'slurm'@'localhost'"
33 | mysql -NBe "GRANT ALL PRIVILEGES on slurm_acct_db.* to 'slurm'@'localhost'"
34 | mysql -NBe "FLUSH PRIVILEGES"
35 | echo "- Slurm acct database created. Stopping MariaDB"
36 | pkill mysqld
37 | for count in {30..0}; do
38 | if echo "SELECT 1" | mysql &> /dev/null; then
39 | sleep 1
40 | else
41 | break
42 | fi
43 | done
44 | if [[ "$count" -eq 0 ]]; then
45 | echo >&2 "MariaDB did not stop"
46 | exit 1
47 | fi
48 | fi
49 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/install.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Run the in-container install script inside the running "slurm" container.
docker exec slurm /working/continuous_integration/docker/slurm/_install.sh
3 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/print_logs.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Print supervisord/slurm logs from inside the running "slurm" container.
docker exec slurm /working/continuous_integration/docker/slurm/_print_logs.sh
3 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/start.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Start the slurm CI container and wait for its supervisord-managed services
# to come up before returning.
set -xe

this_dir="$(dirname "${BASH_SOURCE[0]}")"
full_path_this_dir="$(cd "${this_dir}" && pwd)"
git_root="$(cd "${full_path_this_dir}/../../.." && pwd)"

docker run --rm -d \
    --name slurm \
    -h slurm \
    -v "$git_root":/working \
    -p 8000:8000 \
    -p 8786:8786 \
    -p 8088:8088 \
    ghcr.io/dask/dask-gateway-ci-slurm

# The slurm container's supervisord process emits logs about the progress of
# starting up declared services that we will await.
#
# We do it to avoid getting OOMKilled by peaking memory needs during startup,
# which is prone to happen if we run pip install at the same time.
#
# Practically, we await "entered RUNNING state" to be observed exactly 5 times,
# which represents our 5 supervisord-managed services.
#
#     INFO success: mysqld entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
#     INFO success: slurmdbd entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
#     INFO success: slurmd entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
#     INFO success: slurmctld entered RUNNING state, process has stayed up for > than 3 seconds (startsecs)
#     INFO success: munged entered RUNNING state, process has stayed up for > than 5 seconds (startsecs)
#
set +x
await_startup() {
    i=0
    while [ $i -ne 30 ]; do
        # Count the service-started log lines and compare the count exactly.
        # The previous `wc -l | grep "5"` was a substring match that would
        # also have matched counts like 15 or 50.
        # `|| true` keeps `set -e` from aborting when grep finds no match yet.
        running_count=$(docker logs slurm 2>/dev/null | grep -c "entered RUNNING state" || true)
        if [ "$running_count" -eq 5 ]; then
            start_script_finishing=true
            break
        fi
        start_script_finishing=false
        sleep 1
        i=$((i + 1))
        echo "Waiting for slurm container startup ($i seconds)"
    done
    if [ "$start_script_finishing" != "true" ]; then
        echo "WARNING: /script/start.sh was slow to finish!"
        exit 1
    fi

    echo "slurm container started!"
}
await_startup
47 |
--------------------------------------------------------------------------------
/continuous_integration/docker/slurm/test.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Run the slurm test suite inside the running "slurm" container as user "dask".
docker exec -u dask slurm /working/continuous_integration/docker/slurm/_test.sh
3 |
--------------------------------------------------------------------------------
/continuous_integration/kubernetes/build-publish-helm-chart.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# This script publishes the Helm chart to the Dask Helm chart repo and pushes
# associated built docker images to our container registry using chartpress.
# --------------------------------------------------------------------------

# Exit on errors, assert env vars, log commands
set -eux

PUBLISH_ARGS="--push --publish-chart \
--builder=docker-buildx \
--platform=linux/amd64 \
--platform=linux/arm64 \
"

# chartpress needs to run next to resources/helm/chartpress.yaml
cd resources/helm

# chartpress use git to push to our Helm chart repository, which is the gh-pages
# branch of dask/helm-chart. We assume permissions to the docker registry are
# already configured.
if [[ $GITHUB_REF != refs/tags/* ]]; then
    # Using --extra-message, we help readers of merged PRs to know what version
    # they need to bump to in order to make use of the PR. This is enabled by a
    # GitHub notification in the PR like "Github Action user pushed a commit to
    # dask/helm-chart that referenced this pull request..."
    #
    # ref: https://github.com/jupyterhub/chartpress#usage
    #
    # NOTE: GitHub merge commits contain a PR reference like #123. `sed` is used
    #       to extract a PR reference like #123 or a commit hash reference like
    #       @123abcd. Combined with GITHUB_REPOSITORY we craft a commit message
    #       like dask/dask-gateway#123 or dask/dask-gateway@123abcd.
    PR_OR_HASH=$(git log -1 --pretty=%h-%B | head -n1 | sed 's/^.*\(#[0-9]*\).*/\1/' | sed 's/^\([0-9a-f]*\)-.*/@\1/')
    LATEST_COMMIT_TITLE=$(git log -1 --pretty=%B | head -n1)
    EXTRA_MESSAGE="${GITHUB_REPOSITORY}${PR_OR_HASH} ${LATEST_COMMIT_TITLE}"

    # shellcheck disable=SC2086
    chartpress $PUBLISH_ARGS --extra-message "${EXTRA_MESSAGE}"
else
    # Setting a tag explicitly enforces a rebuild if this tag had already been
    # built and we wanted to override it.
    #
    # ${GITHUB_REF:10} strips the leading "refs/tags/" (10 characters) so only
    # the tag name itself is passed to chartpress.

    # shellcheck disable=SC2086
    chartpress $PUBLISH_ARGS --tag "${GITHUB_REF:10}"
fi

# Let us log the changes chartpress did, it should include replacements for
# fields in values.yaml, such as what tag for various images we are using.
git --no-pager diff --color
50 |
--------------------------------------------------------------------------------
/continuous_integration/kubernetes/k3d-create.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# This script can be used during local development to setup a k8s cluster.
#
# Note that if you are using k3d you must also "load" images so that your pods
# in the k3d cluster can access them. What docker images are available on your
# machine are different from those in the k3d sandbox.
# --------------------------------------------------------------------------
set -e

this_dir="$(dirname "${BASH_SOURCE[0]}")"
full_path_this_dir="$(cd "${this_dir}" && pwd)"
# NOTE(review): git_root is computed but not referenced below — possibly left
# over from a copy of a sibling script; confirm before removing.
git_root="$(cd "${full_path_this_dir}/../.." && pwd)"

echo "Starting k3d"
k3d create \
    --publish 30200:30200 \
    --api-port 6444 \
    --name k3s-default

echo "Waiting for k3d access..."
# Retry until k3d hands out a kubeconfig path (it may not exist immediately).
# NOTE(review): `export VAR="$(cmd)"` masks the command's exit status
# (shellcheck SC2155); the loop compensates by re-checking for a non-empty value.
for i in {1..10}; do
    export KUBECONFIG="$(k3d get-kubeconfig --name='k3s-default')"
    if [[ $KUBECONFIG != "" ]]; then
        break;
    fi
    sleep 1
done

echo "Waiting for k3d nodes..."
# Render each node's conditions as "name:Type=Status;..." and wait for Ready.
JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'
until kubectl get nodes -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do
    sleep 0.5
done

echo "k3d is running!"

kubectl get nodes
38 |
--------------------------------------------------------------------------------
/dask-gateway-server/.dockerignore:
--------------------------------------------------------------------------------
1 | dask-gateway-proxy/
2 | build/
3 | dist/
4 | *.pyc
5 | .eggs/
6 | *.egg-info
7 | .cache/
8 | .pytest_cache/
9 |
--------------------------------------------------------------------------------
/dask-gateway-server/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | dist/
3 | dask-gateway-proxy/dask-gateway-proxy*
4 | dask_gateway_server/proxy/dask-gateway-proxy
5 |
--------------------------------------------------------------------------------
/dask-gateway-server/Dockerfile:
--------------------------------------------------------------------------------
# This Dockerfile and image, ghcr.io/dask/dask-gateway-server, is used by the
# dask-gateway Helm chart, by the api pod and the controller pod.
#
# The pods are started with different commands:
#
# - api pod command:        dask-gateway-server ...
# - controller pod command: dask-gateway-server kube-controller ...
#
FROM python:3.13-slim-bullseye

# Set labels based on the Open Containers Initiative (OCI):
# https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys
#
LABEL org.opencontainers.image.source="https://github.com/dask/dask-gateway"
LABEL org.opencontainers.image.url="https://github.com/dask/dask-gateway/blob/HEAD/dask-gateway-server/Dockerfile"

# Install tini and upgrade linux packages to patch known vulnerabilities.
RUN apt-get update \
 && apt-get upgrade -y \
 && apt-get install -y \
        tini \
 && rm -rf /var/lib/apt/lists/*

# Create a non-root user to run as
RUN useradd --create-home --user-group --uid 1000 dask
USER dask:dask
ENV PATH=/home/dask/.local/bin:$PATH
WORKDIR /home/dask/

# Install dask-gateway-server
#
# The Golang proxy binary isn't built as the dask-gateway Helm chart relies on
# Traefik as a proxy instead to run in its dedicated pod.
#
COPY --chown=dask:dask . /opt/dask-gateway-server
RUN DASK_GATEWAY_SERVER__NO_PROXY=true pip install --no-cache-dir \
        -r /opt/dask-gateway-server/Dockerfile.requirements.txt

ENTRYPOINT ["tini", "-g", "--"]
CMD ["dask-gateway-server", "--config", "/etc/dask-gateway/dask_gateway_config.py"]
42 |
--------------------------------------------------------------------------------
/dask-gateway-server/Dockerfile.requirements.in:
--------------------------------------------------------------------------------
1 | # These are the requirements we know we want to install in the Dockerfile, and
2 | # then we freeze them ahead of time to provide a clear description of the
3 | # dependencies we have installed.
4 | #
5 |
6 | # Install dask-gateway-server[kubernetes], which is the only thing needed for
7 | # our CI test suite.
8 | #
9 | .[kubernetes]
10 |
--------------------------------------------------------------------------------
/dask-gateway-server/Dockerfile.requirements.txt:
--------------------------------------------------------------------------------
1 | #
2 | # This file is autogenerated by pip-compile with Python 3.13
3 | # by the following command:
4 | #
5 | # Use "Run workflow" button at https://github.com/dask/dask-gateway/actions/workflows/refreeze-dockerfile-requirements-txt.yaml
6 | #
7 | aiohappyeyeballs==2.6.1
8 | # via aiohttp
9 | aiohttp==3.11.16
10 | # via
11 | # dask-gateway-server
12 | # kubernetes-asyncio
13 | aiosignal==1.3.2
14 | # via aiohttp
15 | attrs==25.3.0
16 | # via aiohttp
17 | certifi==2025.1.31
18 | # via kubernetes-asyncio
19 | cffi==1.17.1
20 | # via cryptography
21 | colorlog==6.9.0
22 | # via dask-gateway-server
23 | cryptography==44.0.2
24 | # via dask-gateway-server
25 | dask-gateway-server @ file:///opt/dask-gateway-server
26 | # via file:///opt/dask-gateway-server
27 | frozenlist==1.5.0
28 | # via
29 | # aiohttp
30 | # aiosignal
31 | idna==3.10
32 | # via yarl
33 | kubernetes-asyncio==32.3.0
34 | # via dask-gateway-server
35 | multidict==6.4.2
36 | # via
37 | # aiohttp
38 | # yarl
39 | propcache==0.3.1
40 | # via
41 | # aiohttp
42 | # yarl
43 | pycparser==2.22
44 | # via cffi
45 | python-dateutil==2.9.0.post0
46 | # via kubernetes-asyncio
47 | pyyaml==6.0.2
48 | # via kubernetes-asyncio
49 | six==1.17.0
50 | # via
51 | # kubernetes-asyncio
52 | # python-dateutil
53 | traitlets==5.14.3
54 | # via dask-gateway-server
55 | urllib3==2.3.0
56 | # via kubernetes-asyncio
57 | yarl==1.19.0
58 | # via aiohttp
59 |
--------------------------------------------------------------------------------
/dask-gateway-server/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2019, Jim Crist-Harif
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | 2. Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | 3. Neither the name of the copyright holder nor the names of its contributors
15 | may be used to endorse or promote products derived from this software
16 | without specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
--------------------------------------------------------------------------------
/dask-gateway-server/README.rst:
--------------------------------------------------------------------------------
1 | dask-gateway-server
2 | ===================
3 |
A multi-tenant server for securely deploying and managing Dask clusters. See
`the documentation <https://gateway.dask.org/>`__ for more information.
6 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask-gateway-proxy/.gitignore:
--------------------------------------------------------------------------------
1 | # If you prefer the allow list template instead of the deny list, see community template:
2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
3 | #
4 | # Binaries for programs and plugins
5 | *.exe
6 | *.exe~
7 | *.dll
8 | *.so
9 | *.dylib
10 |
11 | # Test binary, built with `go test -c`
12 | *.test
13 |
14 | # Output of the go coverage tool, specifically when used with LiteIDE
15 | *.out
16 |
17 | # Dependency directories (remove the comment below to include it)
18 | # vendor/
19 |
20 | # Go workspace file
21 | go.work
22 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask-gateway-proxy/README.md:
--------------------------------------------------------------------------------
1 | ## dask-gateway-proxy
2 |
3 | A configurable TLS proxy, that dispatches to different routes based on the
4 | connection's [Server Name
5 | Indication](https://en.wikipedia.org/wiki/Server_Name_Indication). Routes can
6 | be added and removed at runtime using the provided REST API.
7 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask-gateway-proxy/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/dask/dask-gateway/dask-gateway-proxy
2 |
3 | go 1.22
4 |
5 | require github.com/stretchr/testify v1.10.0
6 |
7 | require (
8 | github.com/davecgh/go-spew v1.1.1 // indirect
9 | github.com/pmezard/go-difflib v1.0.0 // indirect
10 | gopkg.in/yaml.v3 v3.0.1 // indirect
11 | )
12 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask-gateway-proxy/go.sum:
--------------------------------------------------------------------------------
1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
5 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
6 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
7 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
8 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
9 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
10 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
11 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask-gateway-proxy/internal/logging/logging.go:
--------------------------------------------------------------------------------
1 | // Our tiny custom logging framework.
2 | //
3 | // Provides common log levels, and quick functions for formatting and writing
4 | // output at those levels.
5 | package logging
6 |
7 | import (
8 | "fmt"
9 | "io"
10 | "os"
11 | "sync"
12 | "time"
13 | )
14 |
15 | type LogLevel int8
16 |
17 | const (
18 | ERROR LogLevel = iota - 1
19 | WARN
20 | INFO
21 | DEBUG
22 | )
23 |
24 | func ParseLevel(s string) LogLevel {
25 | switch s {
26 | case "ERROR", "error":
27 | return ERROR
28 | case "WARN", "warn":
29 | return WARN
30 | case "INFO", "info":
31 | return INFO
32 | case "DEBUG", "debug":
33 | return DEBUG
34 | }
35 | panic("Couldn't parse log level " + s)
36 | }
37 |
38 | func (l LogLevel) Char() byte {
39 | switch l {
40 | case ERROR:
41 | return 'E'
42 | case WARN:
43 | return 'W'
44 | case INFO:
45 | return 'I'
46 | case DEBUG:
47 | return 'D'
48 | }
49 | return '?'
50 | }
51 |
// Logger is a minimal leveled logger. The embedded mutex serializes writes
// and guards the reusable scratch buffer.
type Logger struct {
	sync.Mutex
	Name  string   // logger name included in every log line
	Level LogLevel // messages above this verbosity are dropped
	Out   io.Writer
	Buf   []byte // scratch buffer reused across messages to avoid allocations
}
59 |
60 | func NewLogger(name string, level LogLevel) *Logger {
61 | return &Logger{Name: name, Level: level, Out: os.Stderr}
62 | }
63 |
// logMsg writes one log line to l.Out if l.Level permits it.
//
// Line layout: "[<level char> <timestamp> <name>] <msg>\n".
// The logger's scratch buffer l.Buf is reset and reused on every call to
// avoid per-message allocations; the embedded mutex serializes access to
// both the buffer and the writer.
func (l *Logger) logMsg(level LogLevel, msg string) {
	if l.Level >= level {
		now := time.Now() // get this early.
		l.Lock()
		defer l.Unlock()
		// Truncate (not reallocate) the shared buffer, then assemble the line.
		l.Buf = l.Buf[:0]
		l.Buf = append(l.Buf, '[')
		l.Buf = append(l.Buf, level.Char())
		l.Buf = append(l.Buf, ' ')
		l.Buf = now.AppendFormat(l.Buf, "2006-01-02 15:04:05.000")
		l.Buf = append(l.Buf, ' ')
		l.Buf = append(l.Buf, l.Name...)
		l.Buf = append(l.Buf, "] "...)
		l.Buf = append(l.Buf, msg...)
		l.Buf = append(l.Buf, '\n')
		// NOTE(review): the Write error is deliberately ignored — there is no
		// sensible recovery path for a failed log write.
		l.Out.Write(l.Buf)
	}
}
82 |
83 | func (l *Logger) logF(level LogLevel, format string, args ...interface{}) {
84 | if l.Level >= level {
85 | l.logMsg(level, fmt.Sprintf(format, args...))
86 | }
87 | }
88 |
// Debug logs msg at DEBUG level.
func (l *Logger) Debug(msg string) {
	l.logMsg(DEBUG, msg)
}

// Debugf logs a Printf-style formatted message at DEBUG level.
func (l *Logger) Debugf(format string, args ...interface{}) {
	l.logF(DEBUG, format, args...)
}

// Info logs msg at INFO level.
func (l *Logger) Info(msg string) {
	l.logMsg(INFO, msg)
}

// Infof logs a Printf-style formatted message at INFO level.
func (l *Logger) Infof(format string, args ...interface{}) {
	l.logF(INFO, format, args...)
}

// Warn logs msg at WARN level.
func (l *Logger) Warn(msg string) {
	l.logMsg(WARN, msg)
}

// Warnf logs a Printf-style formatted message at WARN level.
func (l *Logger) Warnf(format string, args ...interface{}) {
	l.logF(WARN, format, args...)
}

// Error logs msg at ERROR level.
func (l *Logger) Error(msg string) {
	l.logMsg(ERROR, msg)
}

// Errorf logs a Printf-style formatted message at ERROR level.
func (l *Logger) Errorf(format string, args ...interface{}) {
	l.logF(ERROR, format, args...)
}
120 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask-gateway-proxy/pkg/router/router.go:
--------------------------------------------------------------------------------
1 | package router
2 |
3 | import (
4 | "encoding/json"
5 | "net/url"
6 | "strings"
7 | )
8 |
// normalizePath strips a single leading and trailing '/' from path. It
// returns the trimmed path and the number of leading bytes removed
// (0 or 1), which callers use to map indices back into the original.
func normalizePath(path string) (string, int) {
	if path == "" || path == "/" {
		return "", 0
	}
	offset := 0
	if strings.HasPrefix(path, "/") {
		path = path[1:]
		offset = 1
	}
	if strings.HasSuffix(path, "/") {
		path = path[:len(path)-1]
	}
	return path, offset
}
23 |
// getSegment returns the path segment beginning at start and the index
// just past its trailing '/', or -1 when this is the last segment. An
// empty path yields ("", -1).
func getSegment(path string, start int) (segment string, next int) {
	if path == "" {
		return path, -1
	}
	rest := path[start:]
	slash := strings.IndexByte(rest, '/')
	if slash < 0 {
		return rest, -1
	}
	return rest[:slash], start + slash + 1
}
34 |
// Router is a prefix trie mapping path segments to target URLs. The
// zero value is an empty, usable router.
type Router struct {
	url      *url.URL           // target registered at this exact prefix, if any
	branches map[string]*Router // children keyed by next path segment; nil when leaf
}
39 |
40 | func (r *Router) isLeaf() bool {
41 | return len(r.branches) == 0
42 | }
43 |
44 | func NewRouter() *Router {
45 | return &Router{}
46 | }
47 |
48 | func (router *Router) HasMatch(path string) bool {
49 | if router.url != nil {
50 | return true
51 | }
52 | path, _ = normalizePath(path)
53 | for part, i := getSegment(path, 0); ; part, i = getSegment(path, i) {
54 | router = router.branches[part]
55 | if router == nil {
56 | break
57 | }
58 | if router.url != nil {
59 | return true
60 | }
61 | if i == -1 {
62 | break
63 | }
64 | }
65 | return false
66 | }
67 |
// Match finds the most specific (longest-prefix) route registered for
// path. It returns the matched target URL and the unmatched suffix of
// the original path, or (nil, "") when nothing matches.
func (router *Router) Match(path string) (*url.URL, string) {
	// path2 is path without surrounding slashes; offset is how many
	// leading bytes were stripped, so indices into path2 can be mapped
	// back into the original path.
	path2, offset := normalizePath(path)
	node := router
	out := node.url // a root-level route matches every path
	n := 0
	offset2 := 0
	for {
		part, i := getSegment(path2, n)
		node = node.branches[part]
		if node == nil {
			break
		}
		if node.url != nil {
			// Deeper match found: remember it and where the matched
			// prefix ends within path2.
			out = node.url
			if i == -1 {
				offset2 = len(path2)
			} else {
				offset2 = i
			}
		}
		if i == -1 {
			break
		}
		n = i
	}
	if out == nil {
		return nil, ""
	}
	// Slice the original path so any trailing slash removed by
	// normalizePath is preserved in the returned suffix.
	return out, path[offset+offset2:]
}
98 |
99 | func (router *Router) Put(path string, url *url.URL) {
100 | path, _ = normalizePath(path)
101 | if path == "" {
102 | router.url = url
103 | return
104 | }
105 | node := router
106 | for part, i := getSegment(path, 0); ; part, i = getSegment(path, i) {
107 | child, _ := node.branches[part]
108 | if child == nil {
109 | child = NewRouter()
110 | if node.branches == nil {
111 | node.branches = make(map[string]*Router)
112 | }
113 | node.branches[part] = child
114 | }
115 | node = child
116 | if i == -1 {
117 | break
118 | }
119 | }
120 | node.url = url
121 | }
122 |
// Delete removes the route registered at path, if any, then prunes trie
// nodes left with neither a URL nor children.
func (router *Router) Delete(path string) {
	path, _ = normalizePath(path)

	if path == "" {
		// Handle root node
		router.url = nil
		return
	}

	// record remembers each visited node and the segment used to reach
	// its child, so the walk can be unwound for pruning afterwards.
	type record struct {
		node *Router
		part string
	}

	var paths []record
	node := router
	for part, i := getSegment(path, 0); ; part, i = getSegment(path, i) {
		paths = append(paths, record{part: part, node: node})
		node = node.branches[part]
		if node == nil {
			// Path was never registered; nothing to delete.
			return
		}
		if i == -1 {
			break
		}
	}
	node.url = nil
	if node.isLeaf() {
		// Walk back up, removing now-empty nodes until we reach one
		// that still serves a URL or has other children.
		for i := len(paths) - 1; i >= 0; i-- {
			parent := paths[i].node
			part := paths[i].part
			delete(parent.branches, part)
			// If completely empty, deallocate whole map
			if len(parent.branches) == 0 {
				parent.branches = nil
			}
			if parent.url != nil || !parent.isLeaf() {
				break
			}
		}
	}
}
165 |
166 | func (r *Router) traverse(prefix string, f func(prefix string, value *url.URL)) {
167 | if r.url != nil {
168 | f(prefix, r.url)
169 | }
170 | prefix = prefix + "/"
171 | for path, node := range r.branches {
172 | node.traverse(prefix+path, f)
173 | }
174 | }
175 |
176 | func (r *Router) MarshalJSON() ([]byte, error) {
177 | out := make(map[string]string)
178 | r.traverse("", func(prefix string, value *url.URL) {
179 | out[prefix] = value.String()
180 | })
181 | b, err := json.Marshal(out)
182 | if err != nil {
183 | return nil, err
184 | }
185 | return b, nil
186 | }
187 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask-gateway-proxy/pkg/sni/sni.go:
--------------------------------------------------------------------------------
1 | package sni
2 |
3 | import (
4 | "bufio"
5 | "bytes"
6 | "crypto/tls"
7 | "io"
8 | "net"
9 | )
10 |
11 | // hideWriteTo is a workaround introduced to make the code functional in 1.22+,
12 | // where io.Copy would no longer make use of peekedTCPConn.Read after
13 | // net.TCPConn.WriteTo was added, so the workaround is to hide it again.
14 | //
15 | // The workaround was developed inspecting:
16 | // https://github.com/golang/go/commit/f664031bc17629080332a1c7bede38d67fd32e47
17 | //
type hideWriteTo struct{}

// WriteTo must never actually be called; the method exists solely so
// that embedding hideWriteTo shadows *net.TCPConn's WriteTo promotion.
func (hideWriteTo) WriteTo(io.Writer) (int64, error) {
	panic("can't happen")
}
22 |
// TcpConn is a net.Conn that also supports half-closing each direction,
// as *net.TCPConn does.
type TcpConn interface {
	net.Conn
	CloseWrite() error
	CloseRead() error
}
28 |
// peekedTCPConn replays bytes already buffered from the socket before
// falling through to the underlying TCPConn. The embedded hideWriteTo
// masks net.TCPConn's WriteTo so io.Copy goes through Read and does not
// skip the peeked bytes (see the hideWriteTo comment above).
type peekedTCPConn struct {
	peeked []byte // bytes to replay first; nil once drained
	hideWriteTo
	*net.TCPConn
}
34 |
35 | func (c *peekedTCPConn) Read(p []byte) (n int, err error) {
36 | if len(c.peeked) > 0 {
37 | n = copy(p, c.peeked)
38 | c.peeked = c.peeked[n:]
39 | if len(c.peeked) == 0 {
40 | c.peeked = nil
41 | }
42 | return n, nil
43 | }
44 | return c.TCPConn.Read(p)
45 | }
46 |
47 | func wrapPeeked(inConn *net.TCPConn, br *bufio.Reader) TcpConn {
48 | peeked, _ := br.Peek(br.Buffered())
49 | return &peekedTCPConn{TCPConn: inConn, peeked: peeked}
50 | }
51 |
// readonly presents an io.Reader as a net.Conn whose writes always fail
// with io.EOF; used below to feed a captured ClientHello to crypto/tls
// without letting it respond.
type readonly struct {
	r io.Reader
	net.Conn
}

func (c readonly) Read(p []byte) (int, error) { return c.r.Read(p) }
func (readonly) Write(p []byte) (int, error) { return 0, io.EOF }
59 |
// ReadSNI peeks at the first bytes of inConn to decide whether the
// client is starting a TLS handshake and, if so, extracts the SNI
// server name from the ClientHello — without consuming any bytes from
// the connection.
//
// It returns the SNI (empty if absent or unreadable), whether the
// connection looks like TLS, and a TcpConn that replays everything
// peeked. Only the initial Peek(1) failure is reported as an error.
func ReadSNI(inConn *net.TCPConn) (string, bool, TcpConn, error) {
	br := bufio.NewReader(inConn)
	hdr, err := br.Peek(1)
	if err != nil {
		return "", false, nil, err
	}

	// TLS record type 0x16 = handshake.
	if hdr[0] != 0x16 {
		// Not a TLS handshake
		return "", false, wrapPeeked(inConn, br), nil
	}

	const headerLen = 5
	hdr, err = br.Peek(headerLen)
	if err != nil {
		return "", false, wrapPeeked(inConn, br), nil
	}

	// Record length is a big-endian uint16 in header bytes 3-4.
	recLen := int(hdr[3])<<8 | int(hdr[4])
	helloBytes, err := br.Peek(headerLen + recLen)
	if err != nil {
		// Truncated hello: report it as TLS, but with no SNI.
		return "", true, wrapPeeked(inConn, br), nil
	}

	// Run a throwaway server handshake over the peeked bytes purely to
	// parse the ClientHello; GetConfigForClient captures the SNI. The
	// handshake can never complete (readonly writes return io.EOF), so
	// its error is deliberately ignored.
	sni := ""
	server := tls.Server(readonly{r: bytes.NewReader(helloBytes)}, &tls.Config{
		GetConfigForClient: func(hello *tls.ClientHelloInfo) (*tls.Config, error) {
			sni = hello.ServerName
			return nil, nil
		},
	})
	server.Handshake()

	return sni, true, wrapPeeked(inConn, br), nil
}
95 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask_gateway_server/__init__.py:
--------------------------------------------------------------------------------
1 | from ._version import __version__
2 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask_gateway_server/__main__.py:
--------------------------------------------------------------------------------
from .app import main

# Entry point for `python -m dask_gateway_server`; delegates to the app CLI.
main()
4 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask_gateway_server/_version.py:
--------------------------------------------------------------------------------
# Package version; matches `version` in dask-gateway-server's pyproject.toml.
__version__ = "2025.4.1-0.dev"
2 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask_gateway_server/backends/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import Backend, ClusterConfig
2 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask_gateway_server/backends/inprocess.py:
--------------------------------------------------------------------------------
1 | from dask_gateway.scheduler_preload import GatewaySchedulerService, make_gateway_client
2 | from distributed import Scheduler, Security, Worker
3 | from distributed.core import Status
4 | from distributed.utils import TimeoutError
5 |
6 | from .local import UnsafeLocalBackend
7 |
8 | __all__ = ("InProcessBackend",)
9 |
10 |
class InProcessBackend(UnsafeLocalBackend):
    """A backend that runs everything in the same process.

    Schedulers and workers are plain objects in the server's own process
    rather than subprocesses; primarily useful for testing.
    """

    def get_security(self, cluster):
        """Build a TLS ``Security`` object from the cluster's cert/key pair."""
        cert_path, key_path = self.get_tls_paths(cluster)
        return Security(
            tls_ca_file=cert_path,
            tls_scheduler_cert=cert_path,
            tls_scheduler_key=key_path,
            tls_worker_cert=cert_path,
            tls_worker_key=key_path,
        )

    def get_gateway_client(self, cluster):
        """Create an API client authenticated with this cluster's token."""
        return make_gateway_client(
            cluster_name=cluster.name, api_token=cluster.token, api_url=self.api_url
        )

    def _check_status(self, objs, mapping):
        """Return one bool per obj: True while it is tracked and not closed.

        BUGFIX: this previously computed ``not x.status != Status.closed``,
        which inverted the check and reported closed objects as running.
        """
        out = []
        for x in objs:
            x = mapping.get(x.name)
            ok = x is not None and x.status != Status.closed
            out.append(ok)
        return out

    async def do_setup(self):
        # cluster name -> Scheduler, worker name -> Worker
        self.schedulers = {}
        self.workers = {}

    async def do_start_cluster(self, cluster):
        workdir = self.setup_working_directory(cluster)
        yield {"workdir": workdir}

        security = self.get_security(cluster)
        gateway_client = self.get_gateway_client(cluster)

        # Scheduler on a random local TLS port, with the gateway service
        # attached so the cluster reports back to the server.
        self.schedulers[cluster.name] = scheduler = Scheduler(
            protocol="tls",
            host="127.0.0.1",
            port=0,
            dashboard_address="127.0.0.1:0",
            security=security,
            services={
                ("gateway", ":0"): (
                    GatewaySchedulerService,
                    {
                        "gateway": gateway_client,
                        "heartbeat_period": self.cluster_heartbeat_period,
                        "adaptive_period": cluster.config.adaptive_period,
                        "idle_timeout": cluster.config.idle_timeout,
                    },
                )
            },
        )
        await scheduler
        yield {"workdir": workdir, "started": True}

    async def do_stop_cluster(self, cluster):
        scheduler = self.schedulers.pop(cluster.name)

        await scheduler.close()
        scheduler.stop()

        workdir = cluster.state.get("workdir")
        if workdir is not None:
            self.cleanup_working_directory(workdir)

    async def do_check_clusters(self, clusters):
        return self._check_status(clusters, self.schedulers)

    async def do_start_worker(self, worker):
        security = self.get_security(worker.cluster)
        workdir = worker.cluster.state["workdir"]
        self.workers[worker.name] = worker = Worker(
            worker.cluster.scheduler_address,
            nthreads=worker.cluster.config.worker_threads,
            memory_limit=0,
            security=security,
            name=worker.name,
            local_directory=workdir,
        )
        await worker
        yield {"started": True}

    async def do_stop_worker(self, worker):
        worker = self.workers.pop(worker.name, None)
        if worker is None:
            return
        try:
            await worker.close(timeout=1)
        except TimeoutError:
            # Best-effort shutdown; a hung worker shouldn't block the server.
            pass

    async def do_check_workers(self, workers):
        return self._check_status(workers, self.workers)

    async def worker_status(self, worker_name, worker_state, cluster_state):
        """Return True while the named worker exists and is not closed.

        BUGFIX: this previously returned ``not worker.status != Status.closed``,
        i.e. True only once the worker had already closed.
        """
        worker = self.workers.get(worker_name)
        if worker is None:
            return False
        return worker.status != Status.closed
113 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask_gateway_server/backends/jobqueue/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/dask-gateway-server/dask_gateway_server/backends/jobqueue/__init__.py
--------------------------------------------------------------------------------
/dask-gateway-server/dask_gateway_server/backends/jobqueue/launcher.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import shutil
4 | import subprocess
5 | import sys
6 |
7 |
def finish(**kwargs):
    """Report the result of an action as a single JSON object on stdout."""
    sys.stdout.write(json.dumps(kwargs))
    sys.stdout.flush()
11 |
12 |
def run_command(cmd, env, stdin=None):
    """Run cmd with the given environment, reporting via ``finish``.

    Output is always reported with ok=True; callers inspect returncode.
    """
    input_bytes = stdin.encode("utf8") if stdin is not None else None

    proc = subprocess.Popen(
        cmd,
        env=env,
        cwd=os.path.expanduser("~"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=subprocess.PIPE if input_bytes is not None else None,
    )

    stdout, stderr = proc.communicate(input_bytes)

    finish(
        ok=True,
        returncode=proc.returncode,
        stdout=stdout.decode("utf8", "replace"),
        stderr=stderr.decode("utf8", "replace"),
    )
36 | )
37 |
38 |
def start(cmd, env, stdin=None, staging_dir=None, files=None):
    """Materialize the staging directory (if requested), then run cmd."""
    if staging_dir:
        try:
            # Private directory; fail loudly if it already exists.
            os.makedirs(staging_dir, mode=0o700, exist_ok=False)
            for name, contents in files.items():
                with open(os.path.join(staging_dir, name), "w") as f:
                    f.write(contents)
        except Exception as exc:
            finish(
                ok=False,
                error=f"Error setting up staging directory {staging_dir}: {exc}",
            )
            return
    run_command(cmd, env, stdin=stdin)
52 | run_command(cmd, env, stdin=stdin)
53 |
54 |
def stop(cmd, env, staging_dir=None):
    """Remove the staging directory (if any), then run the stop command."""
    if staging_dir:
        if not os.path.exists(staging_dir):
            # Already cleaned up; nothing left to stop.
            return
        try:
            shutil.rmtree(staging_dir)
        except Exception as exc:
            finish(
                ok=False,
                error=f"Error removing staging directory {staging_dir}: {exc}",
            )
            return
    run_command(cmd, env)
68 |
69 |
def main():
    """Read a JSON request from stdin and dispatch on its 'action' key."""
    try:
        request = json.load(sys.stdin)
    except ValueError as exc:
        finish(ok=False, error=str(exc))
        return

    action = request.pop("action", None)
    if action == "start":
        start(**request)
    elif action == "stop":
        stop(**request)
    else:
        finish(ok=False, error="Valid actions are 'start' and 'stop'")
83 | finish(ok=False, error="Valid actions are 'start' and 'stop'")
84 |
85 |
86 | if __name__ == "__main__":
87 | main()
88 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask_gateway_server/backends/jobqueue/slurm.py:
--------------------------------------------------------------------------------
1 | import math
2 | import os
3 | import shutil
4 |
5 | from traitlets import Unicode, default
6 |
7 | from ...traitlets import Type
8 | from .base import JobQueueBackend, JobQueueClusterConfig
9 |
10 | __all__ = ("SlurmBackend", "SlurmClusterConfig")
11 |
12 |
def slurm_format_memory(n):
    """Format memory in bytes for use with slurm.

    Uses the largest unit (G, M, K) whose value would be at least 10,
    rounding up; anything smaller is reported as the 1K minimum.
    """
    for suffix, unit in (("G", 1024**3), ("M", 1024**2), ("K", 1024)):
        if n >= 10 * unit:
            return "%d%s" % (math.ceil(n / unit), suffix)
    return "1K"
22 |
23 |
class SlurmClusterConfig(JobQueueClusterConfig):
    """Dask cluster configuration options when running on SLURM"""

    # Each option maps to the matching sbatch flag in
    # SlurmBackend.get_submit_cmd_env_stdin; empty string omits the flag.
    partition = Unicode("", help="The partition to submit jobs to.", config=True)

    qos = Unicode("", help="QOS string associated with each job.", config=True)

    account = Unicode("", help="Account string associated with each job.", config=True)
32 |
33 |
class SlurmBackend(JobQueueBackend):
    """A backend for deploying Dask on a Slurm cluster."""

    cluster_config_class = Type(
        "dask_gateway_server.backends.jobqueue.slurm.SlurmClusterConfig",
        klass="dask_gateway_server.backends.base.ClusterConfig",
        help="The cluster config class to use",
        config=True,
    )

    @default("submit_command")
    def _default_submit_command(self):
        return shutil.which("sbatch") or "sbatch"

    @default("cancel_command")
    def _default_cancel_command(self):
        return shutil.which("scancel") or "scancel"

    @default("status_command")
    def _default_status_command(self):
        return shutil.which("squeue") or "squeue"

    def get_submit_cmd_env_stdin(self, cluster, worker=None):
        """Build the sbatch command line, environment, and batch script.

        When ``worker`` is given the job runs a dask worker; otherwise it
        runs the cluster's scheduler.
        """
        cmd = [self.submit_command, "--parsable", "--job-name=dask-gateway"]
        # Optional flags, omitted when the config value is empty.
        for flag, setting in (
            ("--partition=", cluster.config.partition),
            ("--account=", cluster.config.account),
            ("--qos=", cluster.config.qos),
        ):
            if setting:
                cmd.append(flag + setting)

        if worker:
            cpus = cluster.config.worker_cores
            mem = slurm_format_memory(cluster.config.worker_memory)
            log_file = "dask-worker-%s.log" % worker.name
            setup = cluster.config.worker_setup
            launch = " ".join(self.get_worker_command(cluster, worker.name))
            env = self.get_worker_env(cluster)
        else:
            cpus = cluster.config.scheduler_cores
            mem = slurm_format_memory(cluster.config.scheduler_memory)
            log_file = "dask-scheduler-%s.log" % cluster.name
            setup = cluster.config.scheduler_setup
            launch = " ".join(self.get_scheduler_command(cluster))
            env = self.get_scheduler_env(cluster)
        script = "\n".join(["#!/bin/sh", setup, launch])

        staging_dir = self.get_staging_directory(cluster)

        cmd += [
            "--chdir=" + staging_dir,
            "--output=" + os.path.join(staging_dir, log_file),
            "--cpus-per-task=%d" % cpus,
            "--mem=%s" % mem,
            # Only variable *names* are exported; values come from `env`.
            "--export=%s" % (",".join(sorted(env))),
        ]

        return cmd, env, script

    def get_stop_cmd_env(self, job_id):
        """Command (and empty env) to cancel a job."""
        return [self.cancel_command, job_id], {}

    def get_status_cmd_env(self, job_ids):
        """Command (and empty env) to query '<id> <state>' lines via squeue."""
        cmd = [self.status_command, "-h", "--job=%s" % ",".join(job_ids), "-o", "%i %t"]
        return cmd, {}

    def parse_job_states(self, stdout):
        """Map each job id in squeue output to True while still active."""
        active_states = ("R", "CG", "PD", "CF")
        states = {}
        for line in stdout.splitlines():
            job_id, state = line.split()
            states[job_id] = state in active_states
        return states

    def parse_job_id(self, stdout):
        """sbatch --parsable prints the bare job id."""
        return stdout.strip()
121 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask_gateway_server/backends/kubernetes/__init__.py:
--------------------------------------------------------------------------------
1 | from .backend import KubeBackend, KubeClusterConfig
2 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask_gateway_server/proxy/__init__.py:
--------------------------------------------------------------------------------
1 | from .core import Proxy
2 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask_gateway_server/tls.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timedelta, timezone
2 |
3 | from cryptography import x509
4 | from cryptography.hazmat.backends import default_backend
5 | from cryptography.hazmat.primitives import hashes, serialization
6 | from cryptography.hazmat.primitives.asymmetric import rsa
7 | from cryptography.x509.oid import NameOID
8 |
9 |
def new_keypair(sni):
    """Create a new self-signed certificate & key pair with the given SNI.

    Parameters
    ----------
    sni : str
        The SNI name to use.

    Returns
    -------
    cert_bytes : bytes
    key_bytes : bytes
    """
    # 2048-bit RSA key, later serialized unencrypted as PKCS8 PEM.
    private_key = rsa.generate_private_key(
        public_exponent=65537, key_size=2048, backend=default_backend()
    )

    subject = x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, "dask-internal")])
    alt_names = x509.SubjectAlternativeName(
        [
            x509.DNSName(sni),
            x509.DNSName("dask-internal"),
            # allow skein appmaster and dask to share credentials
            x509.DNSName("skein-internal"),
        ]
    )

    # Self-signed: subject and issuer are the same name; valid one year.
    now = datetime.now(timezone.utc)
    certificate = (
        x509.CertificateBuilder()
        .subject_name(subject)
        .issuer_name(subject)
        .add_extension(alt_names, critical=False)
        .public_key(private_key.public_key())
        .serial_number(x509.random_serial_number())
        .not_valid_before(now)
        .not_valid_after(now + timedelta(days=365))
        .sign(private_key, hashes.SHA256(), default_backend())
    )

    cert_bytes = certificate.public_bytes(serialization.Encoding.PEM)
    key_bytes = private_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    )

    return cert_bytes, key_bytes
59 |
--------------------------------------------------------------------------------
/dask-gateway-server/dask_gateway_server/traitlets.py:
--------------------------------------------------------------------------------
1 | from traitlets import Integer, List, TraitError, TraitType
2 | from traitlets import Type as _Type
3 | from traitlets import Unicode
4 | from traitlets.config import Application
5 |
6 | # We replace the class of the default formatter used via a configuration change.
7 | #
8 | # References:
9 | #
10 | # - Traitlets' Application.logging_config defaults:
11 | # https://github.com/ipython/traitlets/blob/e2c731ef72dd41d4be527d4d93dd87ccc409830d/traitlets/config/application.py#L229-L256
12 | # - Python official schema for Application.logging_config:
13 | # https://docs.python.org/3/library/logging.config.html#logging-config-dictschema
14 | #
Application.logging_config = {
    "formatters": {
        "console": {
            "class": "dask_gateway_server.utils.LogFormatter",
        },
    },
}
# Default to INFO-level, colorized "[L timestamp name] message" records.
Application.log_level.default_value = "INFO"
Application.log_format.default_value = (
    "%(log_color)s[%(levelname)1.1s %(asctime)s.%(msecs).03d "
    "%(name)s]%(reset)s %(message)s"
)
27 |
28 |
# Adapted from JupyterHub
class MemoryLimit(Integer):
    """A specification of a memory limit, with optional units.

    Supported units are:
      - K -> Kibibytes
      - M -> Mebibytes
      - G -> Gibibytes
      - T -> Tebibytes
    """

    UNIT_SUFFIXES = {"K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}

    def validate(self, obj, value):
        """Coerce ``value`` to an integer byte count.

        Accepts a plain number, or a string like ``"4G"``. Raises
        TraitError for anything unparseable (previously non-string or
        empty values escaped as TypeError/IndexError).
        """
        if isinstance(value, (int, float)):
            return int(value)

        message = (
            "{val} is not a valid memory specification. Must be an int or "
            "a string with suffix K, M, G, T".format(val=value)
        )
        try:
            num = float(value[:-1])
            suffix = value[-1]
        except (ValueError, TypeError, IndexError):
            # ValueError: non-numeric prefix; TypeError: unsliceable value;
            # IndexError: empty sequence.
            raise TraitError(message)

        if suffix not in self.UNIT_SUFFIXES:
            raise TraitError(message)
        return int(num * self.UNIT_SUFFIXES[suffix])
61 |
62 |
class Callable(TraitType):
    """A trait which is callable"""

    info_text = "a callable"

    def validate(self, obj, value):
        # Reject anything that isn't callable; error() raises TraitError.
        if not callable(value):
            self.error(obj, value)
        return value
73 |
74 |
class Type(_Type):
    """An implementation of `Type` with better errors"""

    def validate(self, obj, value):
        """Resolve dotted-string values by import, then delegate to traitlets.

        Raises a TraitError naming the failing trait when the string
        cannot be imported, chaining the original ImportError so the
        underlying cause stays visible in tracebacks.
        """
        if isinstance(value, str):
            try:
                value = self._resolve_string(value)
            except ImportError as exc:
                raise TraitError(
                    "Failed to import %r for trait '%s.%s':\n\n%s"
                    % (value, type(obj).__name__, self.name, exc)
                ) from exc
        return super().validate(obj, value)
89 |
class Command(List):
    """Traitlet for a command that should be a list of strings,
    but allows it to be specified as a single string.
    """

    def __init__(self, default_value=None, **kwargs):
        kwargs.setdefault("minlen", 1)
        # A bare string default becomes a single-element command list.
        if isinstance(default_value, str):
            default_value = [default_value]
        super().__init__(Unicode(), default_value, **kwargs)

    def validate(self, obj, value):
        # Promote a bare string to a single-element command list.
        value = [value] if isinstance(value, str) else value
        return super().validate(obj, value)
105 |
--------------------------------------------------------------------------------
/dask-gateway-server/pyproject.toml:
--------------------------------------------------------------------------------
1 | # build-system
2 | # - ref: https://peps.python.org/pep-0517/
3 | #
4 | [build-system]
5 | requires = ["hatchling"]
6 | build-backend = "hatchling.build"
7 |
8 | # project
9 | # - ref 1: https://peps.python.org/pep-0621/
10 | # - ref 2: https://hatch.pypa.io/latest/config/metadata/#project-metadata
11 | #
12 | [project]
13 | name = "dask-gateway-server"
14 | version = "2025.4.1-0.dev"
15 | description = "A multi-tenant server for securely deploying and managing multiple Dask clusters."
16 | readme = "README.rst"
17 | requires-python = ">=3.10"
18 | license = {file = "LICENSE"}
19 | keywords = ["dask", "hadoop", "kubernetes", "HPC", "distributed", "cluster"]
20 | authors = [
21 | {name = "Jim Crist-Harif", email = "jcristharif@gmail.com"},
22 | ]
23 | classifiers = [
24 | "Development Status :: 5 - Production/Stable",
25 | "License :: OSI Approved :: BSD License",
26 | "Intended Audience :: Developers",
27 | "Intended Audience :: Science/Research",
28 | "Intended Audience :: System Administrators",
29 | "Topic :: Scientific/Engineering",
30 | "Topic :: System :: Distributed Computing",
31 | "Topic :: System :: Systems Administration",
32 | "Programming Language :: Python :: 3",
33 | ]
34 | dependencies = [
35 | "aiohttp>=3.9.0",
36 | "async-timeout ; python_version < '3.11'",
37 | "colorlog",
38 | "cryptography",
39 | "traitlets>=5.2.2.post1",
40 | ]
41 |
42 | [project.optional-dependencies]
43 | kerberos = [
44 | # pykerberos is tricky to install and requires a system package to
45 | # successfully compile some C code, on ubuntu this is libkrb5-dev.
46 | "pykerberos",
47 | ]
48 | jobqueue = ["sqlalchemy>=2.0.0"]
49 | local = ["sqlalchemy>=2.0.0"]
50 | yarn = [
51 | "sqlalchemy>=2.0.0",
52 | "skein>=0.7.3",
53 | # FIXME: protobuf is a dependency for skein, and is being held back here for
54 | # now due to an error description reported in
55 | # https://github.com/jcrist/skein/issues/255
56 | #
57 | "protobuf<3.21",
58 | ]
59 | kubernetes = ["kubernetes_asyncio"]
60 | all_backends = [
61 | "sqlalchemy>=2.0.0",
62 | "skein>=0.7.3",
63 | "protobuf<3.21",
64 | "kubernetes_asyncio",
65 | ]
66 |
67 | [project.urls]
68 | Documentation = "https://gateway.dask.org/"
69 | Source = "https://github.com/dask/dask-gateway/"
70 | Issues = "https://github.com/dask/dask-gateway/issues"
71 |
72 | [project.scripts]
73 | dask-gateway-server = "dask_gateway_server.app:main"
74 | dask-gateway-jobqueue-launcher = "dask_gateway_server.backends.jobqueue.launcher:main"
75 |
76 |
77 | # Refs:
78 | # - https://hatch.pypa.io/latest/plugins/build-hook/custom/#pyprojecttoml
79 | # - https://hatch.pypa.io/1.3/plugins/build-hook/reference/#hatchling.builders.hooks.plugin.interface.BuildHookInterface.clean
80 | # - https://github.com/ofek/hatch-mypyc/blob/master/hatch_mypyc/plugin.py
81 | #
82 | [tool.hatch.build.hooks.custom]
83 | path = "hatch_build.py"
84 |
85 | [tool.hatch.build]
86 | include = [
87 | "**/*.py",
88 | "dask-gateway-proxy/**",
89 | ]
90 |
--------------------------------------------------------------------------------
/dask-gateway/.dockerignore:
--------------------------------------------------------------------------------
1 | build/
2 | dist/
3 | *.pyc
4 | .eggs/
5 | *.egg-info
6 | .cache/
7 | .pytest_cache/
8 |
--------------------------------------------------------------------------------
/dask-gateway/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | dist/
3 |
--------------------------------------------------------------------------------
/dask-gateway/Dockerfile:
--------------------------------------------------------------------------------
1 | # PURPOSE:
2 | #
3 | # This Dockerfile and image, ghcr.io/dask/dask-gateway, is used by the
4 | # dask-gateway Helm chart. It acts as the sample image for scheduler and workers
5 | # in Dask Clusters created by end users.
6 | #
7 | # The admin installing the dask-gateway Helm chart or its end users are meant to
8 | # specify an image for the scheduler and worker pods to use that meets their
9 | # needs for the Dask clusters they startup. Please build your own according to
10 | # the documentation if this very limited image doesn't meet your needs.
11 | #
12 | # See https://gateway.dask.org/install-kube.html#using-a-custom-image.
13 | #
14 |
15 |
16 | # The build stage
17 | # ---------------
18 | # This stage is building Python wheels for use in later stages by using a base
19 | # image that has more pre-requisites to do so, such as a C++ compiler.
20 | #
21 | # psutil, a dependency of distributed, is currently the sole reason we have to
22 | # have this build stage.
23 | #
24 | FROM python:3.13-bullseye as build-stage
25 |
26 | # Build wheels
27 | #
28 | # We set pip's cache directory and expose it across build stages via an
29 | # ephemeral docker cache (--mount=type=cache,target=${PIP_CACHE_DIR}).
30 | #
31 | COPY . /opt/dask-gateway
32 | ARG PIP_CACHE_DIR=/tmp/pip-cache
33 | RUN --mount=type=cache,target=${PIP_CACHE_DIR} \
34 | pip install build \
35 | && pip wheel \
36 | --wheel-dir=/tmp/wheels \
37 | -r /opt/dask-gateway/Dockerfile.requirements.txt
38 |
39 |
40 | # The final stage
41 | # ---------------
42 | #
43 | FROM python:3.13-slim-bullseye as slim-stage
44 |
45 | # Set labels based on the Open Containers Initiative (OCI):
46 | # https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys
47 | #
48 | LABEL org.opencontainers.image.source="https://github.com/dask/dask-gateway"
49 | LABEL org.opencontainers.image.url="https://github.com/dask/dask-gateway/blob/HEAD/dask-gateway/Dockerfile"
50 |
51 | # Install tini and update linux packages to patch known vulnerabilities.
52 | RUN apt-get update \
53 | && apt-get upgrade -y \
54 | && apt-get install -y \
55 | tini \
56 | && rm -rf /var/lib/apt/lists/*
57 |
58 | # Create a non-root user to run as
59 | RUN useradd --create-home --user-group --uid 1000 dask
60 | USER dask:dask
61 | ENV PATH=/home/dask/.local/bin:$PATH
62 | WORKDIR /home/dask/
63 |
64 | # Install dask-gateway
65 | COPY --chown=dask:dask . /opt/dask-gateway
66 | ARG PIP_CACHE_DIR=/tmp/pip-cache
67 | RUN --mount=type=cache,target=${PIP_CACHE_DIR} \
68 | --mount=type=cache,from=build-stage,source=/tmp/wheels,target=/tmp/wheels \
69 | pip install \
70 | --find-links=/tmp/wheels/ \
71 | -r /opt/dask-gateway/Dockerfile.requirements.txt
72 |
73 | # Only set ENTRYPOINT, CMD is configured at runtime by dask-gateway-server
74 | ENTRYPOINT ["tini", "-g", "--"]
75 |
--------------------------------------------------------------------------------
/dask-gateway/Dockerfile.requirements.in:
--------------------------------------------------------------------------------
1 | # These are the requirements we know we want to install in the Dockerfile, and
2 | # then we freeze them ahead of time to provide a clear description of the
3 | # dependencies we have installed.
4 | #
5 |
6 | # Install dask-gateway, which is the only thing needed for our CI test suite.
7 | .
8 |
9 | # We also install the bare minimum to provide end users with a primitive
10 | # end-to-end demonstrative test doing work in the worker pods and accessing the
11 | # scheduler dashboard without changing the image.
12 | #
13 | # - bokeh is required by the scheduler pod to present dashboards.
14 | # - numpy is required for running a basic computation test:
15 | # https://gateway.dask.org/usage.html#run-computations-on-the-cluster
16 | #
17 | bokeh
18 | numpy
19 |
--------------------------------------------------------------------------------
/dask-gateway/Dockerfile.requirements.txt:
--------------------------------------------------------------------------------
1 | #
2 | # This file is autogenerated by pip-compile with Python 3.13
3 | # by the following command:
4 | #
5 | # Use "Run workflow" button at https://github.com/dask/dask-gateway/actions/workflows/refreeze-dockerfile-requirements-txt.yaml
6 | #
7 | aiohappyeyeballs==2.6.1
8 | # via aiohttp
9 | aiohttp==3.11.16
10 | # via dask-gateway
11 | aiosignal==1.3.2
12 | # via aiohttp
13 | attrs==25.3.0
14 | # via aiohttp
15 | bokeh==3.7.2
16 | # via -r Dockerfile.requirements.in
17 | click==8.1.8
18 | # via
19 | # dask
20 | # dask-gateway
21 | # distributed
22 | cloudpickle==3.1.1
23 | # via
24 | # dask
25 | # distributed
26 | contourpy==1.3.1
27 | # via bokeh
28 | dask==2025.3.0
29 | # via
30 | # dask-gateway
31 | # distributed
32 | dask-gateway @ file:///opt/dask-gateway
33 | # via -r Dockerfile.requirements.in
34 | distributed==2025.3.0
35 | # via dask-gateway
36 | frozenlist==1.5.0
37 | # via
38 | # aiohttp
39 | # aiosignal
40 | fsspec==2025.3.2
41 | # via dask
42 | idna==3.10
43 | # via yarl
44 | jinja2==3.1.6
45 | # via
46 | # bokeh
47 | # distributed
48 | locket==1.0.0
49 | # via
50 | # distributed
51 | # partd
52 | markupsafe==3.0.2
53 | # via jinja2
54 | msgpack==1.1.0
55 | # via distributed
56 | multidict==6.4.2
57 | # via
58 | # aiohttp
59 | # yarl
60 | narwhals==1.34.1
61 | # via bokeh
62 | numpy==2.2.4
63 | # via
64 | # -r Dockerfile.requirements.in
65 | # bokeh
66 | # contourpy
67 | # pandas
68 | packaging==24.2
69 | # via
70 | # bokeh
71 | # dask
72 | # distributed
73 | pandas==2.2.3
74 | # via bokeh
75 | partd==1.4.2
76 | # via dask
77 | pillow==11.1.0
78 | # via bokeh
79 | propcache==0.3.1
80 | # via
81 | # aiohttp
82 | # yarl
83 | psutil==7.0.0
84 | # via distributed
85 | python-dateutil==2.9.0.post0
86 | # via pandas
87 | pytz==2025.2
88 | # via pandas
89 | pyyaml==6.0.2
90 | # via
91 | # bokeh
92 | # dask
93 | # dask-gateway
94 | # distributed
95 | six==1.17.0
96 | # via python-dateutil
97 | sortedcontainers==2.4.0
98 | # via distributed
99 | tblib==3.1.0
100 | # via distributed
101 | toolz==1.0.0
102 | # via
103 | # dask
104 | # distributed
105 | # partd
106 | tornado==6.4.2
107 | # via
108 | # bokeh
109 | # dask-gateway
110 | # distributed
111 | tzdata==2025.2
112 | # via pandas
113 | urllib3==2.3.0
114 | # via distributed
115 | xyzservices==2025.1.0
116 | # via bokeh
117 | yarl==1.19.0
118 | # via aiohttp
119 | zict==3.0.0
120 | # via distributed
121 |
--------------------------------------------------------------------------------
/dask-gateway/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2019, Jim Crist-Harif
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | 2. Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | 3. Neither the name of the copyright holder nor the names of its contributors
15 | may be used to endorse or promote products derived from this software
16 | without specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
--------------------------------------------------------------------------------
/dask-gateway/README.rst:
--------------------------------------------------------------------------------
1 | dask-gateway
2 | ============
3 |
4 | A client library for interacting with a dask-gateway server. See `the
documentation <https://gateway.dask.org/>`__ for more information.
6 |
--------------------------------------------------------------------------------
/dask-gateway/dask_gateway/__init__.py:
--------------------------------------------------------------------------------
# Load configuration -- importing `config` has the side effect of registering
# this package's bundled gateway.yaml defaults with dask's config system.
from . import config
from ._version import __version__
from .auth import BasicAuth, JupyterHubAuth, KerberosAuth
from .client import (
    Gateway,
    GatewayCluster,
    GatewayClusterError,
    GatewayServerError,
    GatewayWarning,
)
from .options import Options

# `config` was imported only for its import-time side effect; remove it from
# the package namespace so it isn't exposed as public API.
del config
15 |
--------------------------------------------------------------------------------
/dask-gateway/dask_gateway/_version.py:
--------------------------------------------------------------------------------
# Single source of truth for the package version; the "-0.dev" suffix marks
# an unreleased development build (mirrors `version` in pyproject.toml).
__version__ = "2025.4.1-0.dev"
2 |
--------------------------------------------------------------------------------
/dask-gateway/dask_gateway/comm.py:
--------------------------------------------------------------------------------
1 | import ssl
2 | from concurrent.futures import ThreadPoolExecutor
3 | from urllib.parse import urlparse
4 |
5 | from distributed.comm.core import Connector
6 | from distributed.comm.registry import Backend, backends
7 | from distributed.comm.tcp import (
8 | MAX_BUFFER_SIZE,
9 | TLS,
10 | convert_stream_closed_error,
11 | get_stream_address,
12 | )
13 | from distributed.utils import ensure_ip, get_ip
14 | from tornado import netutil
15 | from tornado.iostream import StreamClosedError
16 | from tornado.tcpclient import TCPClient
17 |
18 |
def parse_gateway_address(address):
    """Split a gateway address into ``(host, port, cluster-name)``.

    The ``gateway://`` scheme is prepended when missing so that
    ``urlparse`` can pick the components apart.

    Raises
    ------
    ValueError
        If the address carries no path component (the path names the
        target cluster).
    """
    prefix = "gateway://"
    full = address if address.startswith(prefix) else prefix + address
    components = urlparse(full)
    if not components.path:
        raise ValueError("Gateway address %r missing path component" % full)
    return components.hostname, components.port, components.path.strip("/")
27 |
28 |
class GatewayConnector(Connector):
    """Connector for ``gateway://`` addresses.

    Opens a plain TCP connection to the gateway proxy and upgrades it to
    TLS, sending the cluster name as the TLS server hostname (SNI) --
    presumably so the proxy can route the connection to the right
    scheduler without decrypting it.
    """

    # Shared resolver/client for all instances; DNS resolution runs on a
    # small thread pool so lookups don't block the event loop.
    _executor = ThreadPoolExecutor(2)
    _resolver = netutil.ExecutorResolver(close_executor=False, executor=_executor)
    client = TCPClient(resolver=_resolver)

    async def connect(self, address, deserialize=True, **connection_args):
        """Connect to ``address`` and return a distributed ``TLS`` comm.

        Requires an ``ssl.SSLContext`` under the ``ssl_context`` key of
        ``connection_args``; raises ``TypeError`` otherwise.
        """
        ip, port, path = parse_gateway_address(address)
        # The cluster name, carried in the address path, becomes the SNI value.
        sni = "daskgateway-" + path
        ctx = connection_args.get("ssl_context")
        if not isinstance(ctx, ssl.SSLContext):
            raise TypeError(
                "Gateway expects a `ssl_context` argument of type "
                "ssl.SSLContext, instead got %s" % ctx
            )

        try:
            # Plain TCP first, then an explicit client-side TLS upgrade with
            # the SNI set via server_hostname.
            plain_stream = await self.client.connect(
                ip, port, max_buffer_size=MAX_BUFFER_SIZE
            )
            stream = await plain_stream.start_tls(
                False, ssl_options=ctx, server_hostname=sni
            )
            # start_tls can return a stream already closed by the peer;
            # surface that as a StreamClosedError so it's handled below.
            if stream.closed() and stream.error:
                raise StreamClosedError(stream.error)

        except StreamClosedError as e:
            # The socket connect() call failed
            convert_stream_closed_error(self, e)

        local_address = "tls://" + get_stream_address(stream)
        # NOTE(review): assumes `address` arrives without the "gateway://"
        # scheme (otherwise it would be doubled here) -- confirm callers strip it.
        peer_address = "gateway://" + address
        return TLS(stream, local_address, peer_address, deserialize)
61 |
62 |
class GatewayBackend(Backend):
    """distributed communication backend for the ``gateway://`` protocol.

    Only outbound connections are supported; listening on a gateway
    address is not implemented.
    """

    # I/O
    def get_connector(self):
        return GatewayConnector()

    def get_listener(self, *args, **kwargs):
        raise NotImplementedError("Listening on a gateway connection")

    # Address handling
    def get_address_host(self, loc):
        host, _, _ = parse_gateway_address(loc)
        return host

    def get_address_host_port(self, loc):
        host, port, _ = parse_gateway_address(loc)
        return host, port

    def resolve_address(self, loc):
        host, port, path = parse_gateway_address(loc)
        return "%s:%d/%s" % (ensure_ip(host), port, path)

    def get_local_address_for(self, loc):
        host, port, path = parse_gateway_address(loc)
        # Resolve to an IP, then pick the local interface used to reach it.
        return "%s:%d/%s" % (get_ip(ensure_ip(host)), port, path)
88 |
89 |
# Register this backend so distributed resolves "gateway://" addresses here.
backends["gateway"] = GatewayBackend()
91 |
--------------------------------------------------------------------------------
/dask-gateway/dask_gateway/config.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import dask
4 | import yaml
5 |
# Path to the default gateway configuration bundled alongside this module.
fn = os.path.join(os.path.dirname(__file__), "gateway.yaml")
# Copy the bundled file into the user's dask config directory if absent.
dask.config.ensure_file(source=fn)

with open(fn) as f:
    defaults = yaml.safe_load(f)

# Register the bundled values as defaults; explicit user configuration
# (files/env vars already loaded by dask) takes priority over these.
dask.config.update_defaults(defaults)
13 |
--------------------------------------------------------------------------------
/dask-gateway/dask_gateway/gateway.yaml:
--------------------------------------------------------------------------------
1 | gateway:
2 | address: null # The full address to the dask-gateway server.
3 | # May also be a template string, which will be formatted
4 | # with any environment variables before usage.
5 |
6 | public-address: null # The address to the dask-gateway server, as accessible
7 | # from a web browser. This will be used as the root of
8 | # all browser-facing links (e.g. the dask dashboard).
9 | # If `None` (default), `gateway.address` will be used.
10 | # May be a template string.
11 |
12 | proxy-address: null # The full address or port to the dask-gateway
13 | # scheduler proxy. If a port, the host/ip is taken from
14 | # ``address``. If null, defaults to `address`.
15 | # May also be a template string.
16 |
17 | auth:
18 | type: basic # The authentication type to use. Options are basic,
19 | # kerberos, jupyterhub, or a full class path to a
20 | # custom class.
21 |
22 | kwargs: {} # Keyword arguments to use when instantiating the
23 | # authentication class above. Values may be template
24 | # strings.
25 |
26 | http-client:
27 | proxy: true # The http proxy configuration to use when contacting
28 | # the dask-gateway server. If `true` (default), this is
29 | # inferred from your environment (i.e. `HTTP(S)_PROXY`
30 | # environment variables). Set to `false` to disable
31 | # this inference. You may also specify a proxy address
32 | # explicitly (e.g. `http://user:password@host:port`).
33 | # May be a template string.
34 |
35 | cluster:
36 | options: {} # Default options to use when calling ``new_cluster`` or
37 | # ``cluster_options``. Values may be template strings.
38 |
--------------------------------------------------------------------------------
/dask-gateway/dask_gateway/utils.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import os
3 |
4 |
def format_template(x):
    """Expand ``{VAR}``-style placeholders in *x* from the environment.

    Strings are formatted with ``str.format(**os.environ)``; any other
    value is returned unchanged.
    """
    return x.format(**os.environ) if isinstance(x, str) else x
9 |
10 |
async def cancel_task(task):
    """Request cancellation of *task* and wait until it has finished.

    The resulting ``CancelledError`` is swallowed so callers can treat
    cancellation as a silent, best-effort shutdown. Note that if the task
    already completed with a different exception, awaiting it re-raises
    that exception.
    """
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass
17 |
--------------------------------------------------------------------------------
/dask-gateway/pyproject.toml:
--------------------------------------------------------------------------------
1 | # build-system
2 | # - ref: https://peps.python.org/pep-0517/
3 | #
4 | [build-system]
5 | requires = ["hatchling"]
6 | build-backend = "hatchling.build"
7 |
8 | # project
9 | # - ref 1: https://peps.python.org/pep-0621/
10 | # - ref 2: https://hatch.pypa.io/latest/config/metadata/#project-metadata
11 | #
12 | [project]
13 | name = "dask-gateway"
14 | version = "2025.4.1-0.dev"
15 | description = "A client library for interacting with a dask-gateway server"
16 | readme = "README.rst"
17 | requires-python = ">=3.10"
18 | license = {file = "LICENSE"}
19 | keywords = ["dask", "hadoop", "kubernetes", "HPC", "distributed", "cluster"]
20 | authors = [
21 | {name = "Jim Crist-Harif", email = "jcristharif@gmail.com"},
22 | ]
23 |
24 | classifiers = [
25 | "Development Status :: 5 - Production/Stable",
26 | "License :: OSI Approved :: BSD License",
27 | "Intended Audience :: Developers",
28 | "Intended Audience :: Science/Research",
29 | "Topic :: Scientific/Engineering",
30 | "Topic :: System :: Distributed Computing",
31 | "Programming Language :: Python :: 3",
32 | ]
33 | dependencies = [
34 | "aiohttp",
35 | "click>=8.1.3",
36 | "dask>=2022.4.0",
37 | "distributed>=2022.4.0",
38 | "pyyaml",
39 | "tornado",
40 | ]
41 |
42 | [project.optional-dependencies]
43 | kerberos = [
44 | 'pykerberos;platform_system!="Windows"',
45 | 'winkerberos;platform_system=="Windows"',
46 | ]
47 |
48 | [project.urls]
49 | Documentation = "https://gateway.dask.org/"
50 | Source = "https://github.com/dask/dask-gateway/"
51 | Issues = "https://github.com/dask/dask-gateway/issues"
52 |
53 | [tool.hatch.build]
54 | include = [
55 | "**/*.py",
56 | "**/*.yaml",
57 | ]
58 |
--------------------------------------------------------------------------------
/dev-environment.yaml:
--------------------------------------------------------------------------------
1 | # A conda environment file to help setup dependencies to build and test
2 | # dask-gateway locally.
3 | #
4 | # Install:
5 | #
6 | # export DASK_GATEWAY_SERVER__NO_PROXY=true
7 | #
8 | # conda env create -f dev-environment.yaml
9 | # conda activate dask-gateway-dev
10 | #
11 | # unset DASK_GATEWAY_SERVER__NO_PROXY
12 | #
13 | # Cleanup:
14 | #
15 | # conda deactivate
16 | # conda env remove -n dask-gateway-dev
17 | #
18 | name: dask-gateway-dev
19 | channels:
20 | - conda-forge
21 | dependencies:
22 | - python=3.13
23 | - pip
24 |
25 | # Golang with compiler is required to compile dask-gateway-server's bundled
26 | # dask-gateway-proxy executable binary.
27 | - go
28 | - go-cgo
29 |
30 | - pip:
31 | - pre-commit
32 | - --requirement=docs/requirements.txt
33 | - --requirement=tests/requirements.txt
34 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation generated by sphinx-quickstart
2 | # ----------------------------------------------------------------------------
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS)
21 |
22 |
23 | # Manually added commands
24 | # ----------------------------------------------------------------------------
25 |
26 | # For local development:
27 | # - builds and rebuilds html on changes to source
28 | # - starts a livereload enabled webserver and opens up a browser
29 | devenv:
30 | sphinx-autobuild -b html --open-browser "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS)
31 |
32 | # For local development and CI:
33 | # - verifies that links are valid
34 | linkcheck:
35 | $(SPHINXBUILD) -b linkcheck "$(SOURCEDIR)" "$(BUILDDIR)/linkcheck" $(SPHINXOPTS)
36 | @echo
37 | @echo "Link check complete; look for any errors in the above output " \
38 | "or in $(BUILDDIR)/linkcheck/output.txt."
39 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | # This file describes the requirements to build the documentation, which you can
2 | # do by the following commands:
3 | #
4 | # pip install -r docs/requirements.txt
5 | #
6 | # cd docs
7 | # make html
8 | #
9 | dask-sphinx-theme>=3.0.5
10 | myst-parser
11 |
12 | # FIXME: This workaround is required until we have sphinx>=5, as enabled by
13 | # dask-sphinx-theme no longer pinning sphinx-book-theme==0.2.0. This is
14 | # tracked in https://github.com/dask/dask-sphinx-theme/issues/68.
15 | #
16 | sphinxcontrib-applehelp<1.0.5
17 | sphinxcontrib-devhelp<1.0.6
18 | sphinxcontrib-htmlhelp<2.0.5
19 | sphinxcontrib-serializinghtml<1.1.10
20 | sphinxcontrib-qthelp<1.0.7
21 |
22 | # sphinx-autobuild enables the "make devenv" command defined in the Makefile to
23 | # automatically rebuild the documentation on changes and update live-reload a
24 | # browser.
25 | sphinx-autobuild
26 |
27 | # autodoc-traits will inspect the dask-gateway and dask-gateway-server's Python
28 | # code to generate reference documentation. It will omit files if ImportErrors
29 | # are thrown so we install these packages with all dependencies to avoid that.
30 | #
31 | # Note that we don't have to install pykerberos for autodoc-traits inspection of
32 | # dask-gateway. This is because .py files can be loaded without errors as
33 | # "import kerberos" statements only show up within functions.
34 | #
35 | autodoc-traits
36 | --editable="./dask-gateway"
37 | --editable="./dask-gateway-server[all_backends]"
38 |
--------------------------------------------------------------------------------
/docs/source/_images/adapt-widget.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/docs/source/_images/adapt-widget.png
--------------------------------------------------------------------------------
/docs/source/_images/options-widget.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/docs/source/_images/options-widget.png
--------------------------------------------------------------------------------
/docs/source/_images/scale-widget.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/docs/source/_images/scale-widget.png
--------------------------------------------------------------------------------
/docs/source/api-client.rst:
--------------------------------------------------------------------------------
1 | Client API
2 | ==========
3 |
4 | .. currentmodule:: dask_gateway
5 |
6 |
7 | Gateway
8 | -------
9 |
10 | .. autoclass:: Gateway
11 | :members:
12 |
13 |
14 | GatewayCluster
15 | --------------
16 |
17 | .. autoclass:: GatewayCluster
18 | :members:
19 |
20 |
21 | Options
22 | -------
23 |
24 | .. autoclass:: dask_gateway.options.Options
25 |
26 |
27 | Authentication
28 | --------------
29 |
30 | .. autoclass:: dask_gateway.auth.GatewayAuth
31 |
32 | .. autoclass:: dask_gateway.auth.BasicAuth
33 |
34 | .. autoclass:: dask_gateway.auth.KerberosAuth
35 |
36 | .. autoclass:: dask_gateway.auth.JupyterHubAuth
37 |
38 |
39 | Exceptions
40 | ----------
41 |
42 | .. autoclass:: dask_gateway.GatewayClusterError
43 |
44 | .. autoclass:: dask_gateway.GatewayServerError
45 |
--------------------------------------------------------------------------------
/docs/source/api-server.rst:
--------------------------------------------------------------------------------
1 | Configuration Reference
2 | =======================
3 |
4 | Gateway Server
5 | --------------
6 |
7 | .. autoconfigurable:: dask_gateway_server.app.DaskGateway
8 |
9 |
10 | Authentication
11 | --------------
12 |
13 | .. _kerberos-auth-config:
14 |
15 | KerberosAuthenticator
16 | ^^^^^^^^^^^^^^^^^^^^^
17 |
18 | .. autoconfigurable:: dask_gateway_server.auth.KerberosAuthenticator
19 |
20 |
21 | .. _jupyterhub-auth-config:
22 |
23 | JupyterHubAuthenticator
24 | ^^^^^^^^^^^^^^^^^^^^^^^
25 |
26 | .. autoconfigurable:: dask_gateway_server.auth.JupyterHubAuthenticator
27 |
28 |
29 | .. _simple-auth-config:
30 |
31 | SimpleAuthenticator
32 | ^^^^^^^^^^^^^^^^^^^
33 |
34 | .. autoconfigurable:: dask_gateway_server.auth.SimpleAuthenticator
35 |
36 |
37 | .. _cluster-backends-reference:
38 |
39 | Cluster Backends
40 | ----------------
41 |
42 | Base Class
43 | ^^^^^^^^^^
44 |
45 | .. _cluster-config:
46 |
47 | ClusterConfig
48 | ~~~~~~~~~~~~~
49 |
50 | .. autoconfigurable:: dask_gateway_server.backends.base.ClusterConfig
51 |
52 | Backend
53 | ~~~~~~~
54 |
55 | .. autoconfigurable:: dask_gateway_server.backends.base.Backend
56 |
57 |
58 | Local Processes
59 | ^^^^^^^^^^^^^^^
60 |
61 | LocalClusterConfig
62 | ~~~~~~~~~~~~~~~~~~
63 |
64 | .. autoconfigurable:: dask_gateway_server.backends.local.LocalClusterConfig
65 |
66 | LocalBackend
67 | ~~~~~~~~~~~~
68 |
69 | .. autoconfigurable:: dask_gateway_server.backends.local.LocalBackend
70 |
71 | UnsafeLocalBackend
72 | ~~~~~~~~~~~~~~~~~~
73 |
74 | .. autoconfigurable:: dask_gateway_server.backends.local.UnsafeLocalBackend
75 |
76 |
77 | YARN
78 | ^^^^
79 |
80 | .. _yarn-config:
81 |
82 | YarnClusterConfig
83 | ~~~~~~~~~~~~~~~~~
84 |
85 | .. autoconfigurable:: dask_gateway_server.backends.yarn.YarnClusterConfig
86 |
87 | YarnBackend
88 | ~~~~~~~~~~~
89 |
90 | .. autoconfigurable:: dask_gateway_server.backends.yarn.YarnBackend
91 |
92 |
93 | Kubernetes
94 | ^^^^^^^^^^
95 |
96 | .. _kube-cluster-config:
97 |
98 | KubeClusterConfig
99 | ~~~~~~~~~~~~~~~~~
100 |
101 | .. autoconfigurable:: dask_gateway_server.backends.kubernetes.KubeClusterConfig
102 |
103 | KubeBackend
104 | ~~~~~~~~~~~
105 |
106 | .. autoconfigurable:: dask_gateway_server.backends.kubernetes.KubeBackend
107 |
108 | KubeController
109 | ~~~~~~~~~~~~~~
110 |
111 | .. autoconfigurable:: dask_gateway_server.backends.kubernetes.controller.KubeController
112 |
113 |
114 | .. _jobqueue-config:
115 |
116 | Job Queues
117 | ^^^^^^^^^^
118 |
119 | PBSClusterConfig
120 | ~~~~~~~~~~~~~~~~
121 |
122 | .. autoconfigurable:: dask_gateway_server.backends.jobqueue.pbs.PBSClusterConfig
123 |
124 | PBSBackend
125 | ~~~~~~~~~~
126 |
127 | .. autoconfigurable:: dask_gateway_server.backends.jobqueue.pbs.PBSBackend
128 |
129 | SlurmClusterConfig
130 | ~~~~~~~~~~~~~~~~~~
131 |
132 | .. autoconfigurable:: dask_gateway_server.backends.jobqueue.slurm.SlurmClusterConfig
133 |
134 | SlurmBackend
135 | ~~~~~~~~~~~~
136 |
137 | .. autoconfigurable:: dask_gateway_server.backends.jobqueue.slurm.SlurmBackend
138 |
139 |
140 | Proxy
141 | -----
142 |
143 | Proxy
144 | ^^^^^
145 |
146 | .. autoconfigurable:: dask_gateway_server.proxy.Proxy
147 |
148 |
149 | Cluster Manager Options
150 | -----------------------
151 |
152 | .. autoclass:: dask_gateway_server.options.Options
153 |
154 | .. autoclass:: dask_gateway_server.options.Integer
155 |
156 | .. autoclass:: dask_gateway_server.options.Float
157 |
158 | .. autoclass:: dask_gateway_server.options.String
159 |
160 | .. autoclass:: dask_gateway_server.options.Bool
161 |
162 | .. autoclass:: dask_gateway_server.options.Select
163 |
164 | .. autoclass:: dask_gateway_server.options.Mapping
165 |
166 |
167 | Models
168 | ------
169 |
170 | User
171 | ^^^^
172 |
173 | .. autoclass:: dask_gateway_server.models.User
174 |
175 | Cluster
176 | ^^^^^^^
177 |
178 | .. autoclass:: dask_gateway_server.models.Cluster
179 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import dask_gateway_server
5 |
# Project settings
project = "Dask Gateway"
copyright = "2021, Jim Crist-Harif"
author = "Jim Crist-Harif"
# Docs version tracks the dask-gateway-server package version.
release = version = dask_gateway_server.__version__

source_suffix = [".rst", ".md"]
root_doc = master_doc = "index"
# NOTE(review): Sphinx >= 5 expects a string here (e.g. "en") and warns on
# None -- revisit when the sphinx pin in docs/requirements.txt is lifted.
language = None
# Commenting this out for now, if we register dask pygments,
# then eventually this line can be:
# pygments_style = "dask"
exclude_patterns = []

# Sphinx Extensions
# Make ./docs/sphinxext importable so locally vendored extensions can be
# listed in `extensions` below.
docs = os.path.dirname(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(docs, "sphinxext"))
extensions = [
    "autodoc_traits",
    "myst_parser",
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.extlinks",
    "sphinx.ext.napoleon",
]

# Shorthand roles, e.g. :issue:`123` / :pr:`456` in the changelog.
# NOTE(review): sphinx >= 4 expects a "%s" in the caption ("Issue #%s");
# confirm against the pinned sphinx version before changing.
extlinks = {
    "issue": ("https://github.com/dask/dask-gateway/issues/%s", "Issue #"),
    "pr": ("https://github.com/dask/dask-gateway/pull/%s", "PR #"),
}

# Sphinx Theme
html_theme = "dask_sphinx_theme"
templates_path = ["_templates"]

# -- Options for linkcheck builder -------------------------------------------
# http://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-the-linkcheck-builder
#
linkcheck_ignore = [
    r"(.*)github\.com(.*)#",  # javascript based anchors
    r"https://github.com/[^/]*$",  # too many github usernames / searches in changelog
    "https://github.com/jupyterhub/oauthenticator/pull/",  # too many PRs in changelog
    "https://github.com/jupyterhub/oauthenticator/compare/",  # too many comparisons in changelog
]
linkcheck_anchors_ignore = [
    "/#!",
    "/#%21",
]
54 |
--------------------------------------------------------------------------------
/docs/source/configuration-user.rst:
--------------------------------------------------------------------------------
1 | Configuration
2 | =============
3 |
4 | .. currentmodule:: dask_gateway
5 |
6 | Specifying all parameters to the :class:`Gateway` or :class:`GatewayCluster`
7 | constructors every time may be error prone, especially when sharing this
8 | workflow with new users. To simplify things you can provide defaults in a
9 | configuration file, traditionally held in ``~/.config/dask/gateway.yaml`` or
10 | ``/etc/dask/gateway.yaml``. Note that this configuration is *optional*, and
11 | only changes the defaults when not specified in the constructors. You only need
12 | to set the fields you care about, unset fields will fall back to the `default
13 | configuration`_.
14 |
15 | We recommend administrators create a configuration file to share with their
16 | users, specifying the addresses and authentication necessary to connect to
17 | their ``dask-gateway-server``. For example:
18 |
19 | **Example:**
20 |
21 | .. code-block:: yaml
22 |
23 | # ~/.config/dask/gateway.yaml
24 | gateway:
25 | # The full address to the dask-gateway server.
26 | address: http://146.148.58.187
27 |
28 | # The full address to the dask-gateway scheduler proxy
29 | proxy-address: tls://35.202.68.87:8786
30 |
31 | auth:
32 | # Use kerberos for authentication
33 | type: kerberos
34 |
35 |
36 | Users can now create :class:`Gateway` or :class:`GatewayCluster` objects
37 | without specifying any additional information.
38 |
39 | .. code-block:: python
40 |
41 | from dask_gateway import GatewayCluster
42 |
43 | cluster = GatewayCluster()
44 | cluster.scale(20)
45 |
46 | For more information on Dask configuration see the `Dask configuration
47 | documentation `_.
48 |
49 |
50 | Default Configuration
51 | ---------------------
52 |
53 | The default configuration file is as follows
54 |
55 | .. literalinclude:: ../../dask-gateway/dask_gateway/gateway.yaml
56 | :language: yaml
57 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | Dask Gateway
2 | ============
3 |
4 | Dask Gateway provides a secure, multi-tenant server for managing Dask_
5 | clusters. It allows users to launch and use Dask clusters in a shared,
6 | centrally managed cluster environment, without requiring users to have direct
7 | access to the underlying cluster backend (e.g. Kubernetes, Hadoop/YARN, HPC Job
8 | queues, etc...).
9 |
10 | Dask Gateway is one of many options for deploying Dask clusters, see `Deploying Dask`_ in the Dask documentation for an overview of additional options.
11 |
12 |
13 | Highlights
14 | ----------
15 |
16 | - **Centrally Managed**: Administrators do the heavy lifting of configuring the
17 | Gateway, users simply connect to the Gateway to get a new cluster. Eases deployment,
18 | and allows enforcing consistent configuration across all users.
19 |
20 | - **Secure by Default**: Cluster communication is automatically encrypted with
21 | TLS. All operations are authenticated with a configurable protocol, allowing
22 | you to use what makes sense for your organization.
23 |
24 | - **Flexible**: The gateway is designed to support multiple backends, and runs
25 | equally well in the cloud as on-premise. Natively supports Kubernetes,
26 | Hadoop/YARN, and HPC Job Queueing systems.
27 |
28 | - **Robust to Failure**: The gateway can be restarted or experience failover
29 | without losing existing clusters. Allows for seamless upgrades and restarts
30 | without disrupting users.
31 |
32 |
33 | Architecture Overview
34 | ---------------------
35 |
36 | Dask Gateway is divided into three separate components:
37 |
38 | - Multiple active **Dask Clusters** (potentially more than one per user)
39 | - A **Proxy** for proxying both the connection between the user's client
40 | and their respective scheduler, and the Dask Web UI for each cluster
41 | - A central **Gateway** that manages authentication and cluster startup/shutdown
42 |
43 |
44 | .. image:: /_images/architecture.svg
45 | :width: 90 %
46 | :align: center
47 | :alt: Dask-Gateway high-level architecture
48 |
49 |
50 | The gateway is designed to be flexible and pluggable, and makes heavy use of
51 | traitlets_ (the same technology used by the Jupyter_ ecosystem). In particular,
52 | both the cluster backend and the authentication protocol are pluggable.
53 |
54 | **Cluster Backends**
55 |
56 | - Kubernetes_
57 | - `Hadoop/YARN`_
58 | - Job Queue Systems (PBS_, Slurm_, ...)
59 | - Local Processes
60 |
61 | **Authentication Methods**
62 |
63 | - `Kerberos `__
64 | - `JupyterHub service plugin `__
65 | - HTTP Basic
66 |
67 |
68 | .. toctree::
69 | :maxdepth: 1
70 | :hidden:
71 | :caption: For Users
72 |
73 | install-user
74 | usage
75 | configuration-user
76 |
77 | .. toctree::
78 | :maxdepth: 1
79 | :hidden:
80 | :caption: Admin - Installation
81 |
82 | install-local
83 | install-hadoop
84 | install-kube
85 | install-jobqueue
86 |
87 | .. toctree::
88 | :maxdepth: 1
89 | :hidden:
90 | :caption: Admin - Customization
91 |
92 | authentication
93 | security
94 | cluster-options
95 | resource-limits
96 |
97 | .. toctree::
98 | :maxdepth: 1
99 | :hidden:
100 | :caption: Reference
101 |
102 | api-client
103 | api-server
104 | develop
105 | changelog
106 |
107 |
108 | .. _Dask: https://dask.org/
109 | .. _traitlets: https://traitlets.readthedocs.io/en/stable/
110 | .. _Jupyter: https://jupyter.org/
111 | .. _Hadoop/YARN: https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html
.. _PBS: https://www.openpbs.org/
113 | .. _Slurm: https://slurm.schedmd.com/
114 | .. _Kubernetes: https://kubernetes.io/
115 | .. _Deploying Dask: https://docs.dask.org/en/stable/deploying.html
116 |
--------------------------------------------------------------------------------
/docs/source/install-local.rst:
--------------------------------------------------------------------------------
1 | Install Locally (Quickstart)
2 | ============================
3 |
4 | This page describes how to deploy and interact with a ``dask-gateway-server``
5 | locally. This can be useful for testing, demos, and development purposes, but
6 | is not a normal method of deployment.
7 |
8 | If you're a user connecting to an existing ``dask-gateway-server`` instance,
9 | you may want to start at :doc:`usage` instead.
10 |
11 | .. currentmodule:: dask_gateway
12 |
13 | Installation
14 | ------------
15 |
16 | Dask-Gateway can be installed with ``conda`` or ``pip``. It's composed of two packages:
17 |
18 | - ``dask-gateway-server``: the gateway server. Administrators usually install this once on a cluster.
19 | - ``dask-gateway``: the client library. Users only need this library to use a running Gateway.
20 |
21 | **Install with conda**
22 |
23 | .. code-block:: console
24 |
25 | $ conda install -c conda-forge dask-gateway dask-gateway-server-local
26 |
27 | **Install with pip**
28 |
29 | .. code-block:: console
30 |
31 | $ pip install dask-gateway dask-gateway-server[local]
32 |
33 |
34 | Start the gateway server
35 | ------------------------
36 |
37 | To start the Gateway server, run:
38 |
39 | .. code-block:: console
40 |
41 | $ dask-gateway-server
42 |
43 |
44 | This starts ``dask-gateway`` locally with the default configuration. This uses:
45 |
46 | - ``UnsafeLocalBackend`` to manage local clusters without any process isolation
47 | - ``SimpleAuthenticator`` to authenticate users using a simple and insecure authentication scheme
48 |
49 | *Both of these options are insecure and not-advised for any real-world
50 | deployments.* They are perfectly fine for testing and experimentation though.
51 |
52 |
53 | Connect to the gateway server
54 | -----------------------------
55 |
56 | To connect to the gateway, create a :class:`Gateway` client with the URL output
57 | above. By default this is ``http://127.0.0.1:8000``.
58 |
59 | .. code-block:: python
60 |
61 | >>> from dask_gateway import Gateway
62 | >>> gateway = Gateway("http://127.0.0.1:8000")
63 | >>> gateway
64 | Gateway
65 |
66 | To check that everything is setup properly, query the gateway server to see any
67 | existing clusters (should be an empty list).
68 |
69 | .. code-block:: python
70 |
71 | >>> gateway.list_clusters()
72 | []
73 |
74 |
75 | Interact with the gateway server
76 | --------------------------------
77 |
78 | At this point you can use the :class:`Gateway` client to interact with the
79 | gateway server. You can use the client to create new clusters and interact with
80 | existing clusters. We direct you to the :doc:`usage` documentation for more
81 | information, starting from the :ref:`usage-create-new-cluster` section.
82 |
83 |
84 | Shutdown the gateway server
85 | ---------------------------
86 |
87 | When you're done with local usage, you'll want to shutdown the Dask-Gateway
88 | server. To do this, ``Ctrl-C`` in the same terminal you started the process in.
89 | Note that any active clusters will also be shutdown.
90 |
--------------------------------------------------------------------------------
/docs/source/install-user.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | Dask-Gateway is composed of two packages:
5 |
6 | - ``dask-gateway``: the client library, installed by *users*.
7 | - ``dask-gateway-server``: the gateway server, installed by *administrators*.
8 |
9 | Dask-Gateway *users* only need the ``dask-gateway`` client package to interact
10 | with the server. It can be installed with ``conda`` or ``pip``.
11 |
12 | **Install with conda**
13 |
14 | .. code-block:: console
15 |
16 | $ conda install -c conda-forge dask-gateway
17 |
18 | **Install with pip**
19 |
20 | .. code-block:: console
21 |
22 | $ pip install dask-gateway
23 |
24 | The version of the client library should match that of ``dask-gateway-server``
25 | running on the server. If you don't know the version running on your server,
26 | contact your administrator.
27 |
28 |
29 | Kerberos Authentication Dependencies (Optional)
30 | -----------------------------------------------
31 |
32 | If your Dask-Gateway server uses Kerberos_ for authentication, you'll also need
33 | to install the kerberos dependencies. This can be done with either ``conda`` or
34 | ``pip``:
35 |
36 | **Install with conda**
37 |
38 | .. code-block:: console
39 |
40 | $ conda install -c conda-forge dask-gateway-kerberos
41 |
42 | **Install with pip**
43 |
44 | .. code-block:: console
45 |
46 | $ pip install dask-gateway[kerberos]
47 |
48 |
49 | .. _Kerberos: https://en.wikipedia.org/wiki/Kerberos_(protocol)
50 |
--------------------------------------------------------------------------------
/docs/source/resource-limits.rst:
--------------------------------------------------------------------------------
1 | Cluster Resource Limits
2 | =======================
3 |
4 | By default users can create clusters with as many workers and resources as they
5 | want. In shared environments this may not always be desirable. To remedy this
6 | administrators can set per-cluster resource limits.
7 |
8 | A few limits are available:
9 |
10 | - :data:`c.ClusterConfig.cluster_max_cores`: Maximum number of cores per cluster
11 | - :data:`c.ClusterConfig.cluster_max_memory`: Maximum amount of memory per cluster
12 | - :data:`c.ClusterConfig.cluster_max_workers`: Maximum number of workers per cluster
13 |
14 | If a cluster is at capacity for any of these limits, requests for new
15 | workers will warn with an informative message saying they're at capacity.
16 |
17 | Example
18 | -------
19 |
20 | Here we limit each cluster to:
21 |
22 | - A max of 80 active cores
23 | - A max of 1 TiB of RAM
24 |
25 | .. code-block:: python
26 |
27 | c.ClusterConfig.cluster_max_cores = 80
28 | c.ClusterConfig.cluster_max_memory = "1 T"
29 |
--------------------------------------------------------------------------------
/docs/source/security.rst:
--------------------------------------------------------------------------------
1 | Security settings
2 | =================
3 |
4 | Here we present a few common security fields you'll likely want to configure in
5 | a production deployment.
6 |
7 |
8 | Enabling TLS
9 | ------------
10 |
11 | As a web application, any production deployment of Dask-Gateway should be run
12 | with TLS encryption (HTTPS_) enabled. There are a few common options for
13 | enabling this.
14 |
15 | Using your own TLS certificate
16 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
17 |
18 | If you have your own TLS certificate/key pair, you can specify the file
19 | locations in your ``dask_gateway_config.py`` file. The relevant configuration
20 | fields are:
21 |
22 | - :data:`c.Proxy.tls_cert`
23 | - :data:`c.Proxy.tls_key`
24 |
25 | .. code-block:: python
26 |
27 | c.Proxy.tls_cert = "/path/to/my.cert"
28 | c.Proxy.tls_key = "/path/to/my.key"
29 |
30 | Note that the certificate and key *must* be stored in a secure location where
31 | they are readable only by admin users.
32 |
33 | Using letsencrypt
34 | ^^^^^^^^^^^^^^^^^
35 |
36 | It is also possible to use letsencrypt_ to automatically obtain TLS
37 | certificates. If you have letsencrypt running using the default options, you
38 | can configure this by adding the following to your ``dask_gateway_config.py``
39 | file:
40 |
41 | .. code-block:: python
42 |
43 | c.Proxy.tls_cert = "/etc/letsencrypt/live/{FQDN}/fullchain.pem"
44 | c.Proxy.tls_key = "/etc/letsencrypt/live/{FQDN}/privkey.pem"
45 |
46 | where ``FQDN`` is the `fully qualified domain name`_ for your server.
47 |
48 | Using external TLS termination
49 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
50 |
51 | If ``dask-gateway-server`` is running behind a proxy that does TLS termination
52 | (e.g. NGINX_), then no further configuration is needed.
53 |
54 |
55 | Proxy authentication tokens
56 | ---------------------------
57 |
58 | To secure communication between the proxy and the gateway server, a secret
59 | token is used. By default this token is generated automatically. It's necessary
60 | for an admin to configure this explicitly if the proxies are being externally
61 | managed (i.e. :data:`c.Proxy.externally_managed` is set to true). To do this
62 | you have two options:
63 |
64 | - Configure :data:`c.Proxy.api_token` in your ``dask_gateway_config.py`` file.
65 | Since the token should be kept secret, the config file *must* be readable
66 | only by admin users.
67 | - Set the ``DASK_GATEWAY_PROXY_TOKEN`` environment variable. For security
68 | reasons, this environment variable should only be visible by the gateway
69 | server and proxy.
70 |
71 | In either case both options take 32 byte random strings, encoded as hex. One way
72 | to create these is through the ``openssl`` CLI:
73 |
74 | .. code-block:: shell
75 |
76 | $ openssl rand -hex 32
77 |
78 |
79 | .. _HTTPS: https://en.wikipedia.org/wiki/HTTPS
80 | .. _letsencrypt: https://letsencrypt.org/
81 | .. _fully qualified domain name: https://en.wikipedia.org/wiki/Fully_qualified_domain_name
82 | .. _NGINX: https://docs.nginx.com/nginx/admin-guide/security-controls/terminating-ssl-http/
83 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # isort is used for autoformatting Python code
2 | #
3 | # ref: https://pycqa.github.io/isort/
4 | #
5 | [tool.isort]
6 | profile = "black"
7 |
8 |
9 | # black is used for autoformatting Python code
10 | #
11 | # ref: https://black.readthedocs.io/en/stable/
12 | #
13 | [tool.black]
14 | line-length = 88
15 | target_version = [
16 | "py310",
17 | "py311",
18 | "py312",
19 | "py313",
20 | ]
21 |
22 |
23 | # pytest is used for running Python based tests
24 | #
25 | # ref: https://docs.pytest.org/en/stable/
26 | #
27 | [tool.pytest.ini_options]
28 | addopts = "--verbose --color=yes --durations=10"
29 | testpaths = ["tests"]
30 | asyncio_mode = "auto"
31 | asyncio_default_fixture_loop_scope = "function"
32 |
33 |
34 | # pytest-cov / coverage is used to measure code coverage of tests
35 | #
36 | # ref: https://coverage.readthedocs.io/en/stable/config.html
37 | #
38 | [tool.coverage.run]
39 | omit = [
40 | "tests/*.py",
41 | "*/_version.py",
42 | "dask-gateway-server/dask_gateway_server/managers/jobqueue/launcher.py",
43 | ]
44 | source = [
45 | "dask-gateway-server/dask_gateway_server",
46 | "dask-gateway/dask_gateway",
47 | ]
48 |
49 |
50 | # tbump is used to simplify and standardize the release process when updating
51 | # the version, making a git commit and tag, and pushing changes.
52 | #
53 | # ref: https://github.com/your-tools/tbump#readme
54 | #
55 | [tool.tbump]
56 | github_url = "https://github.com/dask/dask-gateway"
57 |
58 | [tool.tbump.version]
59 | current = "2025.4.1-0.dev"
60 | regex = '''
61 | (?P<major>\d+)
62 | \.
63 | (?P<minor>\d+)
64 | \.
65 | (?P<patch>\d+)
66 | (\-
67 | (?P<pre>((alpha|beta|rc)\.\d+|0\.dev))
68 | )?
69 | '''
70 |
71 | [tool.tbump.git]
72 | message_template = "Release {new_version}"
73 | tag_template = "{new_version}"
74 |
75 | [[tool.tbump.file]]
76 | src = "dask-gateway/pyproject.toml"
77 | search = 'version = "{current_version}"'
78 |
79 | [[tool.tbump.file]]
80 | src = "dask-gateway/dask_gateway/_version.py"
81 | search = '__version__ = "{current_version}"'
82 |
83 | [[tool.tbump.file]]
84 | src = "dask-gateway-server/pyproject.toml"
85 | search = 'version = "{current_version}"'
86 |
87 | [[tool.tbump.file]]
88 | src = "dask-gateway-server/dask_gateway_server/_version.py"
89 | search = '__version__ = "{current_version}"'
90 |
91 | [[tool.tbump.file]]
92 | src = "resources/helm/dask-gateway/Chart.yaml"
93 | search = 'appVersion: "{current_version}"'
94 |
--------------------------------------------------------------------------------
/resources/README.rst:
--------------------------------------------------------------------------------
1 | Resources
2 | =========
3 |
4 | This directory contains extra resources for deploying dask gateway.
5 |
--------------------------------------------------------------------------------
/resources/helm/README.rst:
--------------------------------------------------------------------------------
1 | Helm Chart
2 | ==========
3 |
4 | A helm chart for deploying Dask Gateway on Kubernetes.
5 |
--------------------------------------------------------------------------------
/resources/helm/chartpress.yaml:
--------------------------------------------------------------------------------
1 | # This is configuration for chartpress, a CLI for Helm chart management.
2 | #
3 | # chartpress is used to test, package, and publish the dask-gateway Helm chart
4 | # to the gh-pages based Helm chart repository at https://helm.dask.org and
5 | # https://github.com/dask/helm-chart respectively. Note that a Helm chart
6 | # repository is just a website that can serve a "index.yaml" file pointing to
7 | # packaged Helm charts that can be downloaded.
8 | #
9 | # chartpress is used to:
10 | # - Build images for multiple CPU architectures
11 | # - Update Chart.yaml (version) and values.yaml (image tags)
12 | # - Package and publish Helm charts to a GitHub based Helm chart repository
13 | #
14 | # Configuration reference:
15 | # https://github.com/jupyterhub/chartpress#configuration
16 | #
17 | charts:
18 | - name: dask-gateway
19 | imagePrefix: ghcr.io/dask/
20 | repo:
21 | git: dask/helm-chart
22 | published: https://helm.dask.org
23 | images:
24 | # Used for clusters' scheduler and workers pods by default
25 | dask-gateway:
26 | imageName: ghcr.io/dask/dask-gateway
27 | contextPath: ../../dask-gateway
28 | valuesPath:
29 | - gateway.backend.image
30 | # Used for the api and controller pods
31 | dask-gateway-server:
32 | imageName: ghcr.io/dask/dask-gateway-server
33 | contextPath: ../../dask-gateway-server
34 | valuesPath:
35 | - gateway.image
36 | - controller.image
37 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/.helmignore:
--------------------------------------------------------------------------------
1 | # Patterns to ignore when building packages.
2 | # This supports shell glob matching, relative path matching, and
3 | # negation (prefixed with !). Only one pattern per line.
4 | .DS_Store
5 | # Common VCS dirs
6 | .git/
7 | .gitignore
8 | .gitkeep
9 | .bzr/
10 | .bzrignore
11 | .hg/
12 | .hgignore
13 | .svn/
14 | # Common backup files
15 | *.swp
16 | *.bak
17 | *.tmp
18 | *~
19 | # Various IDEs
20 | .project
21 | .idea/
22 | *.tmproj
23 | .vscode/
24 |
25 | # Manually added entries
26 | *.rst
27 | .gitkeep
28 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/Chart.yaml:
--------------------------------------------------------------------------------
1 | # Chart.yaml v2 reference: https://helm.sh/docs/topics/charts/#the-chartyaml-file
2 | apiVersion: v2
3 | name: dask-gateway
4 | version: 0.0.1-set.by.chartpress
5 | appVersion: "2025.4.1-0.dev"
6 | description: A multi-tenant server for deploying and managing Dask clusters
7 | home: https://gateway.dask.org/
8 | sources:
9 | - https://github.com/dask/dask-gateway/
10 | icon: https://avatars3.githubusercontent.com/u/17131925?v=3&s=200
11 | kubeVersion: ">=1.30.0-0"
12 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/crds/daskclusters.yaml:
--------------------------------------------------------------------------------
1 | # Any change to these resources should be followed by an entry in the changelog
2 | # about needing to manually patch them, read more about this in
3 | # https://github.com/dask/dask-gateway/issues/553.
4 | #
5 | apiVersion: apiextensions.k8s.io/v1
6 | kind: CustomResourceDefinition
7 | metadata:
8 | name: daskclusters.gateway.dask.org
9 | creationTimestamp: null
10 | spec:
11 | group: gateway.dask.org
12 | names:
13 | kind: DaskCluster
14 | listKind: DaskClusterList
15 | plural: daskclusters
16 | singular: daskcluster
17 | scope: Namespaced
18 | versions:
19 | - name: v1alpha1
20 | served: true
21 | storage: true
22 | subresources:
23 | status: {}
24 | schema:
25 | # NOTE: While we define a schema, it is a dummy schema that doesn't
26 | # validate anything. We just have it to comply with the schema of
27 | # a CustomResourceDefinition that requires it.
28 | #
29 | # A decision has been made to not implement an actual schema at
30 | # this point in time due to the additional maintenance work it
31 | # would require.
32 | #
33 | # Reference: https://github.com/dask/dask-gateway/issues/434
34 | #
35 | openAPIV3Schema:
36 | type: object
37 | x-kubernetes-preserve-unknown-fields: true
38 | status:
39 | acceptedNames:
40 | kind: ""
41 | plural: ""
42 | conditions: []
43 | storedVersions: []
44 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/extensions/README.rst:
--------------------------------------------------------------------------------
1 | Extensions
2 | ==========
3 |
4 | Some Dask Gateway deployments will require non-trivial configuration (e.g. a
5 | new ``Authenticator`` class). You have a few options to add such "extensions":
6 |
7 | 1. Add all extension code in the ``gateway.extraConfig`` of your Helm values
8 | file. For simple extensions this is the recommended approach.
9 | 2. Package your code as part of a custom image, and configure the Dask Gateway
10 | api server to use that image via ``gateway.image``. Recommended if your
11 | extension is large enough (Helm charts have a size limit of 1 MiB) or
12 | requires additional dependencies.
13 | 3. Clone the helm chart locally, and make use of the ``extensions`` directory.
14 | This approach prevents using the published Helm chart, but may be useful in
15 | some cases.
16 |
17 | To use the ``extensions`` directory, clone the Helm chart locally, and copy
18 | whatever extra files you require into the ``extensions/gateway`` directory.
19 | All files in ``extensions/gateway`` will be copied into ``/etc/dask-gateway``
20 | in the deployed Dask Gateway API server pods. This directory is added to
21 | ``PYTHONPATH``, so any Python code will be importable. You can then import what
22 | functionality you need in a smaller section in ``gateway.extraConfig`` to
23 | configure the Dask Gateway server as needed.
24 |
25 | Example
26 | -------
27 |
28 | For example, say ``myauthenticator.py`` contains a custom ``Authenticator``
29 | class:
30 |
31 | .. code-block:: python
32 |
33 | from dask_gateway_server.auth import Authenticator
34 |
35 | class MyAuthenticator(Authenticator):
36 | """My custom authenticator"""
37 | ...
38 |
39 | After adding ``myauthenticator.py`` to ``extensions/gateway``, you can
40 | configure the Dask Gateway API server to use your authenticator via the proper
41 | fields in ``values.yaml``. For an authenticator, you can make use of
42 | ``gateway.auth``:
43 |
44 | .. code-block:: yaml
45 |
46 | gateway:
47 | auth:
48 | type: custom
49 | custom:
50 | class: myauthenticator.MyAuthenticator
51 |
52 | For other types of extensions (say ``c.KubeBackend.cluster_options``) you'd
53 | need to import and configure things in ``gateway.extraConfig``:
54 |
55 | .. code-block:: yaml
56 |
57 | gateway:
58 | extraConfig:
59 | my-extension: |
60 | # import your extension and configure appropriately
61 | from myextension import my_cluster_options
62 | c.KubeBackend.cluster_options = my_cluster_options
63 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/extensions/gateway/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/resources/helm/dask-gateway/extensions/gateway/.gitkeep
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/NOTES.txt:
--------------------------------------------------------------------------------
1 | You've installed Dask-Gateway version {{ .Chart.AppVersion }}, from chart
2 | version {{ .Chart.Version }}!
3 |
4 | Your release is named {{ .Release.Name | quote }} and installed into the
5 | namespace {{ .Release.Namespace | quote }}.
6 |
7 | You can find the public address(es) at:
8 |
9 | $ kubectl --namespace={{ .Release.Namespace }} get service {{ include "dask-gateway.traefikName" . }}
10 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/* vim: set filetype=mustache: */}}
2 | {{/*
3 | Expand the name of the chart.
4 | */}}
5 | {{- define "dask-gateway.name" -}}
6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
7 | {{- end -}}
8 |
9 | {{/*
10 | Create a default fully qualified app name.
11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | If release name contains chart name it will be used as a full name.
13 | */}}
14 | {{- define "dask-gateway.fullname" -}}
15 | {{- if .Values.fullnameOverride -}}
16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
17 | {{- else -}}
18 | {{- $name := default .Chart.Name .Values.nameOverride -}}
19 | {{- if contains $name .Release.Name -}}
20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}}
21 | {{- else -}}
22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
23 | {{- end -}}
24 | {{- end -}}
25 | {{- end -}}
26 |
27 | {{/*
28 | Create chart name and version as used by the chart label.
29 | */}}
30 | {{- define "dask-gateway.chart" -}}
31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
32 | {{- end -}}
33 |
34 | {{/*
35 | Common labels
36 | */}}
37 | {{- define "dask-gateway.labels" -}}
38 | app.kubernetes.io/name: {{ include "dask-gateway.name" . }}
39 | helm.sh/chart: {{ include "dask-gateway.chart" . }}
40 | app.kubernetes.io/instance: {{ .Release.Name }}
41 | {{- if .Chart.AppVersion }}
42 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
43 | {{- end }}
44 | app.kubernetes.io/managed-by: {{ .Release.Service }}
45 | gateway.dask.org/instance: {{ include "dask-gateway.fullname" . }}
46 | {{- end -}}
47 |
48 | {{/*
49 | Match labels
50 | */}}
51 | {{- define "dask-gateway.matchLabels" -}}
52 | app.kubernetes.io/name: {{ include "dask-gateway.name" . }}
53 | app.kubernetes.io/instance: {{ .Release.Name }}
54 | {{- end -}}
55 |
56 | {{/*
57 | API Server name
58 | */}}
59 | {{- define "dask-gateway.apiName" -}}
60 | {{ include "dask-gateway.fullname" . | printf "api-%s" | trunc 63 | trimSuffix "-" }}
61 | {{- end -}}
62 |
63 | {{/*
64 | Traefik name
65 | */}}
66 | {{- define "dask-gateway.traefikName" -}}
67 | {{ include "dask-gateway.fullname" . | printf "traefik-%s" | trunc 63 | trimSuffix "-" }}
68 | {{- end -}}
69 |
70 | {{/*
71 | Controller name
72 | */}}
73 | {{- define "dask-gateway.controllerName" -}}
74 | {{ include "dask-gateway.fullname" . | printf "controller-%s" | trunc 63 | trimSuffix "-" }}
75 | {{- end -}}
76 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/controller/configmap.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.controller.enabled -}}
2 | kind: ConfigMap
3 | apiVersion: v1
4 | metadata:
5 | name: {{ include "dask-gateway.controllerName" . }}
6 | labels:
7 | {{- include "dask-gateway.labels" . | nindent 4 }}
8 | data:
9 | dask_gateway_config.py: |-
10 | # Configure addresses
11 | c.KubeController.address = ":8000"
12 | c.KubeController.api_url = 'http://{{ include "dask-gateway.apiName" . }}.{{ .Release.Namespace }}:8000/api'
13 | c.KubeController.gateway_instance = '{{ include "dask-gateway.fullname" . }}'
14 | c.KubeController.proxy_prefix = "{{ .Values.gateway.prefix }}"
15 | c.KubeController.proxy_web_middlewares = [
16 | {"name": '{{ include "dask-gateway.fullname" . | printf "clusters-prefix-%s" | trunc 63 | trimSuffix "-" }}',
17 | "namespace": '{{ .Release.Namespace }}'}
18 | ]
19 | c.KubeController.log_level = "{{ .Values.controller.loglevel }}"
20 | c.KubeController.completed_cluster_max_age = {{ .Values.controller.completedClusterMaxAge }}
21 | c.KubeController.completed_cluster_cleanup_period = {{ .Values.controller.completedClusterCleanupPeriod }}
22 | c.KubeController.backoff_base_delay = {{ .Values.controller.backoffBaseDelay }}
23 | c.KubeController.backoff_max_delay = {{ .Values.controller.backoffMaxDelay }}
24 | c.KubeController.k8s_api_rate_limit = {{ .Values.controller.k8sApiRateLimit }}
25 | c.KubeController.k8s_api_rate_limit_burst = {{ .Values.controller.k8sApiRateLimitBurst }}
26 | {{- if eq (toString .Values.traefik.service.ports.tcp.port) "web" }}
27 | c.KubeController.proxy_tcp_entrypoint = "web"
28 | {{- end }}
29 | {{- end }}
30 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/controller/deployment.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.controller.enabled -}}
2 | apiVersion: apps/v1
3 | kind: Deployment
4 | metadata:
5 | name: {{ include "dask-gateway.controllerName" . }}
6 | labels:
7 | {{- include "dask-gateway.labels" . | nindent 4 }}
8 | spec:
9 | replicas: 1
10 | strategy:
11 | type: Recreate
12 | selector:
13 | matchLabels:
14 | {{- include "dask-gateway.matchLabels" . | nindent 6 }}
15 | app.kubernetes.io/component: controller
16 | template:
17 | metadata:
18 | labels:
19 | {{- include "dask-gateway.labels" . | nindent 8 }}
20 | app.kubernetes.io/component: controller
21 | annotations:
22 | checksum/configmap: {{ include (print .Template.BasePath "/controller/configmap.yaml") . | sha256sum }}
23 | {{- with .Values.controller.annotations }}
24 | {{- . | toYaml | nindent 8 }}
25 | {{- end }}
26 | spec:
27 | {{- if .Values.rbac.enabled }}
28 | {{- if .Values.rbac.controller.serviceAccountName }}
29 | serviceAccountName: {{ .Values.rbac.controller.serviceAccountName }}
30 | {{- else }}
31 | serviceAccountName: {{ include "dask-gateway.controllerName" . }}
32 | {{- end }}
33 | {{- end }}
34 | volumes:
35 | - name: configmap
36 | configMap:
37 | name: {{ include "dask-gateway.controllerName" . }}
38 | {{- with .Values.controller.imagePullSecrets }}
39 | imagePullSecrets:
40 | {{- . | toYaml | nindent 8 }}
41 | {{- end }}
42 | containers:
43 | - name: controller
44 | image: {{ .Values.controller.image.name }}:{{ .Values.controller.image.tag }}
45 | imagePullPolicy: {{ .Values.controller.image.pullPolicy }}
46 | args:
47 | - dask-gateway-server
48 | - kube-controller
49 | - --config
50 | - /etc/dask-gateway/dask_gateway_config.py
51 | {{- with .Values.controller.resources }}
52 | resources:
53 | {{- . | toYaml | nindent 12 }}
54 | {{- end }}
55 | volumeMounts:
56 | - mountPath: /etc/dask-gateway/
57 | name: configmap
58 | ports:
59 | - containerPort: 8000
60 | name: api
61 | {{- with .Values.controller.affinity }}
62 | affinity:
63 | {{- . | toYaml | nindent 8 }}
64 | {{- end }}
65 | {{- with .Values.controller.tolerations }}
66 | tolerations:
67 | {{- . | toYaml | nindent 8 }}
68 | {{- end }}
69 | {{- with .Values.controller.nodeSelector }}
70 | nodeSelector:
71 | {{- . | toYaml | nindent 8 }}
72 | {{- end }}
73 | {{- end }}
74 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/controller/rbac.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.controller.enabled -}}
2 | {{- if .Values.rbac.enabled -}}
3 | {{- if not .Values.rbac.controller.serviceAccountName -}}
4 | apiVersion: v1
5 | kind: ServiceAccount
6 | metadata:
7 | name: {{ include "dask-gateway.controllerName" . }}
8 | labels:
9 | {{- include "dask-gateway.labels" . | nindent 4 }}
10 | ---
11 | apiVersion: rbac.authorization.k8s.io/v1
12 | kind: ClusterRole
13 | metadata:
14 | name: {{ include "dask-gateway.controllerName" . }}
15 | labels:
16 | {{- include "dask-gateway.labels" . | nindent 4 }}
17 | rules:
18 | - apiGroups: ["gateway.dask.org"]
19 | resources: ["daskclusters", "daskclusters/status"]
20 | verbs: ["*"]
21 | - apiGroups: ["traefik.io"]
22 | resources: ["ingressroutes", "ingressroutetcps"]
23 | verbs: ["get", "create", "delete"]
24 | - apiGroups: [""]
25 | resources: ["pods"]
26 | verbs: ["get", "list", "watch", "create", "delete"]
27 | - apiGroups: [""]
28 | resources: ["endpoints"]
29 | verbs: ["get", "list", "watch"]
30 | - apiGroups: [""]
31 | resources: ["secrets", "services"]
32 | verbs: ["create", "delete"]
33 | ---
34 | kind: ClusterRoleBinding
35 | apiVersion: rbac.authorization.k8s.io/v1
36 | metadata:
37 | name: {{ include "dask-gateway.controllerName" . }}
38 | labels:
39 | {{- include "dask-gateway.labels" . | nindent 4 }}
40 | subjects:
41 | - kind: ServiceAccount
42 | name: {{ include "dask-gateway.controllerName" . }}
43 | namespace: {{ .Release.Namespace }}
44 | roleRef:
45 | kind: ClusterRole
46 | name: {{ include "dask-gateway.controllerName" . }}
47 | apiGroup: rbac.authorization.k8s.io
48 | {{- end }}
49 | {{- end }}
50 | {{- end }}
51 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/gateway/ingressroute.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: traefik.io/v1alpha1
2 | kind: IngressRoute
3 | metadata:
4 | name: {{ include "dask-gateway.apiName" . }}
5 | labels:
6 | {{- include "dask-gateway.labels" . | nindent 4 }}
7 | spec:
8 | entryPoints:
9 | - web
10 | routes:
11 | - match: PathPrefix(`{{ .Values.gateway.prefix }}`)
12 | kind: Rule
13 | services:
14 | - name: {{ include "dask-gateway.apiName" . }}
15 | port: 8000
16 | {{- if ne .Values.gateway.prefix "/"}}
17 | middlewares:
18 | - name: '{{ include "dask-gateway.fullname" . | printf "api-prefix-%s" | trunc 63 | trimSuffix "-" }}'
19 | {{- end }}
20 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/gateway/middleware.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: traefik.io/v1alpha1
2 | kind: Middleware
3 | metadata:
4 | name: {{ include "dask-gateway.fullname" . | printf "clusters-prefix-%s" | trunc 63 | trimSuffix "-" }}
5 | labels:
6 | {{- include "dask-gateway.labels" . | nindent 4 }}
7 | spec:
8 | stripPrefixRegex:
9 | regex:
10 | - '{{ .Values.gateway.prefix | trimSuffix "/" }}/clusters/[a-zA-Z0-9.-]+'
11 | {{- if ne .Values.gateway.prefix "/" }}
12 | ---
13 | apiVersion: traefik.io/v1alpha1
14 | kind: Middleware
15 | metadata:
16 | name: {{ include "dask-gateway.fullname" . | printf "api-prefix-%s" | trunc 63 | trimSuffix "-" }}
17 | labels:
18 | {{- include "dask-gateway.labels" . | nindent 4 }}
19 | spec:
20 | stripPrefix:
21 | prefixes:
22 | - '{{ .Values.gateway.prefix | trimSuffix "/" }}'
23 | {{- end }}
24 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/gateway/rbac.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.rbac.enabled -}}
2 | {{- if not .Values.rbac.gateway.serviceAccountName -}}
3 | apiVersion: v1
4 | kind: ServiceAccount
5 | metadata:
6 | name: {{ include "dask-gateway.apiName" . }}
7 | labels:
8 | {{- include "dask-gateway.labels" . | nindent 4 }}
9 | ---
10 | apiVersion: rbac.authorization.k8s.io/v1
11 | kind: ClusterRole
12 | metadata:
13 | name: {{ include "dask-gateway.apiName" . }}
14 | labels:
15 | {{- include "dask-gateway.labels" . | nindent 4 }}
16 | rules:
17 | - apiGroups: [""]
18 | resources: ["secrets"]
19 | verbs: ["get"]
20 | - apiGroups: ["gateway.dask.org"]
21 | resources: ["daskclusters"]
22 | verbs: ["*"]
23 | ---
24 | kind: ClusterRoleBinding
25 | apiVersion: rbac.authorization.k8s.io/v1
26 | metadata:
27 | name: {{ include "dask-gateway.apiName" . }}
28 | labels:
29 | {{- include "dask-gateway.labels" . | nindent 4 }}
30 | subjects:
31 | - kind: ServiceAccount
32 | name: {{ include "dask-gateway.apiName" . }}
33 | namespace: {{ .Release.Namespace }}
34 | roleRef:
35 | kind: ClusterRole
36 | name: {{ include "dask-gateway.apiName" . }}
37 | apiGroup: rbac.authorization.k8s.io
38 | {{- end }}
39 | {{- end }}
40 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/gateway/secret.yaml:
--------------------------------------------------------------------------------
1 | {{- if and (eq .Values.gateway.auth.type "jupyterhub") .Values.gateway.auth.jupyterhub.apiToken -}}
2 | kind: Secret
3 | apiVersion: v1
4 | metadata:
5 | name: {{ include "dask-gateway.apiName" . }}
6 | labels:
7 | {{- include "dask-gateway.labels" . | nindent 4 }}
8 | type: Opaque
9 | data:
10 | jupyterhub-api-token: {{ .Values.gateway.auth.jupyterhub.apiToken | b64enc | quote }}
11 | {{- end }}
12 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/gateway/service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: {{ include "dask-gateway.apiName" . }}
5 | labels:
6 | {{- include "dask-gateway.labels" . | nindent 4 }}
7 | {{- with .Values.gateway.service.annotations }}
8 | annotations:
9 | {{- . | toYaml | nindent 4 }}
10 | {{- end }}
11 | spec:
12 | type: ClusterIP
13 | selector:
14 | {{- include "dask-gateway.matchLabels" . | nindent 4 }}
15 | app.kubernetes.io/component: gateway
16 | ports:
17 | - protocol: TCP
18 | port: 8000
19 | targetPort: 8000
20 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/traefik/dashboard.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.traefik.dashboard -}}
2 | {{- if .Values.traefik.installTraefik -}}
3 | apiVersion: traefik.io/v1alpha1
4 | kind: IngressRoute
5 | metadata:
6 | name: {{ include "dask-gateway.fullname" . | printf "traefik-dashboard-%s" | trunc 63 | trimSuffix "-" }}
7 | labels:
8 | {{- include "dask-gateway.labels" . | nindent 4 }}
9 | spec:
10 | entryPoints:
11 | - traefik
12 | routes:
13 | - match: PathPrefix(`/dashboard`) || PathPrefix(`/api`)
14 | kind: Rule
15 | services:
16 | - name: api@internal
17 | kind: TraefikService
18 | {{- end }}
19 | {{- end }}
20 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/traefik/deployment.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.traefik.installTraefik -}}
2 | apiVersion: apps/v1
3 | kind: Deployment
4 | metadata:
5 | name: {{ include "dask-gateway.traefikName" . }}
6 | labels:
7 | {{- include "dask-gateway.labels" . | nindent 4 }}
8 | spec:
9 | replicas: {{ .Values.traefik.replicas }}
10 | selector:
11 | matchLabels:
12 | {{- include "dask-gateway.matchLabels" . | nindent 6 }}
13 | app.kubernetes.io/component: traefik
14 | template:
15 | metadata:
16 | labels:
17 | {{- include "dask-gateway.labels" . | nindent 8 }}
18 | app.kubernetes.io/component: traefik
19 | {{- with .Values.traefik.annotations }}
20 | annotations:
21 | {{- . | toYaml | nindent 8 }}
22 | {{- end }}
23 | spec:
24 | {{- if .Values.rbac.enabled }}
25 | {{- if .Values.rbac.traefik.serviceAccountName }}
26 | serviceAccountName: {{ .Values.rbac.traefik.serviceAccountName }}
27 | {{- else }}
28 | serviceAccountName: {{ include "dask-gateway.traefikName" . }}
29 | {{- end }}
30 | {{- end }}
31 | terminationGracePeriodSeconds: 60
32 | containers:
33 | - name: traefik
34 | image: {{ .Values.traefik.image.name }}:{{ .Values.traefik.image.tag }}
35 | imagePullPolicy: {{ .Values.traefik.image.pullPolicy }}
36 | securityContext:
37 | runAsUser: 1000
38 | runAsGroup: 1000
39 | {{- with .Values.traefik.resources }}
40 | resources:
41 | {{- . | toYaml | nindent 12 }}
42 | {{- end }}
43 | # The Dockerfile's entrypoint is traefik the CLI, and we provide args
44 | # to it as documented here:
45 | # https://doc.traefik.io/traefik/reference/static-configuration/cli/
46 | #
47 | args:
48 | - "--global.checknewversion=false"
49 | - "--global.sendanonymoususage=false"
50 | - "--ping=true"
51 | - "--providers.kubernetescrd"
52 | - "--providers.kubernetescrd.allowcrossnamespace=true"
53 | - '--providers.kubernetescrd.labelselector=gateway.dask.org/instance={{ include "dask-gateway.fullname" . }}'
54 | - "--providers.kubernetescrd.throttleduration=2"
55 | - "--log.level={{ .Values.traefik.loglevel }}"
56 | - "--entrypoints.traefik.address=:9000"
57 | - "--entrypoints.web.address=:8000"
58 | {{- if ne (toString .Values.traefik.service.ports.tcp.port) "web" }}
59 | - "--entrypoints.tcp.address=:8786"
60 | {{- end }}
61 | {{- if .Values.traefik.dashboard }}
62 | - "--api.dashboard=true"
63 | - "--api.insecure=true"
64 | {{- end }}
65 | {{- range .Values.traefik.additionalArguments }}
66 | - {{ . | quote }}
67 | {{- end }}
68 | ports:
69 | - name: traefik
70 | containerPort: 9000
71 | - name: web
72 | containerPort: 8000
73 | {{- if ne (toString .Values.traefik.service.ports.tcp.port) "web" }}
74 | - name: tcp
75 | containerPort: 8786
76 | {{- end }}
77 | readinessProbe:
78 | httpGet:
79 | path: /ping
80 | port: 9000
81 | failureThreshold: 1
82 | initialDelaySeconds: 10
83 | periodSeconds: 10
84 | successThreshold: 1
85 | timeoutSeconds: 2
86 | livenessProbe:
87 | httpGet:
88 | path: /ping
89 | port: 9000
90 | failureThreshold: 3
91 | initialDelaySeconds: 10
92 | periodSeconds: 10
93 | successThreshold: 1
94 | timeoutSeconds: 2
95 | {{- with .Values.traefik.affinity }}
96 | affinity:
97 | {{- . | toYaml | nindent 8 }}
98 | {{- end }}
99 | {{- with .Values.traefik.tolerations }}
100 | tolerations:
101 | {{- . | toYaml | nindent 8 }}
102 | {{- end }}
103 | {{- with .Values.traefik.nodeSelector }}
104 | nodeSelector:
105 | {{- . | toYaml | nindent 8 }}
106 | {{- end }}
107 | {{- with .Values.traefik.imagePullSecrets }}
108 | imagePullSecrets:
109 | {{- . | toYaml | nindent 8 }}
110 | {{- end }}
111 | {{- end }}
112 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/traefik/rbac.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.rbac.enabled -}}
2 | {{- if .Values.traefik.installTraefik -}}
3 | {{- if not .Values.rbac.traefik.serviceAccountName -}}
4 | kind: ServiceAccount
5 | apiVersion: v1
6 | metadata:
7 | name: {{ include "dask-gateway.traefikName" . }}
8 | ---
9 | kind: ClusterRole
10 | apiVersion: rbac.authorization.k8s.io/v1
11 | metadata:
12 | name: {{ include "dask-gateway.traefikName" . }}
13 | # The rules below are from Traefik's Helm chart, most recently 21 Jan 2025 from
14 | # commit 4e15c7c for use with Traefik v3.2.0.
15 | #
16 | # To update them again, you can do:
17 | #
18 | # git clone https://github.com/traefik/traefik-helm-chart
19 | # cd traefik-helm-chart
20 | # helm template traefik --show-only templates/rbac/clusterrole.yaml --set providers.kubernetesIngress.enabled=false
21 | #
22 | rules:
23 | - apiGroups:
24 | - ""
25 | resources:
26 | - nodes
27 | - secrets
28 | - services
29 | verbs:
30 | - get
31 | - list
32 | - watch
33 | - apiGroups:
34 | - discovery.k8s.io
35 | resources:
36 | - endpointslices
37 | verbs:
38 | - list
39 | - watch
40 | - apiGroups:
41 | - extensions
42 | - networking.k8s.io
43 | resources:
44 | - ingressclasses
45 | verbs:
46 | - get
47 | - list
48 | - watch
49 | - apiGroups:
50 | - traefik.io
51 | resources:
52 | - ingressroutes
53 | - ingressroutetcps
54 | - ingressrouteudps
55 | - middlewares
56 | - middlewaretcps
57 | - serverstransports
58 | - serverstransporttcps
59 | - tlsoptions
60 | - tlsstores
61 | - traefikservices
62 | verbs:
63 | - get
64 | - list
65 | - watch
66 | ---
67 | kind: ClusterRoleBinding
68 | apiVersion: rbac.authorization.k8s.io/v1
69 | metadata:
70 | name: {{ include "dask-gateway.traefikName" . }}
71 | roleRef:
72 | apiGroup: rbac.authorization.k8s.io
73 | kind: ClusterRole
74 | name: {{ include "dask-gateway.traefikName" . }}
75 | subjects:
76 | - kind: ServiceAccount
77 | name: {{ include "dask-gateway.traefikName" . }}
78 | namespace: {{ .Release.Namespace }}
79 | {{- end }}
80 | {{- end }}
81 | {{- end }}
82 |
--------------------------------------------------------------------------------
/resources/helm/dask-gateway/templates/traefik/service.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.traefik.installTraefik -}}
2 | apiVersion: v1
3 | kind: Service
4 | metadata:
5 | name: {{ include "dask-gateway.traefikName" . }}
6 | labels:
7 | {{- include "dask-gateway.labels" . | nindent 4 }}
8 | {{- with .Values.traefik.service.annotations }}
9 | annotations:
10 | {{- . | toYaml | nindent 4 }}
11 | {{- end }}
12 | spec:
13 | type: {{ .Values.traefik.service.type }}
14 | selector:
15 | {{- include "dask-gateway.matchLabels" . | nindent 4 }}
16 | app.kubernetes.io/component: traefik
17 | {{- with .Values.traefik.service.spec }}
18 | {{- . | toYaml | nindent 2 }}
19 | {{- end }}
20 | ports:
21 | - name: web
22 | targetPort: 8000
23 | port: {{ .Values.traefik.service.ports.web.port }}
24 | {{- with .Values.traefik.service.ports.web.nodePort }}
25 | nodePort: {{ . }}
26 | {{- end }}
27 | {{- if ne (toString .Values.traefik.service.ports.tcp.port) "web" }}
28 | - name: tcp
29 | targetPort: 8786
30 | port: {{ .Values.traefik.service.ports.tcp.port }}
31 | {{- with .Values.traefik.service.ports.tcp.nodePort }}
32 | nodePort: {{ . }}
33 | {{- end }}
34 | {{- end }}
35 | {{- if .Values.traefik.dashboard }}
36 | - name: traefik
37 | targetPort: 9000
38 | port: 9000
39 | {{- end }}
40 | {{- end }}
41 |
--------------------------------------------------------------------------------
/resources/helm/testing/chart-install-values.yaml:
--------------------------------------------------------------------------------
1 | gateway:
2 | loglevel: DEBUG
3 | prefix: /services/dask-gateway
4 | backend:
5 | scheduler:
6 | cores:
7 | request: 0.1
8 | memory:
9 | request: 256M
10 | worker:
11 | cores:
12 | request: 0.1
13 | memory:
14 | request: 256M
15 |
16 | controller:
17 | loglevel: DEBUG
18 | completedClusterMaxAge: 60
19 | completedClusterCleanupPeriod: 30
20 |
21 | traefik:
22 | loglevel: INFO
23 | service:
24 | ports:
25 | web:
26 | nodePort: 30200
27 |
--------------------------------------------------------------------------------
/resources/helm/testing/skaffold.yaml:
--------------------------------------------------------------------------------
1 | gateway:
2 | loglevel: DEBUG
3 | backend:
4 | scheduler:
5 | cores:
6 | request: 0.1
7 | memory:
8 | request: 256M
9 |
10 | worker:
11 | cores:
12 | request: 0.1
13 | memory:
14 | request: 256M
15 |
16 | controller:
17 | loglevel: DEBUG
18 | completedClusterMaxAge: 60
19 | completedClusterCleanupPeriod: 30
20 |
21 | traefik:
22 | loglevel: INFO
23 |
--------------------------------------------------------------------------------
/resources/helm/tools/compare-values-schema-content.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | This script is meant to assist in a manual validation that the content of
4 | values.schema.yaml covers values.yaml, and vice versa.
5 |
6 | FIXME: It would be nice to run this as part of our CI pipeline to report if
7 | values.schema.yaml and values.yaml gets out of sync, but first we need to
8 | address what it means to be out of sync.
9 |
10 | Consider if values.schema.yaml describes extraLabels, and we in this helm chart
11 | have an extra label set in values, how should our comparison realize that
12 | it's nothing to bother about?
13 |
14 | That kind of complexity is an issue for labels, resources,
15 | containerSecurityContext, readiness- and livenessProbe's for example.
16 |
17 | This script originated from the jupyterhub/zero-to-jupyterhub-k8s project. It is
18 | not yet extracted to be a standalone package, but may be in the future.
19 | """
20 |
21 | import os
22 | from collections.abc import MutableMapping
23 |
24 | import yaml
25 |
26 | here_dir = os.path.abspath(os.path.dirname(__file__))
27 | schema_yaml = os.path.join(here_dir, os.pardir, "dask-gateway", "values.schema.yaml")
28 | values_yaml = os.path.join(here_dir, os.pardir, "dask-gateway", "values.yaml")
29 | lint_and_validate_values_yaml = os.path.join(
30 | here_dir, os.pardir, "testing", "chart-install-values.yaml"
31 | )
32 |
33 |
def reduce_schema(d):
    """
    Take a jsonschema loaded as a dictionary and return a reduced structure
    that keeps only the key hierarchy it describes, discarding everything else.

    Leaf keys (no nested "properties") map to None; nested schemas are
    reduced recursively.
    """
    if "properties" not in d:
        return {}
    return {
        key: (
            reduce_schema(sub)
            if isinstance(sub, MutableMapping) and sub.get("properties")
            else None
        )
        for key, sub in d["properties"].items()
    }
48 |
49 |
def flatten(d, parent_key="", sep="."):
    """
    Take a nested dictionary and return all of its keys flattened with a
    separator, so one returned element could for example be
    "gateway.image.tag".

    The outermost call returns a set; recursive calls return lists.
    """
    keys = []
    for key, value in d.items():
        path = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, MutableMapping) and value:
            keys.extend(flatten(value, parent_key=path, sep=sep))
        else:
            # Leaf value, or an empty mapping: record the path itself.
            keys.append(path)
    return keys if parent_key else set(keys)
69 |
70 |
def run():
    """
    Load values.schema.yaml and values.yaml, flatten both into sets of dotted
    key paths, and print the differences in both directions for manual review.
    """
    with open(schema_yaml) as f:
        schema_keys = flatten(reduce_schema(yaml.safe_load(f)))
    with open(values_yaml) as f:
        values_keys = flatten(yaml.safe_load(f))
    # FIXME: chart-install-values.yaml isn't compared yet, see the notes in
    # the module docstring.
    # with open(lint_and_validate_values_yaml) as f:
    #     lint_and_validate_keys = flatten(yaml.safe_load(f))

    comparisons = [
        (
            "The keys from values.yaml minus those from values.schema.yaml:\n",
            values_keys - schema_keys,
        ),
        (
            "The keys from values.schema.yaml minus those from values.yaml:\n",
            schema_keys - values_keys,
        ),
    ]
    for title, difference in comparisons:
        print(title, "\n".join(sorted(difference)), "\n\n", sep="\n")
97 |
98 |
99 | run()
100 |
--------------------------------------------------------------------------------
/resources/helm/tools/generate-json-schema.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | This script reads values.schema.yaml and generates a values.schema.json that we
4 | can package with the Helm chart. This is allowing the helm CLI to perform
5 | validation of the passed configuration values.
6 |
7 | While we can directly generate a values.schema.json from values.schema.yaml, it
8 | contains a lot of description text we use to generate our configuration
9 | reference that isn't helpful to ship along the validation schema. Due to that,
10 | we trim away everything that isn't needed.
11 |
12 | This script originated from the jupyterhub/zero-to-jupyterhub-k8s project. It is
13 | not yet extracted to be a standalone package, but may be in the future.
14 | """
15 |
16 | import json
17 | import os
18 | from collections.abc import MutableMapping
19 |
20 | import yaml
21 |
22 | here_dir = os.path.abspath(os.path.dirname(__file__))
23 | schema_yaml = os.path.join(here_dir, os.pardir, "dask-gateway", "values.schema.yaml")
24 | values_schema_json = os.path.join(
25 | here_dir, os.pardir, "dask-gateway", "values.schema.json"
26 | )
27 |
28 |
def clean_jsonschema(d, parent_key=""):
    """
    Modify a dictionary representing a jsonschema in place so it no longer
    contains jsonschema keys that aren't relevant for a values.schema.json
    file meant solely for use by the helm CLI.
    """
    # Clean up the current level first; only "description" is stripped.
    d.pop("description", None)

    # Recurse by stepping through "properties", so a Helm chart configuration
    # option that happens to be named like a jsonschema key to remove (for
    # example "description") is not mistakenly deleted.
    for name, subschema in d.get("properties", {}).items():
        if isinstance(subschema, MutableMapping):
            clean_jsonschema(subschema, name)
48 |
49 |
def run():
    """
    Read values.schema.yaml, strip keys not needed for validation, and write
    the result to values.schema.json for packaging with the Helm chart.
    """
    with open(schema_yaml) as f:
        schema = yaml.safe_load(f)

    # The description keys are only relevant when generating our
    # configuration reference documentation, so drop them before packaging
    # the schema with the chart.
    clean_jsonschema(schema)

    with open(values_schema_json, "w") as f:
        json.dump(schema, f)

    print("dask-gateway/values.schema.json created")
66 |
67 |
68 | run()
69 |
--------------------------------------------------------------------------------
/resources/helm/tools/validate-against-schema.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | This scripts validates the charts default values against the values.schema.yaml
4 | file, and optionally also another file against the values.schema.yaml.
5 |
6 | This script originated from the jupyterhub/zero-to-jupyterhub-k8s project. It is
7 | not yet extracted to be a standalone package, but may be in the future.
8 | """
9 |
10 | import os
11 |
12 | import jsonschema
13 | import yaml
14 |
15 | here_dir = os.path.abspath(os.path.dirname(__file__))
16 | schema_yaml = os.path.join(here_dir, os.pardir, "dask-gateway", "values.schema.yaml")
17 | values_yaml = os.path.join(here_dir, os.pardir, "dask-gateway", "values.yaml")
18 | lint_and_validate_values_yaml = os.path.join(
19 | here_dir, os.pardir, "testing", "chart-install-values.yaml"
20 | )
21 |
# Load the schema and both values files up front, so any YAML syntax error
# surfaces before validation starts.
with open(schema_yaml) as f:
    schema = yaml.safe_load(f)
with open(values_yaml) as f:
    values = yaml.safe_load(f)
with open(lint_and_validate_values_yaml) as f:
    lint_and_validate_values = yaml.safe_load(f)

# Validate values.yaml against schema
#
# jsonschema.validate raises ValidationError on a mismatch, which makes this
# script exit non-zero.
print("Validating values.yaml against values.schema.yaml...")
jsonschema.validate(values, schema)
print("OK!")
print()

# FIXME: Create a lint-and-validate-values.yaml file that covers all kinds of
# configuration properly and let it be tested to function with the schema
# and successfully render valid k8s templates.
#
# # Validate chart-install-values.yaml against schema
# print("Validating chart-install-values.yaml against values.schema.yaml...")
# jsonschema.validate(lint_and_validate_values, schema)
# print("OK!")
43 |
--------------------------------------------------------------------------------
/skaffold.yaml:
--------------------------------------------------------------------------------
1 | # "skaffold" is a command line tool we can use to rebuild images for a Helm
2 | # chart and even do some "live reload" of parts of an installation of a Helm
3 | # chart.
4 | #
5 | # skaffold's purpose in this project is to be of assistance for local
6 | # development, while we use the tool "chartpress" for testing and publishing of
7 | # the Helm chart in our GitHub Workflows.
8 | #
9 | # Skaffold configuration reference: https://skaffold.dev/docs/references/yaml/
10 | #
11 | # FIXME:
12 | # - Add notes on how to use skaffold
13 | # - Update the skaffold/v2alpha3 configuration to a modern one
14 | #
15 | apiVersion: skaffold/v2alpha3
16 | kind: Config
17 |
18 | build:
19 | local:
20 | push: false
21 | useBuildkit: true
22 | artifacts:
23 | - image: ghcr.io/dask/dask-gateway-server
24 | context: ./dask-gateway-server
25 | docker:
26 | dockerfile: Dockerfile
27 | - image: ghcr.io/dask/dask-gateway
28 | context: ./dask-gateway
29 | docker:
30 | dockerfile: Dockerfile
31 |
32 | deploy:
33 | helm:
34 | releases:
35 | - name: dask-gateway
36 | chartPath: resources/helm/dask-gateway/
37 | namespace: default
38 | imageStrategy:
39 | helm: {}
40 | values:
41 | gateway.image: ghcr.io/dask/dask-gateway-server
42 | gateway.backend.image: ghcr.io/dask/dask-gateway
43 | controller.image: ghcr.io/dask/dask-gateway-server
44 | valuesFiles:
45 | - resources/helm/testing/skaffold.yaml
46 | flags:
47 | upgrade:
48 | - --install
49 |
50 | profiles:
51 | - name: local-controller
52 | patches:
53 | - op: add
54 | path: /deploy/helm/releases/0/setValues
55 | value:
56 | controller.enabled: false
57 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/tests/__init__.py
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import pytest
4 |
5 |
@pytest.fixture(autouse=True)
def reset_logs():
    """
    PDB's stdout/stderr capture can close fds that our loggers are configured
    to write to. To prevent this, drop the log handlers before every test.
    """
    del logging.getLogger("DaskGateway").handlers[:]
def pytest_configure(config):
    """Register custom markers.

    Done here rather than in setup.cfg, since the repository has two packages.
    """
    marker = "kubernetes: marks a test as kubernetes-related"
    config.addinivalue_line("markers", marker)
16 |
--------------------------------------------------------------------------------
/tests/kubernetes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/tests/kubernetes/__init__.py
--------------------------------------------------------------------------------
/tests/kubernetes/test_helm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dask/dask-gateway/6f2ad287d7a65238eec86f31af7675032ee0b796/tests/kubernetes/test_helm.py
--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | # This file describes the requirements to test the Python code in dask-gateway
2 | # and dask-gateway server.
3 | #
4 | # This is how you would install and run most tests:
5 | #
6 | # pip install -r tests/requirements.txt
7 | # pytest
8 | #
9 | #
10 | # FIXME:
11 | # - Make kubernetes test like other backend tests, something you opt into rather
12 | #   than out of.
13 | #
14 |
15 | # chartpress helps update the Helm chart's Chart.yaml and values.yaml with
16 | # tagged images etc.
17 | chartpress
18 |
19 | # pyyaml is used by our generate-json-schema.py script.
20 | pyyaml
21 |
22 | pytest
23 | pytest-asyncio
24 | pytest-timeout
25 |
26 | # dask-gateway and dask-gateway-server and all their dependencies are assumed to
27 | # be installed.
28 | --editable="./dask-gateway"
29 | --editable="./dask-gateway-server[all_backends]"
30 |
31 | # ipython and ipywidgets are optional integrations allowing for fancy rendering of
32 | # end user provided configuration options. Tests in test_options.py will be
33 | # skipped without this installed.
34 | ipython
35 | ipywidgets
36 |
37 | # bokeh needs to be installed for test_dashboard_link_from_public_address to not
38 | # be skipped.
39 | #
40 | # FIXME: clarify why bokeh is needed for this test.
41 | #
42 | bokeh
43 |
44 | # trustme is a utility used in the code of the test ca_and_tls_proxy in
45 | # test_proxies.py.
46 | trustme
47 |
48 | # IMPORTANT: These environment variables indicating tests should be run with
49 | # integration against external dask cluster providers (backends).
50 | #
51 | # For this to work, there needs to be various things running in the
52 | # background.
53 | #
54 | # TEST_DASK_GATEWAY_YARN - test_yarn_backend.py, and test_kerberos_auth in test_auth.py
55 | # TEST_DASK_GATEWAY_PBS - test_pbs_backend.py
56 | # TEST_DASK_GATEWAY_SLURM - test_slurm_backend.py
57 | # TEST_DASK_GATEWAY_KUBE - kubernetes/test_integration.py
58 | #
59 | # TEST_DASK_GATEWAY_KUBE_ADDRESS is also used to describe how to reach the
60 | # traefik pod used as a proxy to access dask-gateway-server running in the api
61 | # pod.
62 | #
63 |
64 | # IMPORTANT: Not installed Python packages with system dependencies
65 | #
66 | # - To run tests related to KerberosAuthenticator, you need to install
67 | # pykerberos which is tricky to install with pip but easy with conda. For
68 | #    example, to install pykerberos with pip on ubuntu, you need to first
69 | # install the apt package libkrb5-dev.
70 | # - To run tests related to JupyterHubAuthenticator, you need to install
71 | # jupyterhub and the Node npm package configurable-http-proxy that JupyterHub
72 | # depends on to route traffic.
73 | # - To run tests related to the Helm chart, you need to install the helm CLI.
74 | #
75 |
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 | from dask_gateway_server.app import DaskGateway
5 | from dask_gateway_server.proxy.core import _PROXY_EXE, ProxyApp
6 |
7 |
def test_generate_config(tmpdir, capfd):
    """
    generate-config should refuse to overwrite an existing config file unless
    --force is given, and with --force should write a usable default config.
    """
    cfg_file = str(tmpdir.join("dask_gateway_config.py"))
    orig_text = "c.foo = 'bar'"

    with open(cfg_file, "w") as f:
        f.write(orig_text)

    # Without --force, launching against an existing file must exit with an
    # "already exists" error and leave the file untouched.
    with pytest.raises(SystemExit) as exc:
        DaskGateway.launch_instance(["generate-config", "--output", cfg_file])
    # BUG FIX: clear_instance() used to sit inside the pytest.raises block,
    # after the line that raises SystemExit, so it never ran. Clear the
    # singleton here so the second launch below starts from a fresh instance.
    DaskGateway.clear_instance()
    assert "already exists" in exc.value.code
    out, err = capfd.readouterr()
    assert not out
    assert not err

    assert os.path.exists(cfg_file)
    with open(cfg_file) as f:
        cfg_text = f.read()
    assert cfg_text == orig_text

    # With --force the file is overwritten and the path reported on stdout.
    DaskGateway.launch_instance(["generate-config", "--force", "--output", cfg_file])
    DaskGateway.clear_instance()
    out, err = capfd.readouterr()
    assert cfg_file in out
    assert not err

    with open(cfg_file) as f:
        cfg_text = f.read()

    assert "DaskGateway.backend_class" in cfg_text
    assert "Backend.cluster_options" in cfg_text
38 | assert "Backend.cluster_options" in cfg_text
39 |
40 |
def test_proxy_cli(tmpdir, monkeypatch):
    """
    The `proxy` subcommand should exec the proxy binary with arguments and an
    environment derived from the config file and the CLI flags.
    """
    cfg_file = str(tmpdir.join("dask_gateway_config.py"))

    config_lines = [
        "c.DaskGateway.address = '127.0.0.1:8888'",
        "c.Proxy.address = '127.0.0.1:8866'",
        "c.Proxy.tcp_address = '127.0.0.1:8867'",
        "c.Proxy.log_level = 'debug'",
        "c.Proxy.api_token = 'abcde'",
    ]
    with open(cfg_file, "w") as f:
        f.write("\n".join(config_lines))

    recorded = []

    # Stub out os.execle so the proxy binary is never actually exec'd; we
    # only capture the arguments it would have been launched with.
    monkeypatch.setattr(os, "execle", lambda *args: recorded.extend(args))
    DaskGateway.launch_instance(["proxy", "-f", cfg_file, "--log-level", "warn"])
    DaskGateway.clear_instance()
    ProxyApp.clear_instance()

    assert recorded
    env = recorded.pop()

    # The --log-level CLI flag should win over the config file's "debug".
    assert recorded == [
        _PROXY_EXE,
        "dask-gateway-proxy",
        "-address",
        "127.0.0.1:8866",
        "-tcp-address",
        "127.0.0.1:8867",
        "-api-url",
        "http://127.0.0.1:8888/api/v1/routes",
        "-log-level",
        "warn",
    ]

    assert "DASK_GATEWAY_PROXY_TOKEN" in env
81 |
--------------------------------------------------------------------------------
/tests/test_local_backend.py:
--------------------------------------------------------------------------------
1 | from .utils_test import (
2 | LocalTestingBackend,
3 | temp_gateway,
4 | wait_for_workers,
5 | with_retries,
6 | )
7 |
8 |
async def test_local_cluster_backend():
    """
    End-to-end exercise of the local backend: start a cluster, scale it up
    and down, run a computation, then verify liveness checks after shutdown.
    """
    async with temp_gateway(backend_class=LocalTestingBackend) as g:
        async with g.gateway_client() as gateway:
            async with gateway.new_cluster() as cluster:
                db_cluster = g.gateway.backend.db.get_cluster(cluster.name)

                # A freshly started cluster should report as alive.
                res = await g.gateway.backend.do_check_clusters([db_cluster])
                assert res == [True]

                # Scale up to 3 workers, then back down to 1.
                await cluster.scale(3)
                await wait_for_workers(cluster, exact=3)
                await cluster.scale(1)
                await wait_for_workers(cluster, exact=1)

                db_workers = list(db_cluster.workers.values())

                # Exactly one of the previously seen workers should still
                # check as alive; retry since stop is asynchronous.
                async def test():
                    res = await g.gateway.backend.do_check_workers(db_workers)
                    assert sum(res) == 1

                await with_retries(test, 20, 0.5)

                # The cluster still works for actual computation.
                async with cluster.get_client(set_as_default=False) as client:
                    res = await client.submit(lambda x: x + 1, 1)
                    assert res == 2

                await cluster.scale(0)
                await wait_for_workers(cluster, exact=0)

                # After scaling to zero, no worker should check as alive.
                async def test():
                    res = await g.gateway.backend.do_check_workers(db_workers)
                    assert sum(res) == 0

                await with_retries(test, 20, 0.5)

            # No-op for shutdown of already shutdown worker
            db_worker = db_workers[0]
            res = await g.gateway.backend.do_stop_worker(db_worker)

            # Leaving the cluster context shuts it down; it should then
            # check as not alive.
            async def test():
                res = await g.gateway.backend.do_check_clusters([db_cluster])
                assert res == [False]

            await with_retries(test, 20, 0.5)
53 |
--------------------------------------------------------------------------------
/tests/test_traitlets.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from dask_gateway_server.traitlets import Command, Type
3 | from traitlets import HasTraits, TraitError
4 |
5 |
def test_Type_traitlet():
    """Type traitlets accept dotted import strings, with a clear error for
    paths that cannot be imported."""

    class Foo(HasTraits):
        typ = Type(klass="dask_gateway_server.auth.Authenticator")

    # A dotted path that doesn't resolve raises a helpful TraitError.
    with pytest.raises(TraitError) as excinfo:
        Foo(typ="dask_gateway_server.auth.not_a_real_path")
    assert "Failed to import" in str(excinfo.value)

    # A valid dotted path is accepted without error.
    Foo(typ="dask_gateway_server.auth.SimpleAuthenticator")
15 |
16 |
def test_Command_traitlet():
    """Command traitlets normalize both string and list values to lists."""

    class C(HasTraits):
        cmd = Command("default command")
        cmd2 = Command(["default_cmd"])

    instance = C()
    # A string default is wrapped whole as one element, not shell-split.
    assert instance.cmd == ["default command"]
    assert instance.cmd2 == ["default_cmd"]
    # Assigning a string likewise wraps it as a single-element list.
    instance.cmd = "foo bar"
    assert instance.cmd == ["foo bar"]
27 |
28 |
def test_worker_threads_kube_cluster():
    """Check how KubeClusterConfig derives worker_threads from cores settings."""
    # Skip unless the kubernetes backend (and its dependencies) is installed.
    kube_backend = pytest.importorskip("dask_gateway_server.backends.kubernetes")
    # KubeClusterConfig allows floats, so determining worker_threads is more complex
    #
    # Fractional cores round down, but threads never drop below 1.
    assert kube_backend.KubeClusterConfig().worker_threads == 1
    assert kube_backend.KubeClusterConfig(worker_threads=None).worker_threads == 1
    assert kube_backend.KubeClusterConfig(worker_cores=0.1).worker_threads == 1
    assert kube_backend.KubeClusterConfig(worker_cores_limit=0.1).worker_threads == 1

    # worker_cores drives the derived thread count; worker_cores_limit alone
    # does not raise it above the default of 1.
    assert kube_backend.KubeClusterConfig(worker_cores=2.1).worker_threads == 2
    assert kube_backend.KubeClusterConfig(worker_cores_limit=2.1).worker_threads == 1
    assert (
        kube_backend.KubeClusterConfig(
            worker_cores=2.1, worker_threads=None
        ).worker_threads
        == 2
    )
    assert (
        kube_backend.KubeClusterConfig(
            worker_cores_limit=2.1, worker_threads=None
        ).worker_threads
        == 1
    )
    # An explicit worker_threads value always wins over derived values.
    assert (
        kube_backend.KubeClusterConfig(
            worker_cores=2.1, worker_threads=1
        ).worker_threads
        == 1
    )
    assert (
        kube_backend.KubeClusterConfig(
            worker_cores_limit=2.1, worker_threads=1
        ).worker_threads
        == 1
    )
63 |
--------------------------------------------------------------------------------
/tests/test_yarn_backend.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 | from traitlets.config import Config
5 |
# skein is needed to talk to YARN; skip this whole module if it's missing
skein = pytest.importorskip("skein")

# These tests require a live (containerized) YARN cluster, so they only
# run when explicitly enabled via the environment
if not os.environ.get("TEST_DASK_GATEWAY_YARN"):
    pytest.skip("Not running YARN tests", allow_module_level=True)

# Imported after the skips so collection doesn't fail without YARN deps
from dask_gateway.auth import BasicAuth
from dask_gateway_server.backends.yarn import YarnBackend

from .utils_test import temp_gateway, wait_for_workers, with_retries

# Every test in this module gets the application-cleanup fixture below
pytestmark = pytest.mark.usefixtures("cleanup_applications")


# YARN application ids started by the tests; consulted by the cleanup
# fixture to kill anything the tests leaked
_APPIDS = set()
20 |
21 |
@pytest.fixture(scope="module")
def cleanup_applications():
    """Module-scoped safety net: kill any YARN applications left behind.

    After all tests in the module finish, best-effort kill every
    application id still recorded in ``_APPIDS``.
    """
    yield

    if _APPIDS:
        # Kerberos-authenticated client for the test cluster
        with skein.Client(principal="dask", keytab="/home/dask/dask.keytab") as client:
            for app_id in _APPIDS:
                try:
                    client.kill_application(app_id)
                except OSError:
                    # Application already gone (or unreachable) -- best effort
                    pass
        print("-- Stopped %d lost clusters --" % len(_APPIDS))
36 |
37 |
class YarnTestingBackend(YarnBackend):
    """YarnBackend that records every started app id in ``_APPIDS``.

    Lets the cleanup fixture kill applications that a failing test
    never stopped.
    """

    async def do_start_cluster(self, cluster):
        # Record each app id as soon as the parent reports it
        async for status in super().do_start_cluster(cluster):
            _APPIDS.add(status["app_id"])
            yield status

    async def do_stop_cluster(self, cluster):
        app_id = cluster.state.get("app_id")
        await super().do_stop_cluster(cluster)
        # Only forget the id once the stop has completed
        _APPIDS.discard(app_id)
48 |
49 |
@pytest.mark.timeout(90)
async def test_yarn_backend():
    """End-to-end exercise of the YARN backend.

    Starts a gateway with the tracking backend above, creates a cluster,
    scales it up and down, runs a computation, and finally verifies the
    backend observes the cluster as stopped after its context exits.
    Requires a live YARN cluster (see module-level skips).
    """
    c = Config()
    # Small resource requests so the test YARN cluster can satisfy them
    c.YarnClusterConfig.scheduler_cmd = "/opt/python/bin/dask-scheduler"
    c.YarnClusterConfig.worker_cmd = "/opt/python/bin/dask-worker"
    c.YarnClusterConfig.scheduler_memory = "256M"
    c.YarnClusterConfig.worker_memory = "256M"
    c.YarnClusterConfig.scheduler_cores = 1
    c.YarnClusterConfig.worker_cores = 1

    # Kerberos credentials the gateway uses to talk to YARN
    c.YarnBackend.keytab = "/home/dask/dask.keytab"
    c.YarnBackend.principal = "dask"

    # Use the tracking subclass so leaked app ids get cleaned up
    c.DaskGateway.backend_class = YarnTestingBackend

    async with temp_gateway(config=c) as g:
        auth = BasicAuth(username="alice")
        async with g.gateway_client(auth=auth) as gateway:
            async with gateway.new_cluster() as cluster:
                db_cluster = g.gateway.backend.db.get_cluster(cluster.name)

                # A freshly started cluster reports as running
                res = await g.gateway.backend.do_check_clusters([db_cluster])
                assert res == [True]

                # Scale up to 2 workers, then back down to 1
                await cluster.scale(2)
                await wait_for_workers(cluster, exact=2)
                await cluster.scale(1)
                await wait_for_workers(cluster, exact=1)

                db_workers = list(db_cluster.workers.values())

                # Exactly one recorded worker should still check as live
                # (retried -- YARN state updates are asynchronous)
                async def test():
                    res = await g.gateway.backend.do_check_workers(db_workers)
                    assert sum(res) == 1

                await with_retries(test, 30, 0.25)

                # The cluster can actually run a computation
                async with cluster.get_client(set_as_default=False) as client:
                    res = await client.submit(lambda x: x + 1, 1)
                    assert res == 2

                # Scale to zero; eventually no worker checks as live
                await cluster.scale(0)
                await wait_for_workers(cluster, exact=0)

                async def test():
                    res = await g.gateway.backend.do_check_workers(db_workers)
                    assert sum(res) == 0

                await with_retries(test, 30, 0.25)

            # The cluster context has exited, so the backend should
            # eventually report the cluster as stopped (shutting down an
            # already-shutdown cluster is a no-op)
            async def test():
                res = await g.gateway.backend.do_check_clusters([db_cluster])
                assert res == [False]

            await with_retries(test, 30, 0.25)
106 |
--------------------------------------------------------------------------------