├── .flake8
├── .github
│   ├── dependabot.yaml
│   └── workflows
│       ├── python-publish.yml
│       └── test.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── RELEASE.md
├── SPAWNERS.md
├── batchspawner
│   ├── __init__.py
│   ├── _version.py
│   ├── api.py
│   ├── batchspawner.py
│   ├── singleuser.py
│   └── tests
│       ├── __init__.py
│       ├── conftest.py
│       └── test_spawners.py
├── pyproject.toml
└── setup.py

--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
# flake8 is used for linting Python code, set up to run automatically with
# pre-commit.
#
# ref: https://flake8.pycqa.org/en/latest/user/configuration.html
#
[flake8]
# E: style errors
# W: style warnings
# C: complexity
# D: docstring warnings (unused pydocstyle extension)
ignore = E, C, W, D

--------------------------------------------------------------------------------
/.github/dependabot.yaml:
--------------------------------------------------------------------------------
# dependabot.yaml reference: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
#
# Notes:
# - Status and logs from dependabot are provided at
#   https://github.com/jupyterhub/batchspawner/network/updates.
#
version: 2
updates:
  # Maintain dependencies in our GitHub Workflows
  - package-ecosystem: github-actions
    directory: /
    labels: [ci]
    schedule:
      interval: monthly
      time: "05:00"
      timezone: Etc/UTC

--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
#
name: Upload Python Package

on:
  release:
    types: [released]

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      - name: install build package
        run: |
          pip install --upgrade pip
          pip install build
          pip freeze

      - name: build release
        run: |
          python -m build --sdist --wheel .
          ls -l dist
          sha256sum dist/* | tee SHA256SUMS

      - name: Publish to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          pip install twine
          twine upload --skip-existing dist/*

--------------------------------------------------------------------------------
/.github/workflows/test.yaml:
--------------------------------------------------------------------------------
# This is a GitHub workflow defining a set of jobs with a set of steps.
# ref: https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions
#
name: Test

on:
  pull_request:
    paths-ignore:
      - "**.md"
      - ".github/workflows/*.yaml"
      - "!.github/workflows/test.yaml"
  push:
    paths-ignore:
      - "**.md"
      - ".github/workflows/*.yaml"
      - "!.github/workflows/test.yaml"
    branches-ignore:
      - "dependabot/**"
      - "pre-commit-ci-update-config"
    tags: ["**"]
  workflow_dispatch:

jobs:
  pytest:
    name: Run pytest
    runs-on: ${{ matrix.runs-on || 'ubuntu-22.04' }}

    strategy:
      fail-fast: false
      matrix:
        include:
          # test oldest supported version
          - python-version: "3.6"
            pip-install-spec: "jupyterhub==1.5.1 sqlalchemy==1.*"
            runs-on: ubuntu-20.04 # python 3.6 is only available in 20.04

          - python-version: "3.7"
            pip-install-spec: "jupyterhub==2.* sqlalchemy==1.*"
          - python-version: "3.8"
            pip-install-spec: "jupyterhub==3.*"
          - python-version: "3.10"
            pip-install-spec: "jupyterhub==4.*"
          - python-version: "3.11"
            pip-install-spec: "jupyterhub==4.*"
          - python-version: "3.12"
            pip-install-spec: "jupyterhub==4.*"

          # test unreleased jupyterhub, failures tolerated
          - python-version: "3.X"
            pip-install-spec: "git+https://github.com/jupyterhub/jupyterhub"
            allow-failure: true

    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: "lts/*"
      - uses: actions/setup-python@v5
        with:
          python-version: "${{ matrix.python-version }}"

      - name: Install Node dependencies
        run: |
          npm install -g configurable-http-proxy

      - name: Install Python dependencies
        run: |
          pip install --upgrade pip
          pip install ${{ matrix.pip-install-spec }}
          pip install -e ".[test]"

      - name: List dependencies
        run: |
          pip freeze

      - name: pytest
        run: |
          pytest

      # GitHub action reference: https://github.com/codecov/codecov-action
      - uses: codecov/codecov-action@v4

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.egg-info/
*.log
*.pyc
__pycache__/
.cache/
.coverage
.pytest_cache
*~

--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
# pre-commit is a tool to perform a predefined set of tasks manually and/or
# automatically before git commits are made.
#
# Config reference: https://pre-commit.com/#pre-commit-configyaml---top-level
#
# Common tasks
#
# - Run on all files:   pre-commit run --all-files
# - Register git hooks: pre-commit install --install-hooks
#
repos:
  # Autoformat: Python code, syntax patterns are modernized
  - repo: https://github.com/asottile/pyupgrade
    rev: v3.15.2
    hooks:
      - id: pyupgrade
        args:
          - --py38-plus

  # Autoformat: Python code
  - repo: https://github.com/PyCQA/autoflake
    rev: v2.3.1
    hooks:
      - id: autoflake
        # args ref: https://github.com/PyCQA/autoflake#advanced-usage
        args:
          - --in-place

  # Autoformat: Python code
  - repo: https://github.com/pycqa/isort
    rev: 5.13.2
    hooks:
      - id: isort

  # Autoformat: Python code
  - repo: https://github.com/psf/black
    rev: "24.4.2"
    hooks:
      - id: black

  # Autoformat: markdown, yaml
  - repo: https://github.com/pre-commit/mirrors-prettier
    rev: v4.0.0-alpha.8
    hooks:
      - id: prettier

  # Lint: Python code
  - repo: https://github.com/PyCQA/flake8
    rev: "7.0.0"
    hooks:
      - id: flake8

  # Misc...
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    # ref: https://github.com/pre-commit/pre-commit-hooks#hooks-available
    hooks:
      # Autoformat: Makes sure files end in a newline and only a newline.
      - id: end-of-file-fixer

      # Autoformat: Sorts entries in requirements.txt.
      - id: requirements-txt-fixer

      # Prevent giant (500kB) files from being committed.
      - id: check-added-large-files

      # Lint: Check for files with names that would conflict on a
      # case-insensitive filesystem like MacOS HFS+ or Windows FAT.
      - id: check-case-conflict

      # Lint: Checks that non-binary executables have a proper shebang.
      - id: check-executables-have-shebangs

# pre-commit.ci config reference: https://pre-commit.ci/#configuration
ci:
  autoupdate_schedule: monthly

--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
# Changelog

## v1.3

### v1.3.0 - 2024-03-19

This release requires Python >=3.6 and JupyterHub >=1.5.1.

#### New features added

- allow for req_keepvars_extra to be configured [#295](https://github.com/jupyterhub/batchspawner/pull/295) ([@mark-tomich](https://github.com/mark-tomich), [@minrk](https://github.com/minrk))

#### Bugs fixed

- Remove `which jupyterhub-singleuser` command from `SlurmSpawner.batch_script` [#265](https://github.com/jupyterhub/batchspawner/pull/265) ([@t20100](https://github.com/t20100), [@consideRatio](https://github.com/consideRatio))

#### Maintenance and upkeep improvements

- TST: don't assume test user is OS user [#301](https://github.com/jupyterhub/batchspawner/pull/301) ([@minrk](https://github.com/minrk))
- Add python 3.12 for tests [#299](https://github.com/jupyterhub/batchspawner/pull/299) ([@Ph0tonic](https://github.com/Ph0tonic), [@consideRatio](https://github.com/consideRatio))
- maint: req py36+ and jh 1.5.1+, fix tests, add RELEASE.md, add pre-commit hooks, add dependabot [#273](https://github.com/jupyterhub/batchspawner/pull/273) ([@consideRatio](https://github.com/consideRatio), [@mbmilligan](https://github.com/mbmilligan), [@ryanlovett](https://github.com/ryanlovett), [@yuvipanda](https://github.com/yuvipanda), [@mahendrapaipuri](https://github.com/mahendrapaipuri))
- Upgrade singleuser.py to JupyterHub 4 [#267](https://github.com/jupyterhub/batchspawner/pull/267) ([@mahendrapaipuri](https://github.com/mahendrapaipuri), [@minrk](https://github.com/minrk), [@consideRatio](https://github.com/consideRatio))
- Remove reading/setting HubAuth SSL attributes in singeuser [#259](https://github.com/jupyterhub/batchspawner/pull/259) ([@cmd-ntrf](https://github.com/cmd-ntrf), [@consideRatio](https://github.com/consideRatio))
- Fix Slurm test used regular expression [#256](https://github.com/jupyterhub/batchspawner/pull/256) ([@t20100](https://github.com/t20100), [@consideRatio](https://github.com/consideRatio))
- Quell async warning, and POST with body for jupyterhub 3.0 [#247](https://github.com/jupyterhub/batchspawner/pull/247) ([@ryanlovett](https://github.com/ryanlovett), [@mbmilligan](https://github.com/mbmilligan), [@rcthomas](https://github.com/rcthomas), [@minrk](https://github.com/minrk), [@jbeal-work](https://github.com/jbeal-work), [@mawigh](https://github.com/mawigh), [@cmd-ntrf](https://github.com/cmd-ntrf), [@jaescartin1](https://github.com/jaescartin1))
- Improve submit_batch_script logging [#219](https://github.com/jupyterhub/batchspawner/pull/219) ([@cmd-ntrf](https://github.com/cmd-ntrf), [@consideRatio](https://github.com/consideRatio), [@mbmilligan](https://github.com/mbmilligan))

#### Documentation improvements

- Add temporary info about a temporary bug with JupyterHub 3+ [#290](https://github.com/jupyterhub/batchspawner/pull/290) ([@krokicki](https://github.com/krokicki), [@consideRatio](https://github.com/consideRatio))

#### Continuous integration improvements

- Modernize test matrix [#252](https://github.com/jupyterhub/batchspawner/pull/252) ([@mbmilligan](https://github.com/mbmilligan))

#### Contributors to this release

The following people contributed discussions, new ideas, code and documentation contributions, and review.
See [our definition of contributors](https://github-activity.readthedocs.io/en/latest/#how-does-this-tool-define-contributions-in-the-reports).

([GitHub contributors page for this release](https://github.com/jupyterhub/batchspawner/graphs/contributors?from=2022-10-05&to=2024-03-19&type=c))

@basnijholt ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Abasnijholt+updated%3A2022-10-05..2024-03-19&type=Issues)) | @cmd-ntrf ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Acmd-ntrf+updated%3A2022-10-05..2024-03-19&type=Issues)) | @consideRatio ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3AconsideRatio+updated%3A2022-10-05..2024-03-19&type=Issues)) | @jaescartin1 ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Ajaescartin1+updated%3A2022-10-05..2024-03-19&type=Issues)) | @jbeal-work ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Ajbeal-work+updated%3A2022-10-05..2024-03-19&type=Issues)) | @krokicki ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Akrokicki+updated%3A2022-10-05..2024-03-19&type=Issues)) | @mahendrapaipuri ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Amahendrapaipuri+updated%3A2022-10-05..2024-03-19&type=Issues)) | @mark-tomich ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Amark-tomich+updated%3A2022-10-05..2024-03-19&type=Issues)) | @mawigh ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Amawigh+updated%3A2022-10-05..2024-03-19&type=Issues)) | @mbmilligan ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Ambmilligan+updated%3A2022-10-05..2024-03-19&type=Issues)) | @minrk ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Aminrk+updated%3A2022-10-05..2024-03-19&type=Issues)) | @opoplawski ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Aopoplawski+updated%3A2022-10-05..2024-03-19&type=Issues)) | @Ph0tonic ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3APh0tonic+updated%3A2022-10-05..2024-03-19&type=Issues)) | @rcthomas ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Arcthomas+updated%3A2022-10-05..2024-03-19&type=Issues)) | @ryanlovett ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Aryanlovett+updated%3A2022-10-05..2024-03-19&type=Issues)) | @t20100 ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3At20100+updated%3A2022-10-05..2024-03-19&type=Issues)) | @yuvipanda ([activity](https://github.com/search?q=repo%3Ajupyterhub%2Fbatchspawner+involves%3Ayuvipanda+updated%3A2022-10-05..2024-03-19&type=Issues))

## v1.2

### v1.2.0 - 2022-10-04

Changed

- PR #237: Replace use of scripts with entry_points
- PR #208 #238 #239 #240 #241: updates to CI - bumping versions and aligning with Jupyterhub standards
- PR #220: remove code supporting Jupyterhub earlier than 0.9

Fixed

- PR #229: LSF jobs with multiple slots display each hostname ':' separated

## v1.1

### v1.1.0 - 2021-04-07

Added (user)

- PR #170: SlurmSpawner: add `req_gres` to specify `--gres`.
- PR #137: GridEngineSpawner: spawner will now add the following system environment values to the spawner environment, in accordance with the Univa Admin Guide: `SGE_CELL`, `SGE_EXECD`, `SGE_ROOT`, `SGE_CLUSTER_NAME`, `SGE_QMASTER_PORT`, `SGE_EXECD_PORT`, `PATH`

Added (developer)

- PR #187: support for unknown job state

Changed

- PR #177: Fail on first error in batch script by setting `set -e` in script templates.
- PR #165: SlurmSpawner: Update template to use `--chdir` instead of `--workdir`. Users of Slurm older than 17.11 may need to revert this locally.
- PR #189: remove bashism from default script template
- PR #195: fix exception handling in run_command
- PR #198: change from Travis to gh-actions for testing
- PR #196: documentation
- PR #199: update setup.py

## v1.0

### v1.0.1 - 2020-11-04

- PR #189: batchspawner/batchspawner: Don't use `-o pipefail` in /bin/sh scripts
- PR #180: travis: Attempt to fix CI
- PR #177: Fail hard on first error in batch script
- PR #170: add 'gres' option to SlurmSpawner
- PR #165: Update batchspawner.py to use --chdir instead of --workdir
- PR #137: Grab environment variables needed for grid engine

### v1.0.0 - 2020-07-21

This release requires minimum JupyterHub 0.9 and Python 3.5.

Added (user)

- Add support for JupyterHub named servers. #167
- Add Jinja2 templating as an option for all scripts and commands. If `{{` or `{%` is used anywhere in the string, it is used as a jinja2 template.
- Add new option exec_prefix, which defaults to `sudo -E -u {username}`. This replaces explicit `sudo` in every batch command - changes in local commands may be needed.
- New option: `req_keepvars_extra`, which allows keeping extra variables in addition to what is defined by JupyterHub itself (addition of variables to keep instead of replacement). #99
- Add `req_prologue` and `req_epilogue` options to scripts which are inserted before/after the main jupyterhub-singleuser command, which allow for generic setup/cleanup without overriding the entire script. #96
- SlurmSpawner: add the `req_reservation` option. #91
- Add basic support for JupyterHub progress updates, but this is not used much yet. #86

Added (developer)

- Add many more tests.
- Add a new page `SPAWNERS.md` with information on specific spawners. Begin trying to collect a list of spawner-specific contacts. #97
- Rename `current_ip` and `current_port` commands to `ip` and `port`. No user impact. #139
- Update to Python 3.5 `async` / `await` syntax to support JupyterHub progress updates. #90

Changed

- PR #58 and #141 change the logic of port selection, so that it is selected _after_ the singleuser server starts. This means that the port number has to be conveyed back to JupyterHub. This requires the following changes:
  - `jupyterhub_config.py` _must_ explicitly import `batchspawner`
  - Add a new option `batchspawner_singleuser_cmd` which is used as a wrapper in the single-user servers, which conveys the remote port back to JupyterHub. This is now an integral part of the spawn process.
  - If you have installed with `pip install -e`, you will have to re-install so that the new script `batchspawner-singleuser` is added to `$PATH`.
- Update minimum requirements to JupyterHub 0.9 and Python 3.5. #143
- Update Slurm batch script. Now, the single-user notebook is run in a job step, with a wrapper of `srun`. This may need to be removed using `req_srun=''` if you don't want environment variables limited.
- Pass the environment dictionary to the queue and cancel commands as well. This is mostly user environment, but may be useful to these commands in some cases. #108, #111 If these environment variables were used for authentication as an admin, be aware that there are pre-existing security issues because they may be passed to the user via the batch submit command, see #82.

Fixed

- Improve debugging on failed submission by raising errors including error messages from the commands. #106
- Many other non-user or developer visible changes. #107 #106 #100
- In Travis CI, blacklist jsonschema=3.0.0a1 because it breaks tests

Removed

## v0.8

### v0.8.1 - 2018-05-02

- Fix regression: single-user server binding address is overwritten by previous session server address, resulting in failure to start. Issue #76

### v0.8.0 - 2018-04-24

This release is compatible with JupyterHub 0.5.0 through 0.8.1/0.9dev.

- SlurmSpawner: Remove `--uid` for (at least) Slurm 17.11 compatibility. If you use `sudo`, this should not be necessary, but because this is security related you should check that user management is as you expect. If your configuration does not use `sudo` then you may need to add the `--uid` option in a custom `batch_script`.
- add base options `req_ngpus` `req_partition` `req_account` and `req_options`
- Fix up logging
- Merge `user_options` with the template substitution vars instead of having it as a separate key
- Update ip/port handling for JupyterHub 0.8
- Add `LICENSE` (BSD3) and `CONTRIBUTING.md`
- Add `LsfSpawner` for IBM LSF
- Add `MultiSlurmSpawner`
- Add `MoabSpawner`
- Add `condorSpawner`
- Add `GridEngineSpawner`
- SlurmSpawner: add `req_qos` option
- WrapSpawner and ProfilesSpawner, which provide mechanisms for runtime configuration of spawners, have been split out and moved to the [`wrapspawner`](https://github.com/jupyterhub/wrapspawner) package
- Enable CI testing via Travis-CI

## v0.3

### v0.3.0 - 2015-11-30

- initial release containing `TorqueSpawner` and `SlurmSpawner`

--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing

Welcome! As a [Jupyter](https://jupyter.org) project, we follow the [Jupyter contributor guide](https://jupyter.readthedocs.io/en/latest/contributing/content-contributor.html).

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
BSD 3-Clause License

Copyright (c) 2017, Project Jupyter Contributors
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include *.md
include LICENSE

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# batchspawner for Jupyterhub

[![Latest PyPI version](https://img.shields.io/pypi/v/batchspawner?logo=pypi)](https://pypi.python.org/pypi/batchspawner)
[![Latest conda-forge version](https://img.shields.io/conda/vn/conda-forge/batchspawner?logo=conda-forge)](https://anaconda.org/conda-forge/batchspawner)
[![GitHub Workflow Status - Test](https://img.shields.io/github/actions/workflow/status/jupyterhub/batchspawner/test.yaml?logo=github&label=tests)](https://github.com/jupyterhub/batchspawner/actions)
[![Test coverage of code](https://codecov.io/gh/jupyterhub/batchspawner/branch/main/graph/badge.svg)](https://codecov.io/gh/jupyterhub/batchspawner)
[![Issue tracking - GitHub](https://img.shields.io/badge/issue_tracking-github-blue?logo=github)](https://github.com/jupyterhub/batchspawner/issues)
[![Help forum - Discourse](https://img.shields.io/badge/help_forum-discourse-blue?logo=discourse)](https://discourse.jupyter.org/c/jupyterhub)
[![Contribute](https://img.shields.io/badge/I_want_to_contribute!-grey?logo=jupyter)](https://github.com/jupyterhub/batchspawner/blob/master/CONTRIBUTING.md)

This is a custom spawner for [Jupyterhub](https://jupyterhub.readthedocs.io/) that is designed for installations on clusters using batch scheduling software.

This began as a generalization of [mkgilbert's batchspawner](https://github.com/mkgilbert/slurmspawner) which in turn was inspired by [Andrea Zonca's blog post](http://zonca.github.io/2015/04/jupyterhub-hpc.html "Run jupyterhub on a Supercomputer") where he explains his implementation for a spawner that uses SSH and Torque. His github repo is found [here](http://www.github.com/zonca/remotespawner "RemoteSpawner").

This package formerly included WrapSpawner and ProfilesSpawner, which provide mechanisms for runtime configuration of spawners. These have been split out and moved to the [`wrapspawner`](https://github.com/jupyterhub/wrapspawner) package.

## Installation

1. From the root directory of this repo (where setup.py is), run `pip install -e .`

   If you don't actually need an editable version, you can simply run
   `pip install batchspawner`

2. Add lines in jupyterhub_config.py for the spawner you intend to use, e.g.

   ```python
   c = get_config()
   c.JupyterHub.spawner_class = 'batchspawner.TorqueSpawner'
   import batchspawner # Even though not used, needed to register batchspawner interface
   ```

3. Depending on the spawner, additional configuration will likely be needed.

## Batch Spawners

For information on the specific spawners, see [SPAWNERS.md](SPAWNERS.md).

### Overview

This file contains an abstraction layer for batch job queueing systems (`BatchSpawnerBase`), and implements
Jupyterhub spawners for Torque, Moab, SLURM, SGE, HTCondor, LSF, and eventually others.
Common attributes of batch submission / resource manager environments will include notions of:

- queue names, resource manager addresses
- resource limits including runtime, number of processes, memory
- singleuser child process running on (usually remote) host not known until runtime
- job submission and monitoring via resource manager utilities
- remote execution via submission of templated scripts
- job names instead of PIDs

`BatchSpawnerBase` provides several general mechanisms:

- configurable traits `req_foo` that are exposed as `{foo}` in job template scripts. Templates (submit scripts in particular) may also use the full power of [jinja2](http://jinja.pocoo.org/). Templates are automatically detected if a `{{` or `{%` is present, otherwise str.format() is used.
- configurable command templates for submitting/querying/cancelling jobs
- a generic concept of job-ID and ID-based job state tracking
- overrideable hooks for subclasses to plug in logic at numerous points

### Example

Every effort has been made to accommodate highly diverse systems through configuration
only. This example consists of the (lightly edited) configuration used by the author
to run Jupyter notebooks on an academic supercomputer cluster.

```python
# Select the Torque backend and increase the timeout since batch jobs may take time to start
import batchspawner
c.JupyterHub.spawner_class = 'batchspawner.TorqueSpawner'
c.Spawner.http_timeout = 120

#------------------------------------------------------------------------------
# BatchSpawnerBase configuration
# These are simply setting parameters used in the job script template below
#------------------------------------------------------------------------------
c.BatchSpawnerBase.req_nprocs = '2'
c.BatchSpawnerBase.req_queue = 'mesabi'
c.BatchSpawnerBase.req_host = 'mesabi.xyz.edu'
c.BatchSpawnerBase.req_runtime = '12:00:00'
c.BatchSpawnerBase.req_memory = '4gb'
#------------------------------------------------------------------------------
# TorqueSpawner configuration
# The script below is nearly identical to the default template, but we needed
# to add a line for our local environment. For most sites the default templates
# should be a good starting point.
#------------------------------------------------------------------------------
c.TorqueSpawner.batch_script = '''#!/bin/sh
#PBS -q {queue}@{host}
#PBS -l walltime={runtime}
#PBS -l nodes=1:ppn={nprocs}
#PBS -l mem={memory}
#PBS -N jupyterhub-singleuser
#PBS -v {keepvars}
module load python3
{cmd}
'''
# For our site we need to munge the execution hostname returned by qstat
c.TorqueSpawner.state_exechost_exp = r'int-\1.mesabi.xyz.edu'
```

### Security

Unless otherwise stated for a specific spawner, assume that spawners
_do_ evaluate the shell environment for users, and thus the [security
requirements of JupyterHub for untrusted
users](https://jupyterhub.readthedocs.io/en/stable/reference/websecurity.html)
are not fulfilled: most spawners start a user shell, which executes
arbitrary user environment configuration (`.profile`, `.bashrc`, and
the like) unless users have no access to their own cluster user
account. This is something we are working on.

## Provide different configurations of BatchSpawner

### Overview

`ProfilesSpawner`, available as part of the [`wrapspawner`](https://github.com/jupyterhub/wrapspawner)
package, allows the Jupyterhub administrator to define a set of different spawning configurations,
both different spawners and different configurations of the same spawner.
The user is then presented a dropdown menu for choosing the most suitable configuration for their needs.

This method provides an easy and safe way to provide different configurations of `BatchSpawner` to the
users, see an example below.

### Example

The following is based on the author's configuration (at the same site as the example above)
showing how to give users access to multiple job configurations on the batch scheduled
clusters, as well as an option to run a local notebook directly on the jupyterhub server.

```python
# Same initial setup as the previous example
import batchspawner
c.JupyterHub.spawner_class = 'wrapspawner.ProfilesSpawner'
c.Spawner.http_timeout = 120
#------------------------------------------------------------------------------
# BatchSpawnerBase configuration
# Providing default values that we may omit in the profiles
#------------------------------------------------------------------------------
c.BatchSpawnerBase.req_host = 'mesabi.xyz.edu'
c.BatchSpawnerBase.req_runtime = '12:00:00'
c.TorqueSpawner.state_exechost_exp = r'in-\1.mesabi.xyz.edu'
#------------------------------------------------------------------------------
# ProfilesSpawner configuration
#------------------------------------------------------------------------------
# List of profiles to offer for selection. Signature is:
#   List(Tuple( Unicode, Unicode, Type(Spawner), Dict ))
# corresponding to profile display name, unique key, Spawner class,
# dictionary of spawner config options.
#
# The first three values will be exposed in the input_template as {display},
# {key}, and {type}
#
c.ProfilesSpawner.profiles = [
    ( "Local server", 'local', 'jupyterhub.spawner.LocalProcessSpawner', {'ip':'0.0.0.0'} ),
    ('Mesabi - 2 cores, 4 GB, 8 hours', 'mesabi2c4g12h', 'batchspawner.TorqueSpawner',
        dict(req_nprocs='2', req_queue='mesabi', req_runtime='8:00:00', req_memory='4gb')),
    ('Mesabi - 12 cores, 128 GB, 4 hours', 'mesabi128gb', 'batchspawner.TorqueSpawner',
        dict(req_nprocs='12', req_queue='ram256g', req_runtime='4:00:00', req_memory='125gb')),
    ('Mesabi - 2 cores, 4 GB, 24 hours', 'mesabi2c4gb24h', 'batchspawner.TorqueSpawner',
        dict(req_nprocs='2', req_queue='mesabi', req_runtime='24:00:00', req_memory='4gb')),
    ('Interactive Cluster - 2 cores, 4 GB, 8 hours', 'lab', 'batchspawner.TorqueSpawner',
        dict(req_nprocs='2', req_host='labhost.xyz.edu', req_queue='lab',
            req_runtime='8:00:00', req_memory='4gb', state_exechost_exp='')),
]
c.ProfilesSpawner.ip = '0.0.0.0'
```

## Debugging batchspawner

Sometimes it can be hard to debug batchspawner, but it's not really
hard once you know how the pieces interact. Check the following places for
error messages:

- Check the JupyterHub logs for errors.

- Check the JupyterHub logs for the batch script that got submitted
  and the command used to submit it. Are these correct? (Note that
  there are submission environment variables too, which aren't
  displayed.)

- At this point, it's a matter of checking the batch system. Is the
  job ever scheduled? Does it run? Does it succeed? Check the batch
  system status and output of the job. The most common failure
  patterns are a) the job never starting due to bad scheduler options,
  b) the job waiting in the queue beyond the `start_timeout`, causing
  JupyterHub to kill the job.

- At this point the job starts. Does it fail immediately, or before
  Jupyter starts? Check the scheduler output files (stdout/stderr of
  the job), wherever they are stored. To debug the job script, you can
  add debugging into the batch script, such as an `env` or `set -x`.

- At this point Jupyter itself starts - check its error messages. Is
  it starting with the right options? Can it communicate with the
  hub? At this point there usually isn't anything
  batchspawner-specific, with the one exception below. The error log
  would be in the batch script output (same file as above). There may
  also be clues in the JupyterHub logfile.
- Are you running on an NFS filesystem? It's possible for Jupyter to
  experience issues due to varying implementations of the fcntl() system
  call. (See also [Jupyterhub-Notes and Tips: SQLite](https://jupyterhub.readthedocs.io/en/latest/reference/database.html?highlight=NFS#sqlite))

Common problems:

- Did you `import batchspawner` in the `jupyterhub_config.py` file?
  This is needed in order to activate the batchspawner API in
  JupyterHub.

## Changelog

See [CHANGELOG.md](CHANGELOG.md).

--------------------------------------------------------------------------------
/RELEASE.md:
--------------------------------------------------------------------------------
# How to make a release

`batchspawner` is a package available on [PyPI] and on [conda-forge].

These are the instructions on how to make a release.

## Pre-requisites

- Push rights to this GitHub repository

## Steps to make a release

1. Create a PR updating `CHANGELOG.md` with [github-activity] and continue when
   it's merged.

   Advice on this procedure can be found in [this team compass
   issue](https://github.com/jupyterhub/team-compass/issues/563).

2. Checkout main and make sure it is up to date.

   ```shell
   git checkout main
   git fetch origin main
   git reset --hard origin/main
   ```

3. Update the version, make commits, and push a git tag with `tbump`.

   ```shell
   pip install tbump
   ```

   `tbump` will ask for confirmation before doing anything.

   ```shell
   # Example versions to set: 1.0.0, 1.0.0b1
   VERSION=
   tbump ${VERSION}
   ```

   Following this, the [CI system] will build and publish a release.

4. Reset the version back to dev, e.g. `1.0.1.dev` after releasing `1.0.0`.

   ```shell
   # Example version to set: 1.0.1.dev
   NEXT_VERSION=
   tbump --no-tag ${NEXT_VERSION}.dev
   ```

5. Following the release to PyPI, an automated PR should arrive within 24 hours
   to [conda-forge/batchspawner-feedstock] with instructions on releasing to
   conda-forge. You are welcome to volunteer to do this, but it isn't required
   as part of making this release to PyPI.

[github-activity]: https://github.com/executablebooks/github-activity
[pypi]: https://pypi.org/project/batchspawner/
[ci system]: https://github.com/jupyterhub/batchspawner/actions/workflows/release.yaml
[conda-forge]: https://anaconda.org/conda-forge/batchspawner
[conda-forge/batchspawner-feedstock]: https://github.com/conda-forge/batchspawner-feedstock

--------------------------------------------------------------------------------
/SPAWNERS.md:
--------------------------------------------------------------------------------
# Notes on specific spawners

**Spawner maintainers**: Included below are "spawner maintainers",
when available. There aren't official obligations, but the general
idea is that you should watch the repository and feel especially
empowered to comment on issues when you think it might be relevant to
you (obviously everyone should be, but this is our attempt at even
more outreach). You should let us know when we break something and
provide a diversity of opinions in general. Submitting PRs and
testing is nice but not required.

To be listed as a maintainer, just submit an issue or PR adding you,
and please watch the repository on Github.

## `TorqueSpawner`

Maintainers:

## `MoabSpawner`

Subclass of TorqueSpawner

Maintainers:

## `SlurmSpawner`

Maintainers: @rkdarst

This spawner enforces the environment if `srun` is used to wrap the
spawner command, which is the default. If you _do_ want the user
environment to be used, set `req_srun=''`. However, this is not
perfect: there is still a bash shell begun as the user which could run
arbitrary startup, define shell aliases for `srun`, etc.

Use of `srun` is required for the singleuser server to terminate gracefully.
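
For example, a minimal `jupyterhub_config.py` sketch (values are illustrative,
not a recommendation - see the caveats above before disabling `srun`):

```python
c.JupyterHub.spawner_class = "batchspawner.SlurmSpawner"

# Default behaviour: the singleuser command is wrapped in srun, which
# enforces the environment that JupyterHub passes to the job.
# To opt out and let the user's own shell environment apply, clear req_srun:
# c.SlurmSpawner.req_srun = ""
```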

## `GridengineSpawner`

Maintainers:

## `CondorSpawner`

Maintainers:

## `LsfSpawner`

Maintainers:

# Checklist for making spawners

Please document each of these things under the spawner list above -
even if it is "OK", we need to track the status of all spawners. If it
is a bug, users really need to know.

- Does your spawner read the shell environment before starting? (See
  [Jupyterhub
  Security](https://jupyterhub.readthedocs.io/en/stable/reference/websecurity.html).)

- Does your spawner send SIGTERM to the jupyterhub-singleuser process
  before SIGKILL? It should, so that the process can terminate
  gracefully. Add `echo "terminated gracefully"` to the end of the
  batch script - if you see this in your singleuser server output, you
  know that you DO receive SIGTERM and terminate gracefully. If your
  batch system cannot automatically send SIGTERM before SIGKILL, PR
  #75 might help here - ask for it to be finished.

--------------------------------------------------------------------------------
/batchspawner/__init__.py:
--------------------------------------------------------------------------------
from . import api  # noqa
from ._version import __version__, version_info  # noqa
from .batchspawner import *  # noqa

--------------------------------------------------------------------------------
/batchspawner/_version.py:
--------------------------------------------------------------------------------
# __version__ should be updated using tbump, based on configuration in
# pyproject.toml, according to instructions in RELEASE.md.
#
__version__ = "1.3.1.dev"

# version_info looks like (1, 2, 3, "dev") if __version__ is 1.2.3.dev
version_info = tuple(int(p) if p.isdigit() else p for p in __version__.split("."))

--------------------------------------------------------------------------------
/batchspawner/api.py:
--------------------------------------------------------------------------------
import json

from jupyterhub.apihandlers import APIHandler, default_handlers
from tornado import web


class BatchSpawnerAPIHandler(APIHandler):
    @web.authenticated
    def post(self):
        """POST set user spawner data"""
        if hasattr(self, "current_user"):
            # JupyterHub compatibility (September 2018, d79a99323ef1d)
            user = self.current_user
        else:
            # Previous jupyterhub, 0.9.4 and before.
            user = self.get_current_user()
        token = self.get_auth_token()
        spawner = None
        for s in user.spawners.values():
            if s.api_token == token:
                spawner = s
                break
        data = self.get_json_body()
        for key, value in data.items():
            if hasattr(spawner, key):
                setattr(spawner, key, value)
        self.finish(json.dumps({"message": "BatchSpawner data configured"}))
        self.set_status(201)


default_handlers.append((r"/api/batchspawner", BatchSpawnerAPIHandler))
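
# Illustrative note (an assumed example, not part of the original module): the
# `batchspawner-singleuser` wrapper script POSTs JSON such as {"port": 40177}
# to this endpoint using the spawner's API token; the handler above copies any
# matching keys onto the Spawner instance, which is how the port picked at
# runtime on the remote host gets conveyed back to JupyterHub.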

--------------------------------------------------------------------------------
/batchspawner/batchspawner.py:
--------------------------------------------------------------------------------
# Copyright (c) Regents of the University of Minnesota
# Copyright (c) Michael Gilbert
# Distributed under the terms of the Modified BSD License.

"""Batch spawners

This file contains an abstraction layer for batch job queueing systems, and implements
Jupyterhub spawners for Torque, SLURM, and eventually others.

Common attributes of batch submission / resource manager environments will include notions of:
* queue names, resource manager addresses
* resource limits including runtime, number of processes, memory
* singleuser child process running on (usually remote) host not known until runtime
* job submission and monitoring via resource manager utilities
* remote execution via submission of templated scripts
* job names instead of PIDs
"""
import asyncio
import os
import pwd
import re
import xml.etree.ElementTree as ET
from enum import Enum

from jinja2 import Template
from jupyterhub.spawner import Spawner, set_user_setuid
from traitlets import Dict, Float, Integer, Unicode, default


def format_template(template, *args, **kwargs):
    """Format a template, either using jinja2 or str.format().

    Use jinja2 if the template is a jinja2.Template, or contains '{{' or
    '{%'. Otherwise, use str.format() for backwards compatibility with
    old scripts (but you can't mix them).
    """
    if isinstance(template, Template):
        return template.render(*args, **kwargs)
    elif "{{" in template or "{%" in template:
        return Template(template).render(*args, **kwargs)
    return template.format(*args, **kwargs)


class JobStatus(Enum):
    NOTFOUND = 0
    RUNNING = 1
    PENDING = 2
    UNKNOWN = 3


class BatchSpawnerBase(Spawner):
    """Base class for spawners using resource manager batch job submission mechanisms

    This base class is developed targeting the TorqueSpawner and SlurmSpawner, so by default
    assumes a qsub-like command that reads a script from its stdin for starting jobs,
    a qstat-like command that outputs some data that can be parsed to check if the job is running
    and on what remote node, and a qdel-like command to cancel a job. The goal is to be
    sufficiently general that a broad range of systems can be supported with minimal overrides.

    At minimum, subclasses should provide reasonable defaults for the traits:
        batch_script
        batch_submit_cmd
        batch_query_cmd
        batch_cancel_cmd

    and must provide implementations for the methods:
        state_ispending
        state_isrunning
        state_gethost
    """

    # override default since batch systems typically need longer
    start_timeout = Integer(300).tag(config=True)

    # override default server ip since batch jobs normally running remotely
    ip = Unicode(
        "0.0.0.0",
        help="Address for singleuser server to listen at",
    ).tag(config=True)

    exec_prefix = Unicode(
        "sudo -E -u {username}",
        help="Standard execution prefix (e.g. the default sudo -E -u {username})",
    ).tag(config=True)

    # all these req_foo traits will be available as substvars for templated strings
    req_queue = Unicode(
        "",
        help="Queue name to submit job to resource manager",
    ).tag(config=True)

    req_host = Unicode(
        "",
        help="Host name of batch server to submit job to resource manager",
    ).tag(config=True)

    req_memory = Unicode(
        "",
        help="Memory to request from resource manager",
    ).tag(config=True)

    req_nprocs = Unicode(
        "",
        help="Number of processors to request from resource manager",
    ).tag(config=True)

    req_ngpus = Unicode(
        "",
        help="Number of GPUs to request from resource manager",
    ).tag(config=True)

    req_runtime = Unicode(
        "",
        help="Length of time for submitted job to run",
    ).tag(config=True)

    req_partition = Unicode(
        "",
        help="Partition name to submit job to resource manager",
    ).tag(config=True)

    req_account = Unicode(
        "",
        help="Account name string to pass to the resource manager",
    ).tag(config=True)

    req_options = Unicode(
        "",
        help="Other options to include into job submission script",
    ).tag(config=True)

    req_prologue = Unicode(
        "",
        help="Script to run before single user server starts.",
    ).tag(config=True)

    req_epilogue = Unicode(
        "",
        help="Script to run after single user server ends.",
    ).tag(config=True)

    req_username = Unicode()

    @default("req_username")
    def _req_username_default(self):
        return self.user.name

    # Useful IF getpwnam on submit host returns correct info for exec host
    req_homedir = Unicode()

    @default("req_homedir")
    def _req_homedir_default(self):
        return pwd.getpwnam(self.user.name).pw_dir

    req_keepvars = Unicode()

    @default("req_keepvars")
    def _req_keepvars_default(self):
        return ",".join(self.get_env().keys())

    req_keepvars_extra = Unicode(
        help="Extra environment variables which should be configured, "
        "added to the defaults in keepvars, "
        "comma separated list.",
    ).tag(config=True)

    batch_script = Unicode(
        "",
        help="Template for job submission script. Traits on this class named like req_xyz "
        "will be substituted in the template for {xyz} using string.Formatter. "
        "Must include {cmd} which will be replaced with the jupyterhub-singleuser command line.",
    ).tag(config=True)

    batchspawner_singleuser_cmd = Unicode(
        "batchspawner-singleuser",
        help="A wrapper which is capable of special batchspawner setup: currently sets the port on "
        "the remote host. Not needed to be set under normal circumstances, unless path needs "
        "specification.",
    ).tag(config=True)

    # Raw output of job submission command unless overridden
    job_id = Unicode()

    # Will get the raw output of the job status command unless overridden
    job_status = Unicode()

    # Prepare substitution variables for templates using req_xyz traits
    def get_req_subvars(self):
        reqlist = [t for t in self.trait_names() if t.startswith("req_")]
        subvars = {}
        for t in reqlist:
            subvars[t[4:]] = getattr(self, t)
        if subvars.get("keepvars_extra"):
            subvars["keepvars"] += "," + subvars["keepvars_extra"]
        return subvars

    batch_submit_cmd = Unicode(
        "",
        help="Command to run to submit batch scripts. Formatted using req_xyz traits as {xyz}.",
    ).tag(config=True)

    def parse_job_id(self, output):
        "Parse output of submit command to get job id."
        return output

    def cmd_formatted_for_batch(self):
        """The command which is substituted inside of the batch script"""
        return " ".join([self.batchspawner_singleuser_cmd] + self.cmd + self.get_args())

    async def run_command(self, cmd, input=None, env=None):
        proc = await asyncio.create_subprocess_shell(
            cmd,
            env=env,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        inbytes = None

        if input:
            inbytes = input.encode()

        try:
            out, eout = await proc.communicate(input=inbytes)
        except:
            self.log.debug("Exception raised when trying to run command: %s" % cmd)
            proc.kill()
            self.log.debug("Running command failed, killed process.")
            try:
                out, eout = await asyncio.wait_for(proc.communicate(), timeout=2)
                out = out.decode().strip()
                eout = eout.decode().strip()
                self.log.error("Subprocess returned exitcode %s" % proc.returncode)
                self.log.error("Stdout:")
                self.log.error(out)
                self.log.error("Stderr:")
                self.log.error(eout)
                raise RuntimeError(f"{cmd} exit status {proc.returncode}: {eout}")
            except asyncio.TimeoutError:
                self.log.error(
                    "Encountered timeout trying to clean up command, process probably killed already: %s"
                    % cmd
                )
                return ""
            except:
                self.log.error(
                    "Encountered exception trying to clean up command: %s" % cmd
                )
                raise
        else:
            eout = eout.decode().strip()
            err = proc.returncode
            if err != 0:
                self.log.error("Subprocess returned exitcode %s" % err)
                self.log.error(eout)
                raise RuntimeError(eout)

        out = out.decode().strip()
        return out

    async def _get_batch_script(self, **subvars):
        """Format batch script from vars"""
        # Could be overridden by subclasses, but mainly useful for testing
        return format_template(self.batch_script, **subvars)

    async def submit_batch_script(self):
        subvars = self.get_req_subvars()
        # `cmd` is submitted to the batch system
        cmd = " ".join(
            (
                format_template(self.exec_prefix, **subvars),
                format_template(self.batch_submit_cmd, **subvars),
            )
        )
        # `subvars['cmd']` is what is run _inside_ the batch script,
        # put into the template.
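        # Illustrative example (assumed values, they vary by deployment): with
        # the defaults this joins batchspawner_singleuser_cmd, self.cmd, and
        # self.get_args() into something like
        #   batchspawner-singleuser jupyterhub-singleuser --port=...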
        subvars["cmd"] = self.cmd_formatted_for_batch()
        if hasattr(self, "user_options"):
            subvars.update(self.user_options)
        script = await self._get_batch_script(**subvars)
        self.log.info("Spawner script options: %s", subvars)
        self.log.info("Spawner submitting command: %s", cmd)
        self.log.debug("Spawner submitting script:\n%s", script)
        self.log.debug("Spawner submitting environment: %s", self.get_env())
        out = await self.run_command(cmd, input=script, env=self.get_env())
        try:
            self.log.info("Job submitted. output: %s", out)
            self.job_id = self.parse_job_id(out)
        except:
            self.log.error("Job submission failed. exit code: %s", out)
            self.job_id = ""
        return self.job_id

    # Override if your batch system needs something more elaborate to query the job status
    batch_query_cmd = Unicode(
        "",
        help="Command to run to query job status. Formatted using req_xyz traits as {xyz} "
        "and self.job_id as {job_id}.",
    ).tag(config=True)

    async def query_job_status(self):
        """Check job status, return JobStatus object."""
        if self.job_id is None or len(self.job_id) == 0:
            self.job_status = ""
            return JobStatus.NOTFOUND
        subvars = self.get_req_subvars()
        subvars["job_id"] = self.job_id
        cmd = " ".join(
            (
                format_template(self.exec_prefix, **subvars),
                format_template(self.batch_query_cmd, **subvars),
            )
        )
        self.log.debug("Spawner querying job: " + cmd)
        try:
            self.job_status = await self.run_command(cmd)
        except RuntimeError as e:
            # e.args[0] is stderr from the process
            self.job_status = e.args[0]
        except Exception:
            self.log.error("Error querying job " + self.job_id)
            self.job_status = ""

        if self.state_isrunning():
            return JobStatus.RUNNING
        elif self.state_ispending():
            return JobStatus.PENDING
        elif self.state_isunknown():
            return JobStatus.UNKNOWN
        else:
            return JobStatus.NOTFOUND

    batch_cancel_cmd = Unicode(
        "",
        help="Command to stop/cancel a previously submitted job. Formatted like batch_query_cmd.",
    ).tag(config=True)

    async def cancel_batch_job(self):
        subvars = self.get_req_subvars()
        subvars["job_id"] = self.job_id
        cmd = " ".join(
            (
                format_template(self.exec_prefix, **subvars),
                format_template(self.batch_cancel_cmd, **subvars),
            )
        )
        self.log.info("Cancelling job " + self.job_id + ": " + cmd)
        await self.run_command(cmd)

    def load_state(self, state):
        """load job_id from state"""
        super().load_state(state)
        self.job_id = state.get("job_id", "")
        self.job_status = state.get("job_status", "")

    def get_state(self):
        """add job_id to state"""
        state = super().get_state()
        if self.job_id:
            state["job_id"] = self.job_id
        if self.job_status:
            state["job_status"] = self.job_status
        return state

    def clear_state(self):
        """clear job_id state"""
        super().clear_state()
        self.job_id = ""
        self.job_status = ""

    def make_preexec_fn(self, name):
        """make preexec fn to change uid (if running as root) before job submission"""
        return set_user_setuid(name)

    def state_ispending(self):
        "Return boolean indicating if job is still waiting to run, likely by parsing self.job_status"
        raise NotImplementedError("Subclass must provide implementation")

    def state_isrunning(self):
        "Return boolean indicating if job is running, likely by parsing self.job_status"
        raise NotImplementedError("Subclass must provide implementation")

    def state_isunknown(self):
        "Return boolean indicating if job state retrieval failed because of the resource manager"
        return None

    def state_gethost(self):
        "Return string, hostname or addr of running job, likely by parsing self.job_status"
        raise NotImplementedError("Subclass must provide implementation")

    async def poll(self):
        """Poll the process"""
        status = await self.query_job_status()
        if status in (JobStatus.PENDING, JobStatus.RUNNING, JobStatus.UNKNOWN):
            return None
        else:
            self.clear_state()
            return 1

    startup_poll_interval = Float(
        0.5,
        help="Polling interval (seconds) to check job state during startup",
    ).tag(config=True)

    async def start(self):
        """Start the process"""
        self.ip = self.traits()["ip"].default_value
        self.port = self.traits()["port"].default_value

        if self.server:
            self.server.port = self.port

        await self.submit_batch_script()

        # We are called with a timeout, and if the timeout expires this function will
        # be interrupted at the next yield, and self.stop() will be called.
        # So this function should not return unless successful, and if unsuccessful
        # should either raise an Exception or loop forever.
419 | if len(self.job_id) == 0: 420 | raise RuntimeError( 421 | "Jupyter batch job submission failure (no jobid in output)" 422 | ) 423 | while True: 424 | status = await self.query_job_status() 425 | if status == JobStatus.RUNNING: 426 | break 427 | elif status == JobStatus.PENDING: 428 | self.log.debug("Job " + self.job_id + " still pending") 429 | elif status == JobStatus.UNKNOWN: 430 | self.log.debug("Job " + self.job_id + " still unknown") 431 | else: 432 | self.log.warning( 433 | "Job " 434 | + self.job_id 435 | + " neither pending nor running.\n" 436 | + self.job_status 437 | ) 438 | self.clear_state() 439 | raise RuntimeError( 440 | "The Jupyter batch job has disappeared" 441 | " while pending in the queue or died immediately" 442 | " after starting." 443 | ) 444 | await asyncio.sleep(self.startup_poll_interval) 445 | 446 | self.ip = self.state_gethost() 447 | while self.port == 0: 448 | await asyncio.sleep(self.startup_poll_interval) 449 | # Test framework: For testing, mock_port is set because we 450 | # don't actually run the single-user server yet. 451 | if hasattr(self, "mock_port"): 452 | self.port = self.mock_port 453 | # Check if job is still running 454 | status = await self.poll() 455 | if status: 456 | raise RuntimeError( 457 | "The Jupyter batch job started" 458 | " but died before launching the single-user server." 459 | ) 460 | 461 | self.db.commit() 462 | self.log.info( 463 | "Notebook server job {} started at {}:{}".format( 464 | self.job_id, self.ip, self.port 465 | ) 466 | ) 467 | 468 | return self.ip, self.port 469 | 470 | async def stop(self, now=False): 471 | """Stop the singleuser server job. 472 | 473 | Returns immediately after sending job cancellation command if now=True, otherwise 474 | tries to confirm that job is no longer running.""" 475 | 476 | self.log.info("Stopping server job " + self.job_id) 477 | await self.cancel_batch_job() 478 | if now: 479 | return 480 | for i in range(10): 481 | status = await self.query_job_status() 482 | if status not in (JobStatus.RUNNING, JobStatus.UNKNOWN): 483 | return 484 | await asyncio.sleep(1) 485 | if self.job_id: 486 | self.log.warning( 487 | "Notebook server job {} at {}:{} possibly failed to terminate".format( 488 | self.job_id, self.ip, self.port 489 | ) 490 | ) 491 | 492 | async def progress(self): 493 | while True: 494 | if self.state_ispending(): 495 | yield {"message": "Pending in queue..."} 496 | elif self.state_isrunning(): 497 | yield {"message": "Cluster job running... waiting to connect"} 498 | return 499 | else: 500 | yield {"message": "Unknown status..."} 501 | await asyncio.sleep(1) 502 | 503 | 504 | class BatchSpawnerRegexStates(BatchSpawnerBase): 505 | """Subclass of BatchSpawnerBase that uses config-supplied regular expressions 506 | to interact with batch submission system state. Provides implementations of 507 | state_ispending 508 | state_isrunning 509 | state_gethost 510 | 511 | In their place, the user should supply the following configuration: 512 | state_pending_re - regex that matches job_status if job is waiting to run 513 | state_running_re - regex that matches job_status if job is running 514 | state_exechost_re - regex with at least one capture group that extracts 515 | execution host from job_status 516 | state_exechost_exp - if empty, notebook IP will be set to the contents of the 517 | first capture group. If this variable is set, the match object 518 | will be expanded using this string to obtain the notebook IP. 
519 |         See Python docs: re.match.expand
520 |     """
521 | 
522 |     state_pending_re = Unicode(
523 |         "",
524 |         help="Regex that matches job_status if job is waiting to run",
525 |     ).tag(config=True)
526 |     state_running_re = Unicode(
527 |         "",
528 |         help="Regex that matches job_status if job is running",
529 |     ).tag(config=True)
530 |     state_exechost_re = Unicode(
531 |         "",
532 |         help="Regex with at least one capture group that extracts "
533 |         "the execution host from job_status output",
534 |     ).tag(config=True)
535 |     state_exechost_exp = Unicode(
536 |         "",
537 |         help="""If empty, notebook IP will be set to the contents of the first capture group.
538 | 
539 |         If this variable is set, the match object will be expanded using this string
540 |         to obtain the notebook IP.
541 |         See Python docs: re.match.expand""",
542 |     ).tag(config=True)
543 |     state_unknown_re = Unicode(
544 |         "",
545 |         help="Regex that matches job_status if the resource manager is not answering. "
546 |         "Blank indicates not used.",
547 |     ).tag(config=True)
548 | 
549 |     def state_ispending(self):
550 |         assert self.state_pending_re, "Misconfigured: define state_pending_re"
551 |         return self.job_status and re.search(self.state_pending_re, self.job_status)
552 | 
553 |     def state_isrunning(self):
554 |         assert self.state_running_re, "Misconfigured: define state_running_re"
555 |         return self.job_status and re.search(self.state_running_re, self.job_status)
556 | 
557 |     def state_isunknown(self):
558 |         # Blank means "not set"; in that case this function returns None.
559 |         if self.state_unknown_re:
560 |             return self.job_status and re.search(self.state_unknown_re, self.job_status)
561 | 
562 |     def state_gethost(self):
563 |         assert self.state_exechost_re, "Misconfigured: define state_exechost_re"
564 |         match = re.search(self.state_exechost_re, self.job_status)
565 |         if not match:
566 |             self.log.error(
567 |                 "Spawner unable to match host addr in job status: " + self.job_status
568 |             )
569 |             return
570 |         if not self.state_exechost_exp:
571 |             return match.groups()[0]
572 |         else:
573 |             return match.expand(self.state_exechost_exp)
574 | 
575 | 
576 | class TorqueSpawner(BatchSpawnerRegexStates):
577 |     batch_script = Unicode(
578 |         """#!/bin/sh
579 | #PBS -q {queue}@{host}
580 | #PBS -l walltime={runtime}
581 | #PBS -l nodes=1:ppn={nprocs}
582 | #PBS -l mem={memory}
583 | #PBS -N jupyterhub-singleuser
584 | #PBS -v {keepvars}
585 | #PBS {options}
586 | 
587 | set -eu
588 | 
589 | {prologue}
590 | {cmd}
591 | {epilogue}
592 | """
593 |     ).tag(config=True)
594 | 
595 |     # outputs job id string
596 |     batch_submit_cmd = Unicode("qsub").tag(config=True)
597 |     # outputs job data XML string
598 |     batch_query_cmd = Unicode("qstat -x {job_id}").tag(config=True)
599 |     batch_cancel_cmd = Unicode("qdel {job_id}").tag(config=True)
600 |     # search XML string for job_state - [QH] = pending, R = running, [CE] = done
601 |     state_pending_re = Unicode(r"<job_state>[QH]").tag(config=True)
602 |     state_running_re = Unicode(r"<job_state>R").tag(config=True)
603 |     state_exechost_re = Unicode(r"<exec_host>((?:[\w_-]+\.?)+)/\d+").tag(config=True)
604 | 
605 | 
606 | class MoabSpawner(TorqueSpawner):
607 |     # outputs job id string
608 |     batch_submit_cmd = Unicode("msub").tag(config=True)
609 |     # outputs job data XML string
610 |     batch_query_cmd = Unicode("mdiag -j {job_id} --xml").tag(config=True)
611 |     batch_cancel_cmd = Unicode("mjobctl -c {job_id}").tag(config=True)
612 |     state_pending_re = Unicode(r'State="Idle"').tag(config=True)
613 |     state_running_re = Unicode(r'State="Running"').tag(config=True)
614 |     state_exechost_re 
= Unicode(r'AllocNodeList="([^\r\n\t\f :"]*)').tag(config=True) 615 | 616 | 617 | class PBSSpawner(TorqueSpawner): 618 | batch_script = Unicode( 619 | """#!/bin/sh 620 | {% if queue or host %}#PBS -q {% if queue %}{{queue}}{% endif %}\ 621 | {% if host %}@{{host}}{% endif %}{% endif %} 622 | #PBS -l walltime={{runtime}} 623 | #PBS -l select=1:ncpus={{nprocs}}:mem={{memory}} 624 | #PBS -N jupyterhub-singleuser 625 | #PBS -o {{homedir}}/.jupyterhub.pbs.out 626 | #PBS -e {{homedir}}/.jupyterhub.pbs.err 627 | #PBS -v {{keepvars}} 628 | {% if options %}#PBS {{options}}{% endif %} 629 | 630 | set -eu 631 | 632 | {{prologue}} 633 | {{cmd}} 634 | {{epilogue}} 635 | """ 636 | ).tag(config=True) 637 | 638 | # outputs job data XML string 639 | batch_query_cmd = Unicode("qstat -fx {job_id}").tag(config=True) 640 | 641 | state_pending_re = Unicode(r"job_state = [QH]").tag(config=True) 642 | state_running_re = Unicode(r"job_state = R").tag(config=True) 643 | state_exechost_re = Unicode(r"exec_host = ([\w_-]+)/").tag(config=True) 644 | 645 | 646 | class UserEnvMixin: 647 | """Mixin class that computes values for USER, SHELL and HOME in the environment passed to 648 | the job submission subprocess in case the batch system needs these for the batch script. 649 | """ 650 | 651 | def user_env(self, env): 652 | """get user environment""" 653 | env["USER"] = self.user.name 654 | home = pwd.getpwnam(self.user.name).pw_dir 655 | shell = pwd.getpwnam(self.user.name).pw_shell 656 | if home: 657 | env["HOME"] = home 658 | if shell: 659 | env["SHELL"] = shell 660 | return env 661 | 662 | def get_env(self): 663 | """Get user environment variables to be passed to the user's job 664 | 665 | Everything here should be passed to the user's job as 666 | environment. Caution: If these variables are used for 667 | authentication to the batch system commands as an admin, be 668 | aware that the user will receive access to these as well. 669 | """ 670 | env = super().get_env() 671 | env = self.user_env(env) 672 | return env 673 | 674 | 675 | class SlurmSpawner(UserEnvMixin, BatchSpawnerRegexStates): 676 | batch_script = Unicode( 677 | """#!/bin/bash 678 | #SBATCH --output={{homedir}}/jupyterhub_slurmspawner_%j.log 679 | #SBATCH --job-name=spawner-jupyterhub 680 | #SBATCH --chdir={{homedir}} 681 | #SBATCH --export={{keepvars}} 682 | #SBATCH --get-user-env=L 683 | {% if partition %}#SBATCH --partition={{partition}} 684 | {% endif %}{% if runtime %}#SBATCH --time={{runtime}} 685 | {% endif %}{% if memory %}#SBATCH --mem={{memory}} 686 | {% endif %}{% if gres %}#SBATCH --gres={{gres}} 687 | {% endif %}{% if nprocs %}#SBATCH --cpus-per-task={{nprocs}} 688 | {% endif %}{% if reservation%}#SBATCH --reservation={{reservation}} 689 | {% endif %}{% if options %}#SBATCH {{options}}{% endif %} 690 | 691 | set -euo pipefail 692 | 693 | trap 'echo SIGTERM received' TERM 694 | {{prologue}} 695 | {% if srun %}{{srun}} {% endif %}{{cmd}} 696 | echo "jupyterhub-singleuser ended gracefully" 697 | {{epilogue}} 698 | """ 699 | ).tag(config=True) 700 | 701 | # all these req_foo traits will be available as substvars for templated strings 702 | req_cluster = Unicode( 703 | "", 704 | help="Cluster name to submit job to resource manager", 705 | ).tag(config=True) 706 | 707 | req_qos = Unicode( 708 | "", 709 | help="QoS name to submit job to resource manager", 710 | ).tag(config=True) 711 | 712 | req_srun = Unicode( 713 | "srun", 714 | help="Set req_srun='' to disable running in job step, and note that " 715 | "this affects environment handling. 
This is effectively a "
716 |         "prefix for the singleuser command.",
717 |     ).tag(config=True)
718 | 
719 |     req_reservation = Unicode(
720 |         "",
721 |         help="Reservation name to submit to resource manager",
722 |     ).tag(config=True)
723 | 
724 |     req_gres = Unicode(
725 |         "",
726 |         help="Additional resources (e.g. GPUs) requested",
727 |     ).tag(config=True)
728 | 
729 |     # with --parsable, outputs the job id, optionally followed by ";clustername"
730 |     batch_submit_cmd = Unicode("sbatch --parsable").tag(config=True)
731 |     # outputs status and exec node like "RUNNING hostname"
732 |     batch_query_cmd = Unicode("squeue -h -j {job_id} -o '%T %B'").tag(config=True)
733 |     batch_cancel_cmd = Unicode("scancel {job_id}").tag(config=True)
734 |     # use long-form states: PENDING, CONFIGURING = pending
735 |     #  RUNNING, COMPLETING = running
736 |     state_pending_re = Unicode(r"^(?:PENDING|CONFIGURING)").tag(config=True)
737 |     state_running_re = Unicode(r"^(?:RUNNING|COMPLETING)").tag(config=True)
738 |     state_unknown_re = Unicode(
739 |         r"^slurm_load_jobs error: (?:Socket timed out on send/recv|Unable to contact slurm controller)"
740 |     ).tag(config=True)
741 |     state_exechost_re = Unicode(r"\s+((?:[\w_-]+\.?)+)$").tag(config=True)
742 | 
743 |     def parse_job_id(self, output):
744 |         # make sure jobid is really a number
745 |         try:
746 |             # use only last line to circumvent slurm bug
747 |             output = output.splitlines()[-1]
748 |             job_id = output.split(";")[0]
749 |             int(job_id)
750 |         except Exception as e:
751 |             self.log.error("SlurmSpawner unable to parse job ID from text: " + output)
752 |             raise e
753 |         return job_id
754 | 
755 | 
756 | class MultiSlurmSpawner(SlurmSpawner):
757 |     """When slurm has been compiled with --enable-multiple-slurmd, the
758 |     administrator sets the name of the slurmd instance via the slurmd -N
759 |     option. This node name is usually different from the hostname and may
760 |     not be resolvable by JupyterHub. 
Here we enable the administrator to 761 | map the node names onto the real hostnames via a traitlet.""" 762 | 763 | daemon_resolver = Dict( 764 | {}, 765 | help="Map node names to hostnames", 766 | ).tag(config=True) 767 | 768 | def state_gethost(self): 769 | host = SlurmSpawner.state_gethost(self) 770 | return self.daemon_resolver.get(host, host) 771 | 772 | 773 | class GridengineSpawner(BatchSpawnerBase): 774 | batch_script = Unicode( 775 | """#!/bin/bash 776 | #$ -j yes 777 | #$ -N spawner-jupyterhub 778 | #$ -o {homedir}/.jupyterhub.sge.out 779 | #$ -e {homedir}/.jupyterhub.sge.err 780 | #$ -v {keepvars} 781 | #$ {options} 782 | 783 | set -euo pipefail 784 | 785 | {prologue} 786 | {cmd} 787 | {epilogue} 788 | """ 789 | ).tag(config=True) 790 | 791 | # outputs job id string 792 | batch_submit_cmd = Unicode("qsub").tag(config=True) 793 | # outputs job data XML string 794 | batch_query_cmd = Unicode("qstat -xml").tag(config=True) 795 | batch_cancel_cmd = Unicode("qdel {job_id}").tag(config=True) 796 | 797 | def parse_job_id(self, output): 798 | return output.split(" ")[2] 799 | 800 | def state_ispending(self): 801 | if self.job_status: 802 | job_info = ET.fromstring(self.job_status).find( 803 | f".//job_list[JB_job_number='{self.job_id}']" 804 | ) 805 | if job_info is not None: 806 | return job_info.attrib.get("state") == "pending" 807 | return False 808 | 809 | def state_isrunning(self): 810 | if self.job_status: 811 | job_info = ET.fromstring(self.job_status).find( 812 | f".//job_list[JB_job_number='{self.job_id}']" 813 | ) 814 | if job_info is not None: 815 | return job_info.attrib.get("state") == "running" 816 | return False 817 | 818 | def state_gethost(self): 819 | if self.job_status: 820 | queue_name = ET.fromstring(self.job_status).find( 821 | f".//job_list[JB_job_number='{self.job_id}']/queue_name" 822 | ) 823 | if queue_name is not None and queue_name.text: 824 | return queue_name.text.split("@")[1] 825 | 826 | self.log.error( 827 | "Spawner unable to match host addr in job {} with status {}".format( 828 | self.job_id, self.job_status 829 | ) 830 | ) 831 | return 832 | 833 | def get_env(self): 834 | env = super().get_env() 835 | 836 | # SGE relies on environment variables to launch local jobs. Ensure that these values are included 837 | # in the environment used to run the spawner. 
838 |         for key in [
839 |             "SGE_CELL",
840 |             "SGE_EXECD",
841 |             "SGE_ROOT",
842 |             "SGE_CLUSTER_NAME",
843 |             "SGE_QMASTER_PORT",
844 |             "SGE_EXECD_PORT",
845 |             "PATH",
846 |         ]:
847 |             if key in os.environ and key not in env:
848 |                 env[key] = os.environ[key]
849 |         return env
850 | 
851 | 
852 | class CondorSpawner(UserEnvMixin, BatchSpawnerRegexStates):
853 |     batch_script = Unicode(
854 |         """
855 | Executable = /bin/sh
856 | RequestMemory = {memory}
857 | RequestCpus = {nprocs}
858 | Arguments = \"-c 'exec {cmd}'\"
859 | Remote_Initialdir = {homedir}
860 | Output = {homedir}/.jupyterhub.condor.out
861 | Error = {homedir}/.jupyterhub.condor.err
862 | ShouldTransferFiles = False
863 | GetEnv = True
864 | {options}
865 | Queue
866 | """
867 |     ).tag(config=True)
868 | 
869 |     # outputs job id string
870 |     batch_submit_cmd = Unicode("condor_submit").tag(config=True)
871 |     # outputs job data XML string
872 |     batch_query_cmd = Unicode(
873 |         'condor_q {job_id} -format "%s, " JobStatus -format "%s" RemoteHost -format "\n" True'
874 |     ).tag(config=True)
875 |     batch_cancel_cmd = Unicode("condor_rm {job_id}").tag(config=True)
876 |     # job status: 1 = pending, 2 = running
877 |     state_pending_re = Unicode(r"^1,").tag(config=True)
878 |     state_running_re = Unicode(r"^2,").tag(config=True)
879 |     state_exechost_re = Unicode(r"^\w*, .*@([^ ]*)").tag(config=True)
880 | 
881 |     def parse_job_id(self, output):
882 |         match = re.search(r".*submitted to cluster ([0-9]+)", output)
883 |         if match:
884 |             return match.groups()[0]
885 | 
886 |         error_msg = "CondorSpawner unable to parse jobID from text: " + output
887 |         self.log.error(error_msg)
888 |         raise Exception(error_msg)
889 | 
890 |     def cmd_formatted_for_batch(self):
891 |         return super().cmd_formatted_for_batch().replace('"', '""').replace("'", "''")
892 | 
893 | 
894 | class LsfSpawner(BatchSpawnerBase):
895 |     """A Spawner that uses IBM's Platform Load Sharing Facility (LSF) to launch notebooks."""
896 | 
897 |     batch_script = Unicode(
898 |         """#!/bin/sh
899 | #BSUB -R "select[type==any]"  # Allow spawning on non-uniform hardware
900 | #BSUB -R "span[hosts=1]"  # Only spawn job on one server
901 | #BSUB -q {queue}
902 | #BSUB -J spawner-jupyterhub
903 | #BSUB -o {homedir}/.jupyterhub.lsf.out
904 | #BSUB -e {homedir}/.jupyterhub.lsf.err
905 | 
906 | set -eu
907 | 
908 | {prologue}
909 | {cmd}
910 | {epilogue}
911 | """
912 |     ).tag(config=True)
913 | 
914 |     batch_submit_cmd = Unicode("bsub").tag(config=True)
915 |     batch_query_cmd = Unicode('bjobs -a -noheader -o "STAT EXEC_HOST" {job_id}').tag(
916 |         config=True
917 |     )
918 |     batch_cancel_cmd = Unicode("bkill {job_id}").tag(config=True)
919 | 
920 |     def get_env(self):
921 |         env = super().get_env()
922 | 
923 |         # LSF relies on environment variables to launch local jobs. Ensure that these values are included
924 |         # in the environment used to run the spawner.
925 |         for key in [
926 |             "LSF_ENVDIR",
927 |             "LSF_SERVERDIR",
928 |             "LSF_FULL_VERSION",
929 |             "LSF_LIBDIR",
930 |             "LSF_BINDIR",
931 |         ]:
932 |             if key in os.environ and key not in env:
933 |                 env[key] = os.environ[key]
934 |         return env
935 | 
936 |     def parse_job_id(self, output):
937 |         # Assumes output in the following form:
938 |         #   "Job <1815> is submitted to default queue <normal>."
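        #   e.g. output.split(" ")[1] -> "<1815>", then .strip("<>") -> "1815"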
939 |         return output.split(" ")[1].strip("<>")
940 | 
941 |     def state_ispending(self):
942 |         # Parse results of batch_query_cmd
943 |         # Output determined by results of self.batch_query_cmd
944 |         if self.job_status:
945 |             return self.job_status.split(" ")[0].upper() in {"PEND", "PSUSP"}
946 | 
947 |     def state_isrunning(self):
948 |         if self.job_status:
949 |             return self.job_status.split(" ")[0].upper() == "RUN"
950 | 
951 |     def state_gethost(self):
952 |         if self.job_status:
953 |             return self.job_status.split(" ")[1].strip().split(":")[0]
954 | 
955 |         self.log.error(
956 |             "Spawner unable to match host addr in job {} with status {}".format(
957 |                 self.job_id, self.job_status
958 |             )
959 |         )
960 |         return
961 | 
962 | 
963 | # vim: set ai expandtab softtabstop=4:
964 | 
--------------------------------------------------------------------------------
/batchspawner/singleuser.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | from runpy import run_path
 4 | from shutil import which
 5 | from urllib.parse import urlparse, urlunparse
 6 | 
 7 | import requests
 8 | from jupyterhub.services.auth import HubAuth
 9 | from jupyterhub.utils import random_port, url_path_join
10 | 
11 | 
12 | def main(argv=None):
13 |     port = random_port()
14 |     hub_auth = HubAuth()
15 | 
16 |     url = url_path_join(hub_auth.api_url, "batchspawner")
17 |     headers = {"Authorization": f"token {hub_auth.api_token}"}
18 | 
19 |     # internal_ssl kwargs
20 |     kwargs = {}
21 |     if hub_auth.certfile and hub_auth.keyfile:
22 |         kwargs["cert"] = (hub_auth.certfile, hub_auth.keyfile)
23 |     if hub_auth.client_ca:
24 |         kwargs["verify"] = hub_auth.client_ca
25 | 
26 |     requests.post(
27 |         url,
28 |         headers=headers,
29 |         json={"port": port},
30 |         **kwargs,
31 |     )
32 | 
33 |     # Read the env var JUPYTERHUB_SERVICE_URL and replace port in the URL
34 |     # with free port that we found here
35 |     # JUPYTERHUB_SERVICE_URL is added in JupyterHub 2.0
36 |     service_url_env = os.environ.get("JUPYTERHUB_SERVICE_URL", "")
37 |     if service_url_env:
38 |         url = urlparse(os.environ["JUPYTERHUB_SERVICE_URL"])
39 |         url = url._replace(netloc=f"{url.hostname}:{port}")
40 |         os.environ["JUPYTERHUB_SERVICE_URL"] = urlunparse(url)
41 |     else:
42 |         # JupyterHub < 2.0 specifies port on the command-line
43 |         sys.argv.append(f"--port={port}")
44 | 
45 |     cmd_path = which(sys.argv[1])
46 |     sys.argv = sys.argv[1:]
47 |     run_path(cmd_path, run_name="__main__")
48 | 
49 | 
50 | if __name__ == "__main__":
51 |     main()
52 | 
--------------------------------------------------------------------------------
/batchspawner/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jupyterhub/batchspawner/87874004b2dc761598405a564a26e5f6775bb473/batchspawner/tests/__init__.py
--------------------------------------------------------------------------------
/batchspawner/tests/conftest.py:
--------------------------------------------------------------------------------
1 | """Relevant pytest fixtures are re-used from JupyterHub's test suite"""
2 | 
3 | # We use "db" directly, but we also need event_loop
4 | from jupyterhub.tests.conftest import db, event_loop  # noqa
5 | 
--------------------------------------------------------------------------------
/batchspawner/tests/test_spawners.py:
--------------------------------------------------------------------------------
 1 | """Test BatchSpawner and subclasses"""
 2 | 
 3 | import asyncio
 4 | import pwd
 5 | import re
 6 | import time
 7 | from getpass import getuser
 8 | from unittest import mock
 9 | 
10 | import pytest
11 | from jupyterhub import orm
12 | from jupyterhub.objects import Hub, Server
13 | from jupyterhub.user import User
14 | from traitlets import Unicode
15 | 
16 | from .. import BatchSpawnerRegexStates, JobStatus
17 | 
18 | testhost = "userhost123"
19 | testjob = "12345"
20 | testport = 54321
21 | 
22 | 
23 | @pytest.fixture(autouse=True)
24 | def _always_get_my_home():
25 |     # pwd.getpwnam() is always called with the current user
26 |     # ignoring the requested name, which usually doesn't exist
27 |     getpwnam = pwd.getpwnam
28 |     with mock.patch.object(pwd, "getpwnam", lambda name: getpwnam(getuser())):
29 |         yield
30 | 
31 | 
32 | class BatchDummy(BatchSpawnerRegexStates):
33 |     exec_prefix = ""
34 |     batch_submit_cmd = Unicode("cat > /dev/null; echo " + testjob)
35 |     batch_query_cmd = Unicode("echo RUN " + testhost)
36 |     batch_cancel_cmd = Unicode("echo STOP")
37 |     batch_script = Unicode("{cmd}")
38 |     state_pending_re = Unicode("PEND")
39 |     state_running_re = Unicode("RUN")
40 |     state_exechost_re = Unicode("RUN (.*)$")
41 |     state_unknown_re = Unicode("UNKNOWN")
42 | 
43 |     cmd_expectlist = None
44 |     out_expectlist = None
45 | 
46 |     async def run_command(self, *args, **kwargs):
47 |         """Overridden run command to test templating and outputs"""
48 |         cmd = args[0]
49 |         # Test that the command matches the expectations
50 |         if self.cmd_expectlist:
51 |             run_re = self.cmd_expectlist.pop(0)
52 |             if run_re:
53 |                 print("run:", run_re)
54 |                 assert (
55 |                     run_re.search(cmd) is not None
56 |                 ), f"Failed test: re={run_re} cmd={cmd}"
57 |         # Run command normally
58 |         out = await super().run_command(*args, **kwargs)
59 |         # Test that the output matches the expectations
60 |         if self.out_expectlist:
61 |             out_re = self.out_expectlist.pop(0)
62 |             if out_re:
63 |                 print("out:", out_re)
64 |                 assert (
65 |                     out_re.search(out) is not None
66 |                 ), f"Failed output: re={out_re} cmd={cmd} out={out}"
67 |         return out
68 | 
69 | 
70 | def new_spawner(db, spawner_class=BatchDummy, **kwargs):
71 |     kwargs.setdefault("cmd", ["singleuser_command"])
72 |     user = db.query(orm.User).first()
73 |     hub = Hub()
74 |     user = User(user, {})
75 |     server = Server()
76 |     # Set it after construction because it isn't a traitlet.
77 | kwargs.setdefault("hub", hub) 78 | kwargs.setdefault("user", user) 79 | kwargs.setdefault("poll_interval", 1) 80 | 81 | # These are not traitlets so we have to set them here 82 | spawner = user._new_spawner("", spawner_class=spawner_class, **kwargs) 83 | spawner.server = server 84 | spawner.mock_port = testport 85 | return spawner 86 | 87 | 88 | def check_ip(spawner, value): 89 | assert spawner.ip == value 90 | 91 | 92 | async def test_spawner_start_stop_poll(db, event_loop): 93 | spawner = new_spawner(db=db) 94 | 95 | status = await asyncio.wait_for(spawner.poll(), timeout=5) 96 | assert status == 1 97 | assert spawner.job_id == "" 98 | assert spawner.get_state() == {} 99 | 100 | await asyncio.wait_for(spawner.start(), timeout=5) 101 | check_ip(spawner, testhost) 102 | assert spawner.job_id == testjob 103 | 104 | status = await asyncio.wait_for(spawner.poll(), timeout=5) 105 | assert status is None 106 | spawner.batch_query_cmd = "echo NOPE" 107 | await asyncio.wait_for(spawner.stop(), timeout=5) 108 | status = await asyncio.wait_for(spawner.poll(), timeout=5) 109 | assert status == 1 110 | assert spawner.get_state() == {} 111 | 112 | 113 | async def test_stress_submit(db, event_loop): 114 | for i in range(200): 115 | time.sleep(0.01) 116 | test_spawner_start_stop_poll(db, event_loop) 117 | 118 | 119 | async def test_spawner_state_reload(db, event_loop): 120 | spawner = new_spawner(db=db) 121 | assert spawner.get_state() == {} 122 | 123 | await asyncio.wait_for(spawner.start(), timeout=30) 124 | check_ip(spawner, testhost) 125 | assert spawner.job_id == testjob 126 | 127 | state = spawner.get_state() 128 | assert state == dict(job_id=testjob, job_status="RUN " + testhost) 129 | spawner = new_spawner(db=db) 130 | spawner.clear_state() 131 | assert spawner.get_state() == {} 132 | spawner.load_state(state) 133 | # We used to check IP here, but that is actually only computed on start(), 134 | # and is not part of the spawner's persistent state 135 | assert spawner.job_id == testjob 136 | 137 | 138 | async def test_submit_failure(db, event_loop): 139 | spawner = new_spawner(db=db) 140 | assert spawner.get_state() == {} 141 | spawner.batch_submit_cmd = "cat > /dev/null; true" 142 | with pytest.raises(RuntimeError): 143 | await asyncio.wait_for(spawner.start(), timeout=30) 144 | assert spawner.job_id == "" 145 | assert spawner.job_status == "" 146 | 147 | 148 | async def test_submit_pending_fails(db, event_loop): 149 | """Submission works, but the batch query command immediately fails""" 150 | spawner = new_spawner(db=db) 151 | assert spawner.get_state() == {} 152 | spawner.batch_query_cmd = "echo xyz" 153 | with pytest.raises(RuntimeError): 154 | await asyncio.wait_for(spawner.start(), timeout=30) 155 | status = await asyncio.wait_for(spawner.query_job_status(), timeout=30) 156 | assert status == JobStatus.NOTFOUND 157 | assert spawner.job_id == "" 158 | assert spawner.job_status == "" 159 | 160 | 161 | async def test_poll_fails(db, event_loop): 162 | """Submission works, but a later .poll() fails""" 163 | spawner = new_spawner(db=db) 164 | assert spawner.get_state() == {} 165 | # The start is successful: 166 | await asyncio.wait_for(spawner.start(), timeout=30) 167 | spawner.batch_query_cmd = "echo xyz" 168 | # Now, the poll fails: 169 | await asyncio.wait_for(spawner.poll(), timeout=30) 170 | # .poll() will run self.clear_state() if it's not found: 171 | assert spawner.job_id == "" 172 | assert spawner.job_status == "" 173 | 174 | 175 | async def test_unknown_status(db, event_loop): 176 
| """Polling returns an unknown status""" 177 | spawner = new_spawner(db=db) 178 | assert spawner.get_state() == {} 179 | # The start is successful: 180 | await asyncio.wait_for(spawner.start(), timeout=30) 181 | spawner.batch_query_cmd = "echo UNKNOWN" 182 | # This poll should not fail: 183 | await asyncio.wait_for(spawner.poll(), timeout=30) 184 | status = await asyncio.wait_for(spawner.query_job_status(), timeout=30) 185 | assert status == JobStatus.UNKNOWN 186 | assert spawner.job_id == "12345" 187 | assert spawner.job_status != "" 188 | 189 | 190 | async def test_templates(db, event_loop): 191 | """Test templates in the run_command commands""" 192 | spawner = new_spawner(db=db) 193 | 194 | # Test when not running 195 | spawner.cmd_expectlist = [ 196 | re.compile(".*RUN"), 197 | ] 198 | status = await asyncio.wait_for(spawner.poll(), timeout=5) 199 | assert status == 1 200 | assert spawner.job_id == "" 201 | assert spawner.get_state() == {} 202 | 203 | # Test starting 204 | spawner.cmd_expectlist = [ 205 | re.compile(".*echo"), 206 | re.compile(".*RUN"), 207 | ] 208 | await asyncio.wait_for(spawner.start(), timeout=5) 209 | check_ip(spawner, testhost) 210 | assert spawner.job_id == testjob 211 | 212 | # Test poll - running 213 | spawner.cmd_expectlist = [ 214 | re.compile(".*RUN"), 215 | ] 216 | status = await asyncio.wait_for(spawner.poll(), timeout=5) 217 | assert status is None 218 | 219 | # Test stopping 220 | spawner.batch_query_cmd = "echo NOPE" 221 | spawner.cmd_expectlist = [ 222 | re.compile(".*STOP"), 223 | re.compile(".*NOPE"), 224 | ] 225 | await asyncio.wait_for(spawner.stop(), timeout=5) 226 | status = await asyncio.wait_for(spawner.poll(), timeout=5) 227 | assert status == 1 228 | assert spawner.get_state() == {} 229 | 230 | 231 | async def test_batch_script(db, event_loop): 232 | """Test that the batch script substitutes {cmd}""" 233 | 234 | class BatchDummyTestScript(BatchDummy): 235 | async def _get_batch_script(self, **subvars): 236 | script = await super()._get_batch_script(**subvars) 237 | assert "singleuser_command" in script 238 | return script 239 | 240 | spawner = new_spawner(db=db, spawner_class=BatchDummyTestScript) 241 | # status = await asyncio.wait_for(spawner.poll(), timeout=5) 242 | await asyncio.wait_for(spawner.start(), timeout=5) 243 | # status = await asyncio.wait_for(spawner.poll(), timeout=5) 244 | # await asyncio.wait_for(spawner.stop(), timeout=5) 245 | 246 | 247 | async def test_exec_prefix(db, event_loop): 248 | """Test that all run_commands have exec_prefix""" 249 | 250 | class BatchDummyTestScript(BatchDummy): 251 | exec_prefix = "PREFIX" 252 | 253 | async def run_command(self, cmd, *args, **kwargs): 254 | assert cmd.startswith("PREFIX ") 255 | cmd = cmd[7:] 256 | print(cmd) 257 | out = await super().run_command(cmd, *args, **kwargs) 258 | return out 259 | 260 | spawner = new_spawner(db=db, spawner_class=BatchDummyTestScript) 261 | # Not running 262 | status = await asyncio.wait_for(spawner.poll(), timeout=5) 263 | assert status == 1 264 | # Start 265 | await asyncio.wait_for(spawner.start(), timeout=5) 266 | assert spawner.job_id == testjob 267 | # Poll 268 | status = await asyncio.wait_for(spawner.poll(), timeout=5) 269 | assert status is None 270 | # Stop 271 | spawner.batch_query_cmd = "echo NOPE" 272 | await asyncio.wait_for(spawner.stop(), timeout=5) 273 | status = await asyncio.wait_for(spawner.poll(), timeout=5) 274 | assert status == 1 275 | 276 | 277 | async def run_spawner_script( 278 | db, spawner, script, 
batch_script_re_list=None, spawner_kwargs={}
279 | ):
280 |     """Run a spawner script and test that the output and behavior is as expected.
281 | 
282 |     db: same as in this module
283 |     spawner: the BatchSpawnerBase subclass to test
284 |     script: list of (input_re_to_match, output)
285 |     batch_script_re_list: if given, assert batch script matches all of these
286 |     """
287 |     # Create the expected scripts
288 |     cmd_expectlist, out_list = zip(*script)
289 |     cmd_expectlist = list(cmd_expectlist)
290 |     out_list = list(out_list)
291 | 
292 |     class BatchDummyTestScript(spawner):
293 |         async def run_command(self, cmd, input=None, env=None):
294 |             # Test the input
295 |             run_re = cmd_expectlist.pop(0)
296 |             if run_re:
297 |                 print(f'run: "{cmd}" [{run_re}]')
298 |                 assert (
299 |                     run_re.search(cmd) is not None
300 |                 ), f"Failed test: re={run_re} cmd={cmd}"
301 |             # Test the stdin - it will only ever be the batch script. Assert
302 |             # that each regular expression in batch_script_re_list matches
303 |             # the batch script.
304 |             if batch_script_re_list and input:
305 |                 batch_script = input
306 |                 for match_re in batch_script_re_list:
307 |                     assert (
308 |                         match_re.search(batch_script) is not None
309 |                     ), f"Batch script does not match {match_re}"
310 |             # Return expected output.
311 |             out = out_list.pop(0)
312 |             print("  --> " + out)
313 |             return out
314 | 
315 |     spawner = new_spawner(db=db, spawner_class=BatchDummyTestScript, **spawner_kwargs)
316 |     # Not running at beginning (no command run)
317 |     status = await asyncio.wait_for(spawner.poll(), timeout=5)
318 |     assert status == 1
319 |     # batch_submit_cmd
320 |     # batch_query_cmd    (result=pending)
321 |     # batch_query_cmd    (result=running)
322 |     await asyncio.wait_for(spawner.start(), timeout=5)
323 |     assert spawner.job_id == testjob
324 |     check_ip(spawner, testhost)
325 |     # batch_query_cmd
326 |     status = await asyncio.wait_for(spawner.poll(), timeout=5)
327 |     assert status is None
328 |     # batch_cancel_cmd
329 |     await asyncio.wait_for(spawner.stop(), timeout=5)
330 |     # batch_poll_cmd
331 |     status = await asyncio.wait_for(spawner.poll(), timeout=5)
332 |     assert status == 1
333 | 
334 | 
335 | async def test_torque(db, event_loop):
336 |     spawner_kwargs = {
337 |         "req_nprocs": "5",
338 |         "req_memory": "5678",
339 |         "req_options": "some_option_asdf",
340 |         "req_prologue": "PROLOGUE",
341 |         "req_epilogue": "EPILOGUE",
342 |     }
343 |     batch_script_re_list = [
344 |         re.compile(
345 |             r"^PROLOGUE.*^batchspawner-singleuser singleuser_command.*^EPILOGUE",
346 |             re.S | re.M,
347 |         ),
348 |         re.compile(r"mem=5678"),
349 |         re.compile(r"ppn=5"),
350 |         re.compile(r"^#PBS some_option_asdf", re.M),
351 |     ]
352 |     poll_running = (
353 |         re.compile(r"sudo.*qstat"),
354 |         f"<job_state>R</job_state><exec_host>{testhost}/1</exec_host>",
355 |     )
356 |     script = [
357 |         (re.compile(r"sudo.*qsub"), str(testjob)),
358 |         (
359 |             re.compile(r"sudo.*qstat"),
360 |             "<job_state>Q</job_state>",
361 |         ),  # pending
362 |         poll_running,
363 |         poll_running,
364 |         poll_running,
365 |         (re.compile(r"sudo.*qdel"), "STOP"),
366 |         (re.compile(r"sudo.*qstat"), ""),
367 |     ]
368 |     from .. 
import TorqueSpawner 369 | 370 | await run_spawner_script( 371 | db, 372 | TorqueSpawner, 373 | script, 374 | batch_script_re_list=batch_script_re_list, 375 | spawner_kwargs=spawner_kwargs, 376 | ) 377 | 378 | 379 | async def test_moab(db, event_loop): 380 | spawner_kwargs = { 381 | "req_nprocs": "5", 382 | "req_memory": "5678", 383 | "req_options": "some_option_asdf", 384 | "req_prologue": "PROLOGUE", 385 | "req_epilogue": "EPILOGUE", 386 | } 387 | batch_script_re_list = [ 388 | re.compile( 389 | r"^PROLOGUE.*^batchspawner-singleuser singleuser_command.*^EPILOGUE", 390 | re.S | re.M, 391 | ), 392 | re.compile(r"mem=5678"), 393 | re.compile(r"ppn=5"), 394 | re.compile(r"^#PBS some_option_asdf", re.M), 395 | ] 396 | poll_running = ( 397 | re.compile(r"sudo.*mdiag"), 398 | f'State="Running" AllocNodeList="{testhost}"', 399 | ) 400 | script = [ 401 | (re.compile(r"sudo.*msub"), str(testjob)), 402 | (re.compile(r"sudo.*mdiag"), 'State="Idle"'), # pending 403 | poll_running, 404 | poll_running, 405 | poll_running, 406 | (re.compile(r"sudo.*mjobctl.*-c"), "STOP"), 407 | (re.compile(r"sudo.*mdiag"), ""), 408 | ] 409 | from .. import MoabSpawner 410 | 411 | await run_spawner_script( 412 | db, 413 | MoabSpawner, 414 | script, 415 | batch_script_re_list=batch_script_re_list, 416 | spawner_kwargs=spawner_kwargs, 417 | ) 418 | 419 | 420 | async def test_pbs(db, event_loop): 421 | spawner_kwargs = { 422 | "req_nprocs": "4", 423 | "req_memory": "10256", 424 | "req_options": "some_option_asdf", 425 | "req_host": "some_pbs_admin_node", 426 | "req_runtime": "08:00:00", 427 | } 428 | batch_script_re_list = [ 429 | re.compile(r"singleuser_command"), 430 | re.compile(r"select=1"), 431 | re.compile(r"ncpus=4"), 432 | re.compile(r"mem=10256"), 433 | re.compile(r"walltime=08:00:00"), 434 | re.compile(r"@some_pbs_admin_node"), 435 | re.compile(r"^#PBS some_option_asdf", re.M), 436 | ] 437 | poll_running = ( 438 | re.compile(r"sudo.*qstat"), 439 | f"job_state = R\nexec_host = {testhost}/2*1", 440 | ) 441 | script = [ 442 | (re.compile(r"sudo.*qsub"), str(testjob)), 443 | (re.compile(r"sudo.*qstat"), "job_state = Q"), # pending 444 | poll_running, 445 | poll_running, 446 | poll_running, 447 | (re.compile(r"sudo.*qdel"), "STOP"), 448 | (re.compile(r"sudo.*qstat"), ""), 449 | ] 450 | from .. import PBSSpawner 451 | 452 | await run_spawner_script( 453 | db, 454 | PBSSpawner, 455 | script, 456 | batch_script_re_list=batch_script_re_list, 457 | spawner_kwargs=spawner_kwargs, 458 | ) 459 | 460 | 461 | async def test_slurm(db, event_loop): 462 | spawner_kwargs = { 463 | "req_runtime": "3-05:10:10", 464 | "req_nprocs": "5", 465 | "req_memory": "5678", 466 | "req_options": "some_option_asdf", 467 | "req_prologue": "PROLOGUE", 468 | "req_epilogue": "EPILOGUE", 469 | "req_reservation": "RES123", 470 | "req_gres": "GRES123", 471 | } 472 | batch_script_re_list = [ 473 | re.compile( 474 | r"PROLOGUE.*srun batchspawner-singleuser singleuser_command.*EPILOGUE", re.S 475 | ), 476 | re.compile(r"^\#SBATCH \s+ --cpus-per-task=5", re.X | re.M), 477 | re.compile(r"^\#SBATCH \s+ --time=3-05:10:10", re.X | re.M), 478 | re.compile(r"^\#SBATCH \s+ some_option_asdf", re.X | re.M), 479 | re.compile(r"^\#SBATCH \s+ --reservation=RES123", re.X | re.M), 480 | re.compile(r"^\#SBATCH \s+ --gres=GRES123", re.X | re.M), 481 | ] 482 | from .. 
import SlurmSpawner
483 | 
484 |     await run_spawner_script(
485 |         db,
486 |         SlurmSpawner,
487 |         normal_slurm_script,
488 |         batch_script_re_list=batch_script_re_list,
489 |         spawner_kwargs=spawner_kwargs,
490 |     )
491 | 
492 | 
493 | # We tend to use slurm as our typical example job. These allow quick
494 | # Slurm examples.
495 | normal_slurm_script = [
496 |     (re.compile(r"sudo.*sbatch"), str(testjob)),
497 |     (re.compile(r"sudo.*squeue"), "PENDING "),  # pending
498 |     (
499 |         re.compile(r"sudo.*squeue"),
500 |         "slurm_load_jobs error: Unable to contact slurm controller",
501 |     ),  # unknown
502 |     (re.compile(r"sudo.*squeue"), "RUNNING " + testhost),  # running
503 |     (re.compile(r"sudo.*squeue"), "RUNNING " + testhost),
504 |     (re.compile(r"sudo.*squeue"), "RUNNING " + testhost),
505 |     (re.compile(r"sudo.*scancel"), "STOP"),
506 |     (re.compile(r"sudo.*squeue"), ""),
507 | ]
508 | from .. import SlurmSpawner
509 | 
510 | 
511 | async def run_typical_slurm_spawner(
512 |     db,
513 |     spawner=SlurmSpawner,
514 |     script=normal_slurm_script,
515 |     batch_script_re_list=None,
516 |     spawner_kwargs={},
517 | ):
518 |     """Run a full slurm job with default (overrideable) parameters.
519 | 
520 |     This is useful, for example, for changing options and testing the
521 |     effect of batch scripts.
522 |     """
523 |     return await run_spawner_script(
524 |         db,
525 |         spawner,
526 |         script,
527 |         batch_script_re_list=batch_script_re_list,
528 |         spawner_kwargs=spawner_kwargs,
529 |     )
530 | 
531 | 
532 | # async def test_gridengine(db, event_loop):
533 | #     spawner_kwargs = {
534 | #         'req_options': 'some_option_asdf',
535 | #     }
536 | #     batch_script_re_list = [
537 | #         re.compile(r'singleuser_command'),
538 | #         re.compile(r'#$\s+some_option_asdf'),
539 | #     ]
540 | #     script = [
541 | #         (re.compile(r'sudo.*qsub'), 'x x '+str(testjob)),
542 | #         (re.compile(r'sudo.*qstat'), 'PENDING '),
543 | #         (re.compile(r'sudo.*qstat'), 'RUNNING '+testhost),
544 | #         (re.compile(r'sudo.*qstat'), 'RUNNING '+testhost),
545 | #         (re.compile(r'sudo.*qdel'), 'STOP'),
546 | #         (re.compile(r'sudo.*qstat'), ''),
547 | #     ]
548 | #     from .. import GridengineSpawner
549 | #     await run_spawner_script(db, GridengineSpawner, script,
550 | #                              batch_script_re_list=batch_script_re_list,
551 | #                              spawner_kwargs=spawner_kwargs)
552 | 
553 | 
554 | async def test_condor(db, event_loop):
555 |     spawner_kwargs = {
556 |         "req_nprocs": "5",
557 |         "req_memory": "5678",
558 |         "req_options": "some_option_asdf",
559 |     }
560 |     batch_script_re_list = [
561 |         re.compile(r"exec batchspawner-singleuser singleuser_command"),
562 |         re.compile(r"RequestCpus = 5"),
563 |         re.compile(r"RequestMemory = 5678"),
564 |         re.compile(r"^some_option_asdf", re.M),
565 |     ]
566 |     script = [
567 |         (
568 |             re.compile(r"sudo.*condor_submit"),
569 |             f"submitted to cluster {str(testjob)}",
570 |         ),
571 |         (re.compile(r"sudo.*condor_q"), "1,"),  # pending
572 |         (re.compile(r"sudo.*condor_q"), f"2, @{testhost}"),  # running
573 |         (re.compile(r"sudo.*condor_q"), f"2, @{testhost}"),
574 |         (re.compile(r"sudo.*condor_q"), f"2, @{testhost}"),
575 |         (re.compile(r"sudo.*condor_rm"), "STOP"),
576 |         (re.compile(r"sudo.*condor_q"), ""),
577 |     ]
578 |     from .. import CondorSpawner
579 | 
580 |     await run_spawner_script(
581 |         db,
582 |         CondorSpawner,
583 |         script,
584 |         batch_script_re_list=batch_script_re_list,
585 |         spawner_kwargs=spawner_kwargs,
586 |     )
587 | 
588 | 
589 | async def test_lsf(db, event_loop):
590 |     spawner_kwargs = {
591 |         "req_nprocs": "5",
592 |         "req_memory": "5678",
593 |         "req_options": "some_option_asdf",
594 |         "req_queue": "some_queue",
595 |         "req_prologue": "PROLOGUE",
596 |         "req_epilogue": "EPILOGUE",
597 |     }
598 |     batch_script_re_list = [
599 |         re.compile(
600 |             r"^PROLOGUE.*^batchspawner-singleuser singleuser_command.*^EPILOGUE",
601 |             re.S | re.M,
602 |         ),
603 |         re.compile(r"#BSUB\s+-q\s+some_queue", re.M),
604 |     ]
605 |     script = [
606 |         (
607 |             re.compile(r"sudo.*bsub"),
608 |             f"Job <{str(testjob)}> is submitted to default queue <normal>.",
609 |         ),
610 |         (re.compile(r"sudo.*bjobs"), "PEND "),  # pending
611 |         (re.compile(r"sudo.*bjobs"), f"RUN {testhost}"),  # running
612 |         (re.compile(r"sudo.*bjobs"), f"RUN {testhost}"),
613 |         (re.compile(r"sudo.*bjobs"), f"RUN {testhost}"),
614 |         (re.compile(r"sudo.*bkill"), "STOP"),
615 |         (re.compile(r"sudo.*bjobs"), ""),
616 |     ]
617 |     from .. import LsfSpawner
618 | 
619 |     await run_spawner_script(
620 |         db,
621 |         LsfSpawner,
622 |         script,
623 |         batch_script_re_list=batch_script_re_list,
624 |         spawner_kwargs=spawner_kwargs,
625 |     )
626 | 
627 | 
628 | async def test_keepvars(db, event_loop):
629 |     # req_keepvars
630 |     spawner_kwargs = {
631 |         "req_keepvars": "ABCDE",
632 |     }
633 |     batch_script_re_list = [
634 |         re.compile(r"--export=ABCDE", re.X | re.M),
635 |     ]
636 |     await run_typical_slurm_spawner(
637 |         db,
638 |         spawner_kwargs=spawner_kwargs,
639 |         batch_script_re_list=batch_script_re_list,
640 |     )
641 | 
642 |     # req_keepvars AND req_keepvars_extra together
643 |     spawner_kwargs = {
644 |         "req_keepvars": "ABCDE",
645 |         "req_keepvars_extra": "XYZ",
646 |     }
647 |     batch_script_re_list = [
648 |         re.compile(r"--export=ABCDE,XYZ", re.X | re.M),
649 |     ]
650 |     await run_typical_slurm_spawner(
651 |         db,
652 |         spawner_kwargs=spawner_kwargs,
653 |         batch_script_re_list=batch_script_re_list,
654 |     )
655 | 
656 | 
657 | async def test_early_stop(db, event_loop):
658 |     script = [
659 |         (re.compile(r"sudo.*sbatch"), str(testjob)),
660 |         (re.compile(r"sudo.*squeue"), "PENDING "),  # pending
661 |         (
662 |             re.compile(r"sudo.*squeue"),
663 |             "slurm_load_jobs error: Unable to contact slurm controller",
664 |         ),  # unknown
665 |         # job exits early during start
666 |         (re.compile(r"sudo.*squeue"), ""),
667 |         (re.compile(r"sudo.*scancel"), "STOP"),
668 |     ]
669 |     with pytest.raises(RuntimeError, match="job has disappeared"):
670 |         await run_spawner_script(db, SlurmSpawner, script)
671 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools", "wheel"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | 
 6 | # autoflake is used for autoformatting Python code
 7 | #
 8 | # ref: https://github.com/PyCQA/autoflake#readme
 9 | #
10 | [tool.autoflake]
11 | ignore-init-module-imports = true
12 | #remove-all-unused-imports = true
13 | remove-duplicate-keys = true
14 | remove-unused-variables = true
15 | 
16 | 
17 | # isort is used for autoformatting Python code
18 | #
19 | # ref: https://pycqa.github.io/isort/
20 | #
21 | [tool.isort]
22 | profile = "black"
23 | 
24 | 
25 | # black is used for autoformatting Python code
26 | #
27 | # ref: https://black.readthedocs.io/en/stable/
28 | #
29 | [tool.black]
30 | target_version = [
31 |     "py36",
32 |     "py37",
33 |     "py38",
34 |     "py39",
35 |     "py310",
36 |     "py311",
37 |     "py312",
38 | ]
39 | 
40 | 
41 | # pytest is used for running Python based tests
42 | #
43 | # ref: https://docs.pytest.org/en/stable/
44 | #
45 | [tool.pytest.ini_options]
46 | addopts = "--verbose --color=yes --durations=10 --cov=batchspawner"
47 | asyncio_mode = "auto"
48 | testpaths = ["batchspawner/tests"]
49 | 
50 | 
51 | # pytest-cov / coverage is used to measure code coverage of tests
52 | #
53 | # ref: https://coverage.readthedocs.io/en/stable/config.html
54 | #
55 | [tool.coverage.run]
56 | omit = [
57 |     "batchspawner/tests/*",
58 | ]
59 | 
60 | 
61 | # tbump is used to simplify and standardize the release process when updating
62 | # the version, making a git commit and tag, and pushing changes.
63 | #
64 | # ref: https://github.com/your-tools/tbump#readme
65 | #
66 | [tool.tbump]
67 | github_url = "https://github.com/jupyterhub/batchspawner"
68 | 
69 | [tool.tbump.version]
70 | current = "1.3.1.dev"
71 | regex = '''
72 |     (?P<major>\d+)
73 |     \.
74 |     (?P<minor>\d+)
75 |     \.
76 |     (?P<patch>\d+)
77 |     (?P<pre>((a|b|rc)\d+)|)
78 |     \.?
79 |     (?P<dev>(?<=\.)dev\d*|)
80 | '''
81 | 
82 | [tool.tbump.git]
83 | message_template = "Bump to {new_version}"
84 | tag_template = "v{new_version}"
85 | 
86 | [[tool.tbump.file]]
87 | src = "setup.py"
88 | 
89 | [[tool.tbump.file]]
90 | src = "batchspawner/_version.py"
91 | 
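
The tbump version regex above can be sanity-checked in isolation. A quick standalone snippet (not part of the repository) that compiles the same pattern with re.VERBOSE and prints the named groups it extracts:

import re

# Same pattern as in [tool.tbump.version] above, compiled with re.VERBOSE so
# the multi-line layout is preserved.
VERSION_RE = re.compile(
    r"""
    (?P<major>\d+)
    \.
    (?P<minor>\d+)
    \.
    (?P<patch>\d+)
    (?P<pre>((a|b|rc)\d+)|)
    \.?
    (?P<dev>(?<=\.)dev\d*|)
    """,
    re.VERBOSE,
)

for version in ("1.3.1.dev", "1.3.1", "1.3.0rc1"):
    print(version, "->", VERSION_RE.fullmatch(version).groupdict())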


--------------------------------------------------------------------------------
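
For reference, the regex-driven state parsing that BatchSpawnerRegexStates subclasses rely on can be exercised directly. A minimal sketch (not part of the repository) using SlurmSpawner's default patterns against a fabricated squeue line:

import re

# Fabricated output in the shape produced by SlurmSpawner.batch_query_cmd,
# i.e. squeue -h -j {job_id} -o '%T %B'
job_status = "RUNNING node042.cluster"

state_running_re = r"^(?:RUNNING|COMPLETING)"  # SlurmSpawner default
state_exechost_re = r"\s+((?:[\w_-]+\.?)+)$"  # SlurmSpawner default

assert re.search(state_running_re, job_status)
match = re.search(state_exechost_re, job_status)
print(match.groups()[0])  # -> node042.cluster, used as the notebook ip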
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | with open("README.md") as f:
 4 |     long_description = f.read()
 5 | 
 6 | setup(
 7 |     name="batchspawner",
 8 |     entry_points={
 9 |         "console_scripts": ["batchspawner-singleuser=batchspawner.singleuser:main"],
10 |     },
11 |     packages=["batchspawner"],
12 |     version="1.3.1.dev",
13 |     description="""Batchspawner: A spawner for JupyterHub to spawn notebooks using batch resource managers.""",
14 |     long_description=long_description,
15 |     long_description_content_type="text/markdown",
16 |     author="Michael Milligan, Andrea Zonca, Mike Gilbert",
17 |     author_email="milligan@umn.edu",
18 |     url="http://jupyter.org",
19 |     license="BSD",
20 |     platforms="Linux, Mac OS X",
21 |     keywords=["Interactive", "Interpreter", "Shell", "Web", "Jupyter"],
22 |     classifiers=[
23 |         "Intended Audience :: Developers",
24 |         "Intended Audience :: System Administrators",
25 |         "Intended Audience :: Science/Research",
26 |         "License :: OSI Approved :: BSD License",
27 |         "Programming Language :: Python",
28 |         "Programming Language :: Python :: 3",
29 |     ],
30 |     project_urls={
31 |         "Bug Reports": "https://github.com/jupyterhub/batchspawner/issues",
32 |         "Source": "https://github.com/jupyterhub/batchspawner/",
33 |         "About Jupyterhub": "http://jupyterhub.readthedocs.io/en/latest/",
34 |         "Jupyter Project": "http://jupyter.org",
35 |     },
36 |     python_requires=">=3.6",
37 |     install_requires=[
38 |         "jinja2",
39 |         "jupyterhub>=1.5.1",
40 |     ],
41 |     extras_require={
42 |         "test": [
43 |             "pytest",
44 |             "pytest-asyncio",
45 |             "pytest-cov",
46 |             "notebook",
47 |         ],
48 |     },
49 | )
50 | 


--------------------------------------------------------------------------------
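
As a closing illustration, a minimal jupyterhub_config.py wiring JupyterHub to one of the spawners above might look like the sketch below. It is not part of the repository; the partition name and resource values are placeholders, and req_partition is assumed to be one of the req_* traits defined earlier in batchspawner.py (the SlurmSpawner batch_script references {{partition}}).

# jupyterhub_config.py -- illustrative sketch only; values are placeholders.
# The config object `c` is provided by JupyterHub when it loads this file.
c.JupyterHub.spawner_class = "batchspawner.SlurmSpawner"

# req_* traits become {{...}} substitution variables in batch_script
c.SlurmSpawner.req_runtime = "8:00:00"  # -> {{runtime}}, sbatch --time
c.SlurmSpawner.req_memory = "4G"  # -> {{memory}}, sbatch --mem
c.SlurmSpawner.req_nprocs = "2"  # -> {{nprocs}}, sbatch --cpus-per-task
c.SlurmSpawner.req_partition = "interactive"  # -> {{partition}}, assumed trait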