├── batchspawner
│   ├── tests
│   │   ├── __init__.py
│   │   ├── conftest.py
│   │   └── test_spawners.py
│   ├── __init__.py
│   ├── singleuser.py
│   ├── api.py
│   └── batchspawner.py
├── requirements.txt
├── MANIFEST.in
├── .gitignore
├── CONTRIBUTING.md
├── version.py
├── .flake8
├── .github
│   └── workflows
│       ├── python-publish.yml
│       └── test.yml
├── LICENSE
├── .pre-commit-config.yaml
├── SPAWNERS.md
├── setup.py
├── CHANGELOG.md
└── README.md
/batchspawner/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | async_generator>=1.8
2 | jinja2
3 | jupyterhub>=0.9
4 |
--------------------------------------------------------------------------------
/batchspawner/__init__.py:
--------------------------------------------------------------------------------
1 | from .batchspawner import *
2 | from . import api
3 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.md
2 | include LICENSE
3 | include version.py
4 | include requirements.txt
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info/
2 | *.log
3 | *.pyc
4 | __pycache__/
5 | .cache/
6 | .coverage
7 | .pytest_cache
8 | *~
9 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | Welcome! As a [Jupyter](https://jupyter.org) project, we follow the [Jupyter contributor guide](https://jupyter.readthedocs.io/en/latest/contributing/content-contributor.html).
4 |
--------------------------------------------------------------------------------
/version.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Jupyter Development Team.
2 | # Distributed under the terms of the Modified BSD License.
3 |
4 | version_info = (
5 | 1,
6 | 2,
7 | 0,
8 | # "dev", # comment-out this line for a release
9 | )
10 | __version__ = ".".join(map(str, version_info))
11 |
--------------------------------------------------------------------------------
/batchspawner/tests/conftest.py:
--------------------------------------------------------------------------------
1 | """Relevant pytest fixtures are re-used from JupyterHub's test suite"""
2 |
3 | # We only use "db" and "io_loop", but we also need event_loop which is used by
4 | # io_loop to be available with jupyterhub 1+.
5 | from jupyterhub.tests.conftest import db, io_loop
6 |
7 | try:
8 | from jupyterhub.tests.conftest import event_loop
 9 | except ImportError:
10 | pass
11 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | # Ignore style and complexity
3 | # E: style errors
4 | # W: style warnings
5 | # C: complexity
6 | # F401: module imported but unused
7 | # F403: import *
8 | # F811: redefinition of unused `name` from line `N`
9 | # F841: local variable assigned but never used
10 | # E402: module level import not at top of file
11 | # I100: Import statements are in the wrong order
12 | # I101: Imported names are in the wrong order
13 | # D400: First line should end with a period
14 | ignore = E, W, C, F401, F403, F811, F841, E402, I100, I101, D400
15 |
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 | #
4 | name: Upload Python Package
5 |
6 | on:
7 | release:
8 | types: [released]
9 |
10 | jobs:
11 | deploy:
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - uses: actions/checkout@v3
16 | - uses: actions/setup-python@v3
17 | with:
18 | python-version: "3.x"
19 |
20 | - name: install build package
21 | run: |
22 | pip install --upgrade pip
23 | pip install build
24 | pip freeze
25 |
26 | - name: build release
27 | run: |
28 | python -m build --sdist --wheel .
29 | ls -l dist
30 | sha256sum dist/* | tee SHA256SUMS
31 |
32 | - name: Publish to PyPI
33 | env:
34 | TWINE_USERNAME: __token__
35 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
36 | run: |
37 | pip install twine
38 | twine upload --skip-existing dist/*
39 |
--------------------------------------------------------------------------------
/batchspawner/singleuser.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | from runpy import run_path
5 | from shutil import which
6 |
7 | from jupyterhub.utils import random_port, url_path_join
8 | from jupyterhub.services.auth import HubAuth
9 |
10 | import requests
11 |
12 |
13 | def main(argv=None):
14 | port = random_port()
15 | hub_auth = HubAuth()
16 | hub_auth.client_ca = os.environ.get("JUPYTERHUB_SSL_CLIENT_CA", "")
17 | hub_auth.certfile = os.environ.get("JUPYTERHUB_SSL_CERTFILE", "")
18 | hub_auth.keyfile = os.environ.get("JUPYTERHUB_SSL_KEYFILE", "")
19 |
20 | url = url_path_join(hub_auth.api_url, "batchspawner")
21 | headers = {"Authorization": f"token {hub_auth.api_token}"}
22 |
23 | # internal_ssl kwargs
24 | kwargs = {}
25 | if hub_auth.certfile and hub_auth.keyfile:
26 | kwargs["cert"] = (hub_auth.certfile, hub_auth.keyfile)
27 | if hub_auth.client_ca:
28 | kwargs["verify"] = hub_auth.client_ca
29 |
30 | r = requests.post(
31 | url,
32 |         headers=headers,
33 | json={"port": port},
34 | **kwargs,
35 | )
36 |     r.raise_for_status()
37 |
38 |     # Hand off to the single-user server command on the chosen port.
39 |     cmd_path = which(sys.argv[1])
40 |     sys.argv = sys.argv[1:] + ["--port={}".format(port)]
41 |     run_path(cmd_path, run_name="__main__")
42 |
43 |
44 | if __name__ == "__main__":
45 |     main()
46 |
--------------------------------------------------------------------------------
/batchspawner/api.py:
--------------------------------------------------------------------------------
1 | import json
2 | from tornado import web
3 | from jupyterhub.apihandlers import APIHandler, default_handlers
4 | from batchspawner import BatchSpawnerBase
5 |
6 |
7 | class BatchSpawnerAPIHandler(APIHandler):
8 | @web.authenticated
9 | def post(self):
10 | """POST set user spawner data"""
11 | if hasattr(self, "current_user"):
12 |             # JupyterHub compatibility (September 2018, d79a99323ef1d)
13 | user = self.current_user
14 | else:
15 | # Previous jupyterhub, 0.9.4 and before.
16 | user = self.get_current_user()
17 | token = self.get_auth_token()
18 | spawner = None
19 |         for s in user.spawners.values():
20 |             if s.api_token == token:
21 |                 spawner = s
22 |                 break
23 |         # Unwrap wrapper spawners (e.g. wrapspawner) to reach the BatchSpawner.
24 |         while spawner is not None and not isinstance(spawner, BatchSpawnerBase):
25 |             if not hasattr(spawner, "child_spawner"):
26 |                 break
27 |             spawner = spawner.child_spawner
28 | data = self.get_json_body()
29 | for key, value in data.items():
30 | if hasattr(spawner, key):
31 | setattr(spawner, key, value)
32 |         self.set_status(201)
33 |         self.finish(json.dumps({"message": "BatchSpawner data configured"}))
34 |
35 | default_handlers.append((r"/api/batchspawner", BatchSpawnerAPIHandler))
36 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2017, Project Jupyter Contributors
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # pre-commit is a tool to perform a predefined set of tasks manually and/or
2 | # automatically before git commits are made.
3 | #
4 | # Config reference: https://pre-commit.com/#pre-commit-configyaml---top-level
5 | #
6 | # Common tasks
7 | #
8 | # - Run on all files: pre-commit run --all-files
9 | # - Register git hooks: pre-commit install --install-hooks
10 | #
11 | repos:
12 | # Autoformat: Python code
13 | - repo: https://github.com/psf/black
14 | rev: "23.9.1"
15 | hooks:
16 | - id: black
17 | args:
18 | - --target-version=py36
19 | - --target-version=py37
20 | - --target-version=py38
21 | - --target-version=py39
22 | - --target-version=py310
23 | - --target-version=py311
24 |
25 | # Autoformat: markdown, yaml
26 | - repo: https://github.com/pre-commit/mirrors-prettier
27 | rev: v3.0.3
28 | hooks:
29 | - id: prettier
30 |
31 | # Lint: Python code
32 | - repo: https://github.com/PyCQA/flake8
33 | rev: "6.1.0"
34 | hooks:
35 | - id: flake8
36 |
37 | # Misc...
38 | - repo: https://github.com/pre-commit/pre-commit-hooks
39 | rev: v4.4.0
40 | # ref: https://github.com/pre-commit/pre-commit-hooks#hooks-available
41 | hooks:
42 | # Autoformat: Makes sure files end in a newline and only a newline.
43 | - id: end-of-file-fixer
44 |
45 | # Autoformat: Sorts entries in requirements.txt.
46 | - id: requirements-txt-fixer
47 |
48 | # Prevent giant (500kB) files from being committed.
49 | - id: check-added-large-files
50 |
51 | # Lint: Check for files with names that would conflict on a
52 | # case-insensitive filesystem like MacOS HFS+ or Windows FAT.
53 | - id: check-case-conflict
54 |
55 | # Lint: Checks that non-binary executables have a proper shebang.
56 | - id: check-executables-have-shebangs
57 |
58 | # pre-commit.ci config reference: https://pre-commit.ci/#configuration
59 | ci:
60 | autoupdate_schedule: monthly
61 |
--------------------------------------------------------------------------------
/SPAWNERS.md:
--------------------------------------------------------------------------------
1 | # Notes on specific spawners
2 |
 3 | **Spawner maintainers**: Listed below are "spawner maintainers",
 4 | where available. There are no official obligations, but the general
 5 | idea is that maintainers watch the repository and feel especially
 6 | empowered to comment on issues that seem relevant to their spawner
 7 | (everyone should feel empowered to do that, but this is our attempt
 8 | at even more outreach). Let us know when we break something, and
 9 | provide a diversity of opinions in general. Submitting PRs and
10 | testing is nice but not required.
11 |
12 | To be listed as a maintainer, just submit an issue or PR adding
13 | yourself, and please watch the repository on GitHub.
14 |
15 | ## `TorqueSpawner`
16 |
17 | Maintainers:
18 |
19 | ## `MoabSpawner`
20 |
21 | Subclass of TorqueSpawner
22 |
23 | Maintainers:
24 |
25 | ## `SlurmSpawner`
26 |
27 | Maintainers: @rkdarst
28 |
29 | This spawner enforces the environment if `srun` is used to wrap the
30 | spawner command (the default). If you _do_ want the user's
31 | environment to be used, set `req_srun=''`. However, this is not
32 | perfect: a bash shell is still started as the user, which could run
33 | arbitrary startup files, define shell aliases for `srun`, etc.
34 |
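For example, in `jupyterhub_config.py` (a minimal sketch; note the
caveat just below):

```python
# Disable the srun wrapper so the user's shell environment propagates.
# Caveat: without srun, graceful termination (see below) is lost.
c.SlurmSpawner.req_srun = ""
```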
35 | Use of `srun` is required for the single-user server to terminate gracefully.
36 |
37 | ## `GridengineSpawner`
38 |
39 | Maintainers:
40 |
41 | ## `CondorSpawner`
42 |
43 | Maintainers:
44 |
45 | ## `LsfSpawner`
46 |
47 | Maintainers:
48 |
49 | # Checklist for making spawners
50 |
51 | Please document each of these things under the spawner list above,
52 | even if the answer is "OK" - we need to track the status of all
53 | spawners. If it is a bug, users really need to know.
54 |
55 | - Does your spawner read the shell environment before starting? (See
56 |   [JupyterHub
57 |   Security](https://jupyterhub.readthedocs.io/en/stable/reference/websecurity.html).)
58 |
59 | - Does your spawner send SIGTERM to the jupyterhub-singleuser process
60 |   before SIGKILL? It should, so that the process can terminate
61 |   gracefully. Add `echo "terminated gracefully"` to the end of the
62 |   batch script (see the sketch below) - if you see this in your
63 |   singleuser server output, you know that you DO receive SIGTERM and
64 |   terminate gracefully. If your batch system cannot automatically
65 |   send SIGTERM before SIGKILL, PR #75 might help here - ask for it to be finished.
66 |
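A sketch of the `echo` check above, using `req_epilogue` (inserted
after the single-user command in the batch script) with `SlurmSpawner`
as an arbitrary example:

```python
# If this marker appears in the job output, the single-user server
# exited and the epilogue still ran, i.e. the job received SIGTERM
# and terminated gracefully rather than being hard-killed.
c.SlurmSpawner.req_epilogue = 'echo "terminated gracefully"'
```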
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # Copyright (c) Jupyter Development Team.
5 | # Distributed under the terms of the Modified BSD License.
6 |
7 | # -----------------------------------------------------------------------------
8 | # Minimal Python version sanity check (from IPython/Jupyterhub)
9 | # -----------------------------------------------------------------------------
10 |
11 | from __future__ import print_function
12 |
13 | import os
14 | import sys
15 |
16 | from setuptools import setup
17 | from glob import glob
18 |
19 | pjoin = os.path.join
20 | here = os.path.abspath(os.path.dirname(__file__))
21 |
22 | # Get the current package version.
23 | version_ns = {}
24 | with open(pjoin(here, "version.py")) as f:
25 | exec(f.read(), {}, version_ns)
26 |
27 | with open(pjoin(here, "README.md"), encoding="utf-8") as f:
28 | long_desc = f.read()
29 |
30 | setup_args = dict(
31 | name="batchspawner",
32 | entry_points={
33 | "console_scripts": ["batchspawner-singleuser=batchspawner.singleuser:main"],
34 | },
35 | packages=["batchspawner"],
36 | version=version_ns["__version__"],
37 | description="""Batchspawner: A spawner for Jupyterhub to spawn notebooks using batch resource managers.""",
38 | long_description=long_desc,
39 | long_description_content_type="text/markdown",
40 | author="Michael Milligan, Andrea Zonca, Mike Gilbert",
41 | author_email="milligan@umn.edu",
42 | url="http://jupyter.org",
43 | license="BSD",
44 | platforms="Linux, Mac OS X",
45 | python_requires="~=3.5",
46 | keywords=["Interactive", "Interpreter", "Shell", "Web", "Jupyter"],
47 | classifiers=[
48 | "Intended Audience :: Developers",
49 | "Intended Audience :: System Administrators",
50 | "Intended Audience :: Science/Research",
51 | "License :: OSI Approved :: BSD License",
52 | "Programming Language :: Python",
53 | "Programming Language :: Python :: 3",
54 | ],
55 | project_urls={
56 | "Bug Reports": "https://github.com/jupyterhub/batchspawner/issues",
57 | "Source": "https://github.com/jupyterhub/batchspawner/",
58 | "About Jupyterhub": "http://jupyterhub.readthedocs.io/en/latest/",
59 | "Jupyter Project": "http://jupyter.org",
60 | },
61 | )
62 |
63 | # setuptools requirements
64 | if "setuptools" in sys.modules:
65 | setup_args["install_requires"] = install_requires = []
66 | with open("requirements.txt") as f:
67 | for line in f.readlines():
68 | req = line.strip()
69 | if not req or req.startswith(("-e", "#")):
70 | continue
71 | install_requires.append(req)
72 |
73 |
74 | def main():
75 | setup(**setup_args)
76 |
77 |
78 | if __name__ == "__main__":
79 | main()
80 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | # This is a GitHub workflow defining a set of jobs with a set of steps.
2 | # ref: https://docs.github.com/en/free-pro-team@latest/actions/reference/workflow-syntax-for-github-actions
3 | #
4 | name: Test
5 |
6 | on:
7 | pull_request:
8 | paths-ignore:
9 | - "**.md"
10 | - "**.yml"
11 | - "**.yaml"
12 | - "!.github/workflows/test.yml"
13 | push:
14 | paths-ignore:
15 | - "**.md"
16 | - "**.yml"
17 | - "**.yaml"
18 | - "!.github/workflows/test.yml"
19 | branches-ignore:
20 | - "dependabot/**"
21 | - "pre-commit-ci-update-config"
22 | tags: ["**"]
23 | workflow_dispatch:
24 |
25 | jobs:
26 | pytest:
27 | name: "Run pytest"
28 | runs-on: ubuntu-20.04
29 | continue-on-error: ${{ matrix.allow_failure }}
30 | strategy:
31 |       # Keep running even if one variation of the job fails
32 | fail-fast: false
33 | matrix:
34 | python-version:
35 | - "3.6"
36 | - "3.10"
37 | JHUB_VER:
38 | - "1.0.0"
39 | - "1.5.1"
40 | - "2.3.1"
41 | allow_failure: [false]
42 |
43 | exclude:
44 | # JupyterHub 1.3.0 requires python 3.6+
45 | - JHUB_VER: "1.3.0"
46 | python-version: "3.5"
47 | # JupyterHub 0.9.6 used a deprecated sqlalchemy feature removed in py3.9 environment
48 | - JHUB_VER: "0.9.6"
49 | python-version: "3.9"
50 | include:
51 | - JHUB_VER: "main"
52 | python-version: "3.9"
53 | allow_failure: true
54 | - JHUB_VER: "3.0.0"
55 | python-version: "3.9"
56 | allow_failure: true
57 |
58 | steps:
59 | - uses: actions/checkout@v3
60 | - name: Set up Python ${{ matrix.python-version }}
61 | uses: actions/setup-python@v3
62 | with:
63 | python-version: "${{ matrix.python-version }}"
64 |
65 | - name: Install dependencies
66 | run: |
67 | python -m pip install --upgrade pip
68 | python -m pip install pytest
69 | pip install -r requirements.txt
70 | pip list
71 |
72 | - name: Install nodejs dependencies
73 | run: |
74 | sudo npm install -g configurable-http-proxy
75 |
76 | # We need to check compatibility with different versions of the JH API,
77 | # including latest development. For that, we also need to pull in the
78 | # development dependencies of that old JH version (but we don't need
79 | # conda/npm for our tests).
80 | - name: install JupyterHub
81 | run: |
82 | git clone --quiet --branch ${{ matrix.JHUB_VER }} https://github.com/jupyterhub/jupyterhub.git ./jupyterhub
83 | pip install -r ./jupyterhub/dev-requirements.txt
84 | pip install ./jupyterhub
85 |
86 | - name: pytest
87 | run: |
88 | pytest --verbose --color=yes --last-failed --cov batchspawner batchspawner/tests
89 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## unreleased changes
4 |
5 | Added (user)
6 |
7 | Added (developer)
8 |
9 | Changed
10 |
11 | Fixed
12 |
13 | ## v1.2
14 |
15 | Changed
16 |
17 | - PR #237: Replace use of scripts with entry_points
18 | - PR #208 #238 #239 #240 #241: updates to CI - bumping versions and aligning with JupyterHub standards
19 | - PR #220: remove code supporting JupyterHub earlier than 0.9
20 |
21 | Fixed
22 |
23 | - PR #229: LSF jobs with multiple slots display each hostname ':' separated
24 |
25 | ## v1.1
26 |
27 | Added (user)
28 |
29 | - PR #170: SlurmSpawner: add `req_gres` to specify `--gres`.
30 | - PR #137: GridEngineSpawner: spawner will now add the following system environment values to the spawner environment, in accordance with the Univa Admin Guide: `SGE_CELL`, `SGE_EXECD`, `SGE_ROOT`, `SGE_CLUSTER_NAME`, `SGE_QMASTER_PORT`, `SGE_EXECD_PORT`, `PATH`
31 |
32 | Added (developer)
33 |
34 | - PR #187: support for unknown job state
35 |
36 | Changed
37 |
38 | - PR #177: Fail on first error in batch script by setting `set -e` to script templates.
39 | - PR #165: SlurmSpawner: Update template to use `--chdir` instead of `--workdir`. Users of Slurm older than 17.11 may need to revert this locally.
40 | - PR #189: remove bashism from default script template
41 | - PR #195: fix exception handling in run_command
42 | - PR #198: change from Travis to gh-actions for testing
43 | - PR #196: documentation
44 | - PR #199: update setup.py
45 |
46 | ## v1.0 (requires minimum JupyterHub 0.9 and Python 3.5)
47 |
48 | Added (user)
49 |
50 | - Add support for JupyterHub named servers. #167
51 | - Add Jinja2 templating as an option for all scripts and commands. If `{{` or `{%` is used anywhere in the string, it is used as a jinja2 template.
52 | - Add new option `exec_prefix`, which defaults to `sudo -E -u {username}`. This replaces explicit `sudo` in every batch command - changes in local commands may be needed.
53 | - New option: `req_keepvars_extra`, which allows keeping extra variables in addition to what is defined by JupyterHub itself (addition of variables to keep instead of replacement). #99
54 | - Add `req_prologue` and `req_epilogue` options to scripts which are inserted before/after the main jupyterhub-singleuser command, which allow for generic setup/cleanup without overriding the entire script. #96
55 | - SlurmSpawner: add the `req_reservation` option. #91
56 | - Add basic support for JupyterHub progress updates, but this is not used much yet. #86
57 |
58 | Added (developer)
59 |
60 | - Add many more tests.
61 | - Add a new page `SPAWNERS.md` with information on specific spawners. Begin trying to collect a list of spawner-specific contacts. #97
62 | - Rename `current_ip` and `current_port` commands to `ip` and `port`. No user impact. #139
63 | - Update to Python 3.5 `async` / `await` syntax to support JupyterHub progress updates. #90
64 |
65 | Changed
66 |
67 | - PR #58 and #141 changes logic of port selection, so that it is selected _after_ the singleuser server starts. This means that the port number has to be conveyed back to JupyterHub. This requires the following changes:
68 |   - `jupyterhub_config.py` _must_ explicitly import `batchspawner`
69 | - Add a new option `batchspawner_singleuser_cmd` which is used as a wrapper in the single-user servers, which conveys the remote port back to JupyterHub. This is now an integral part of the spawn process.
70 | - If you have installed with `pip install -e`, you will have to re-install so that the new script `batchspawner-singleuser` is added to `$PATH`.
71 | - Update minimum requirements to JupyterHub 0.9 and Python 3.5. #143
72 | - Update Slurm batch script. Now, the single-user notebook is run in a job step, with a wrapper of `srun`. This may need to be removed using `req_srun=''` if you don't want environment variables limited.
73 | - Pass the environment dictionary to the queue and cancel commands as well. This is mostly user environment, but may be useful to these commands as well in some cases. #108, #111 If these environment variables were used for authentication as an admin, be aware that there are pre-existing security issues because they may be passed to the user via the batch submit command, see #82.
74 |
75 | Fixed
76 |
77 | - Improve debugging on failed submission by raising errors including error messages from the commands. #106
78 | - Many other non-user or developer visible changes. #107 #106 #100
79 | - In Travis CI, blacklist jsonschema=3.0.0a1 because it breaks tests
80 |
81 | Removed
82 |
83 | ## v0.8.1 (bugfix release)
84 |
85 | - Fix regression: single-user server binding address is overwritten by previous session server address, resulting in failure to start. Issue #76
86 |
87 | ## v0.8.0 (compatible with JupyterHub 0.5.0 through 0.8.1/0.9dev)
88 |
89 | - SlurmSpawner: Remove `--uid` for (at least) Slurm 17.11 compatibility. If you use `sudo`, this should not be necessary, but because this is security related you should check that user management is as you expect. If your configuration does not use `sudo` then you may need to add the `--uid` option in a custom `batch_script`.
90 | - add base options `req_ngpus` `req_partition` `req_account` and `req_options`
91 | - Fix up logging
92 | - Merge `user_options` with the template substitution vars instead of having it as a separate key
93 | - Update ip/port handling for JupyterHub 0.8
94 | - Add `LICENSE` (BSD3) and `CONTRIBUTING.md`
95 | - Add `LsfSpawner` for IBM LSF
96 | - Add `MultiSlurmSpawner`
97 | - Add `MoabSpawner`
98 | - Add `CondorSpawner`
99 | - Add `GridEngineSpawner`
100 | - SlurmSpawner: add `req_qos` option
101 | - WrapSpawner and ProfilesSpawner, which provide mechanisms for runtime configuration of spawners, have been split out and moved to the [`wrapspawner`](https://github.com/jupyterhub/wrapspawner) package
102 | - Enable CI testing via Travis-CI
103 |
104 | ## v0.3 (tag: jhub-0.3, compatible with JupyterHub 0.3.0)
105 |
106 | - initial release containing `TorqueSpawner` and `SlurmSpawner`
107 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # batchspawner for JupyterHub
2 |
3 | [](https://github.com/jupyterhub/batchspawner/actions)
4 | [](https://pypi.python.org/pypi/batchspawner)
5 | [](https://github.com/jupyterhub/batchspawner/issues)
6 | [](https://discourse.jupyter.org/c/jupyterhub)
7 | [](https://gitter.im/jupyterhub/jupyterhub)
8 | [](https://github.com/jupyterhub/batchspawner/blob/master/CONTRIBUTING.md)
9 |
10 | This is a custom spawner for [JupyterHub](https://jupyterhub.readthedocs.io/) that is designed for installations on clusters using batch scheduling software.
11 |
12 | This began as a generalization of [mkgilbert's batchspawner](https://github.com/mkgilbert/slurmspawner), which in turn was inspired by [Andrea Zonca's blog post](http://zonca.github.io/2015/04/jupyterhub-hpc.html "Run jupyterhub on a Supercomputer"), where he explains his implementation of a spawner that uses SSH and Torque. His GitHub repo is [here](http://www.github.com/zonca/remotespawner "RemoteSpawner").
13 |
14 | This package formerly included WrapSpawner and ProfilesSpawner, which provide mechanisms for runtime configuration of spawners. These have been split out and moved to the [`wrapspawner`](https://github.com/jupyterhub/wrapspawner) package.
15 |
16 | ## Installation
17 |
18 | 1. From the root directory of this repo (where setup.py is), run `pip install -e .`
19 |
20 |    If you don't need an editable install, you can simply run
21 |    `pip install batchspawner`.
22 |
23 | 2. Add lines in `jupyterhub_config.py` for the spawner you intend to use, e.g.
24 |
25 | ```python
26 | c = get_config()
27 | c.JupyterHub.spawner_class = 'batchspawner.TorqueSpawner'
28 | import batchspawner # Even though not used, needed to register batchspawner interface
29 | ```
30 |
31 | 3. Depending on the spawner, additional configuration will likely be needed.
32 |
33 | ## Batch Spawners
34 |
35 | For information on the specific spawners, see [SPAWNERS.md](SPAWNERS.md).
36 |
37 | ### Overview
38 |
39 | This package provides an abstraction layer for batch job queueing systems (`BatchSpawnerBase`), and implements
40 | JupyterHub spawners for Torque, Moab, SLURM, SGE, HTCondor, LSF, and eventually others.
41 | Common attributes of batch submission / resource manager environments will include notions of:
42 |
43 | - queue names, resource manager addresses
44 | - resource limits including runtime, number of processes, memory
45 | - singleuser child process running on (usually remote) host not known until runtime
46 | - job submission and monitoring via resource manager utilities
47 | - remote execution via submission of templated scripts
48 | - job names instead of PIDs
49 |
50 | `BatchSpawnerBase` provides several general mechanisms:
51 |
52 | - configurable traits `req_foo` that are exposed as `{foo}` in job template scripts. Templates (submit scripts in particular) may also use the full power of [jinja2](http://jinja.pocoo.org/). Templates are automatically detected if a `{{` or `{%` is present; otherwise `str.format()` is used (see the sketch after this list).
53 | - configurable command templates for submitting/querying/cancelling jobs
54 | - a generic concept of job-ID and ID-based job state tracking
55 | - overrideable hooks for subclasses to plug in logic at numerous points
56 |
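As a quick illustration of the `req_foo` mechanism (a sketch; the trait
names are real, the values are made up):

```python
# Each configurable trait req_foo becomes {foo} in the templates below.
c.TorqueSpawner.req_runtime = '4:00:00'
# str.format() template - no '{{' or '{%', so jinja2 is not triggered:
c.TorqueSpawner.batch_script = '#!/bin/sh\n#PBS -l walltime={runtime}\n{cmd}\n'
# The jinja2 equivalent would be detected by its '{{' markers:
# c.TorqueSpawner.batch_script = '#!/bin/sh\n#PBS -l walltime={{ runtime }}\n{{ cmd }}\n'
```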
57 | ### Example
58 |
59 | Every effort has been made to accommodate highly diverse systems through configuration
60 | only. This example consists of the (lightly edited) configuration used by the author
61 | to run Jupyter notebooks on an academic supercomputer cluster.
62 |
63 | ```python
64 | # Select the Torque backend and increase the timeout since batch jobs may take time to start
65 | import batchspawner
66 | c.JupyterHub.spawner_class = 'batchspawner.TorqueSpawner'
67 | c.Spawner.http_timeout = 120
68 |
69 | #------------------------------------------------------------------------------
70 | # BatchSpawnerBase configuration
71 | # These are simply setting parameters used in the job script template below
72 | #------------------------------------------------------------------------------
73 | c.BatchSpawnerBase.req_nprocs = '2'
74 | c.BatchSpawnerBase.req_queue = 'mesabi'
75 | c.BatchSpawnerBase.req_host = 'mesabi.xyz.edu'
76 | c.BatchSpawnerBase.req_runtime = '12:00:00'
77 | c.BatchSpawnerBase.req_memory = '4gb'
78 | #------------------------------------------------------------------------------
79 | # TorqueSpawner configuration
80 | # The script below is nearly identical to the default template, but we needed
81 | # to add a line for our local environment. For most sites the default templates
82 | # should be a good starting point.
83 | #------------------------------------------------------------------------------
84 | c.TorqueSpawner.batch_script = '''#!/bin/sh
85 | #PBS -q {queue}@{host}
86 | #PBS -l walltime={runtime}
87 | #PBS -l nodes=1:ppn={nprocs}
88 | #PBS -l mem={memory}
89 | #PBS -N jupyterhub-singleuser
90 | #PBS -v {keepvars}
91 | module load python3
92 | {cmd}
93 | '''
94 | # For our site we need to munge the execution hostname returned by qstat
95 | c.TorqueSpawner.state_exechost_exp = r'int-\1.mesabi.xyz.edu'
96 | ```
97 |
98 | ### Security
99 |
100 | Unless otherwise stated for a specific spawner, assume that spawners
101 | _do_ evaluate the user's shell environment, and thus that the
102 | [security requirements of JupyterHub for untrusted
103 | users](https://jupyterhub.readthedocs.io/en/stable/reference/websecurity.html)
104 | are not fulfilled: some (most?) spawners _do_ start a user shell,
105 | which will execute arbitrary user environment configuration
106 | (`.profile`, `.bashrc`, and the like) unless users do not have
107 | access to their own cluster user account. This is something which
108 | we are working on.
109 |
110 | ## Provide different configurations of BatchSpawner
111 |
112 | ### Overview
113 |
114 | `ProfilesSpawner`, available as part of the [`wrapspawner`](https://github.com/jupyterhub/wrapspawner)
115 | package, allows the JupyterHub administrator to define a set of different spawning configurations,
116 | both different spawners and different configurations of the same spawner.
117 | The user is then presented a dropdown menu for choosing the most suitable configuration for their needs.
118 |
119 | This method provides an easy and safe way to provide different configurations of `BatchSpawner` to the
120 | users, see an example below.
121 |
122 | ### Example
123 |
124 | The following is based on the author's configuration (at the same site as the example above)
125 | showing how to give users access to multiple job configurations on the batch scheduled
126 | clusters, as well as an option to run a local notebook directly on the jupyterhub server.
127 |
128 | ```python
129 | # Same initial setup as the previous example
130 | import batchspawner
131 | c.JupyterHub.spawner_class = 'wrapspawner.ProfilesSpawner'
132 | c.Spawner.http_timeout = 120
133 | #------------------------------------------------------------------------------
134 | # BatchSpawnerBase configuration
135 | # Providing default values that we may omit in the profiles
136 | #------------------------------------------------------------------------------
137 | c.BatchSpawnerBase.req_host = 'mesabi.xyz.edu'
138 | c.BatchSpawnerBase.req_runtime = '12:00:00'
139 | c.TorqueSpawner.state_exechost_exp = r'in-\1.mesabi.xyz.edu'
140 | #------------------------------------------------------------------------------
141 | # ProfilesSpawner configuration
142 | #------------------------------------------------------------------------------
143 | # List of profiles to offer for selection. Signature is:
144 | # List(Tuple( Unicode, Unicode, Type(Spawner), Dict ))
145 | # corresponding to profile display name, unique key, Spawner class,
146 | # dictionary of spawner config options.
147 | #
148 | # The first three values will be exposed in the input_template as {display},
149 | # {key}, and {type}
150 | #
151 | c.ProfilesSpawner.profiles = [
152 | ( "Local server", 'local', 'jupyterhub.spawner.LocalProcessSpawner', {'ip':'0.0.0.0'} ),
153 | ('Mesabi - 2 cores, 4 GB, 8 hours', 'mesabi2c4g12h', 'batchspawner.TorqueSpawner',
154 | dict(req_nprocs='2', req_queue='mesabi', req_runtime='8:00:00', req_memory='4gb')),
155 | ('Mesabi - 12 cores, 128 GB, 4 hours', 'mesabi128gb', 'batchspawner.TorqueSpawner',
156 | dict(req_nprocs='12', req_queue='ram256g', req_runtime='4:00:00', req_memory='125gb')),
157 | ('Mesabi - 2 cores, 4 GB, 24 hours', 'mesabi2c4gb24h', 'batchspawner.TorqueSpawner',
158 | dict(req_nprocs='2', req_queue='mesabi', req_runtime='24:00:00', req_memory='4gb')),
159 | ('Interactive Cluster - 2 cores, 4 GB, 8 hours', 'lab', 'batchspawner.TorqueSpawner',
160 | dict(req_nprocs='2', req_host='labhost.xyz.edu', req_queue='lab',
161 | req_runtime='8:00:00', req_memory='4gb', state_exechost_exp='')),
162 | ]
163 | c.ProfilesSpawner.ip = '0.0.0.0'
164 | ```
165 |
166 | ## Debugging batchspawner
167 |
168 | Sometimes it can be hard to debug batchspawner, but it's not really
169 | that hard once you know how the pieces interact. Check the following
170 | places for error messages:
171 |
172 | - Check the JupyterHub logs for errors.
173 |
174 | - Check the JupyterHub logs for the batch script that got submitted
175 | and the command used to submit it. Are these correct? (Note that
176 | there are submission environment variables too, which aren't
177 | displayed.)
178 |
179 | - At this point, it's a matter of checking the batch system. Is the
180 | job ever scheduled? Does it run? Does it succeed? Check the batch
181 |   system status and output of the job. The most common failure
182 | patterns are a) job never starting due to bad scheduler options, b)
183 | job waiting in the queue beyond the `start_timeout`, causing
184 | JupyterHub to kill the job.
185 |
186 | - At this point the job starts. Does it fail immediately, or before
187 |   Jupyter starts? Check the scheduler output files (stdout/stderr of
188 |   the job), wherever they are stored. To debug the job script, add
189 |   debugging to it, such as `env` or `set -x` (see the sketch after this list).
190 |
191 | - At this point Jupyter itself starts - check its error messages. Is
192 | it starting with the right options? Can it communicate with the
193 | hub? At this point there usually isn't anything
194 | batchspawner-specific, with the one exception below. The error log
195 | would be in the batch script output (same file as above). There may
196 | also be clues in the JupyterHub logfile.
197 | - Are you running on an NFS filesystem? It's possible for Jupyter to
198 | experience issues due to varying implementations of the fcntl() system
199 | call. (See also [Jupyterhub-Notes and Tips: SQLite](https://jupyterhub.readthedocs.io/en/latest/reference/database.html?highlight=NFS#sqlite))
200 |
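A sketch of that debugging aid, using `req_prologue` (inserted before
the single-user command) with the Torque spawner from the examples
above:

```python
# Trace each batch-script command and dump the job's environment
# into the job output file.
c.TorqueSpawner.req_prologue = 'set -x\nenv'
```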
201 | Common problems:
202 |
203 | - Did you `import batchspawner` in the `jupyterhub_config.py` file?
204 |   This is needed in order to activate the batchspawner API in
205 | JupyterHub.
206 |
207 | ## Changelog
208 |
209 | See [CHANGELOG.md](CHANGELOG.md).
210 |
--------------------------------------------------------------------------------
/batchspawner/tests/test_spawners.py:
--------------------------------------------------------------------------------
1 | """Test BatchSpawner and subclasses"""
2 |
3 | import re
4 | from unittest import mock
5 | from .. import BatchSpawnerRegexStates, JobStatus
6 | from traitlets import Unicode
7 | import time
8 | import pytest
9 | from jupyterhub import orm
10 | from tornado import gen
11 |
12 | try:
13 | from jupyterhub.objects import Hub, Server
14 | from jupyterhub.user import User
15 | except ImportError:
16 | pass
17 |
18 | testhost = "userhost123"
19 | testjob = "12345"
20 | testport = 54321
21 |
22 |
23 | class BatchDummy(BatchSpawnerRegexStates):
24 | exec_prefix = ""
25 | batch_submit_cmd = Unicode("cat > /dev/null; echo " + testjob)
26 | batch_query_cmd = Unicode("echo RUN " + testhost)
27 | batch_cancel_cmd = Unicode("echo STOP")
28 | batch_script = Unicode("{cmd}")
29 | state_pending_re = Unicode("PEND")
30 | state_running_re = Unicode("RUN")
31 | state_exechost_re = Unicode("RUN (.*)$")
32 | state_unknown_re = Unicode("UNKNOWN")
33 |
34 | cmd_expectlist = None
35 | out_expectlist = None
36 |
37 |     async def run_command(self, *args, **kwargs):
38 |         """Overridden run_command to test templating and outputs"""
39 |         cmd = args[0]
40 |         # Test that the command matches the expectations
41 |         if self.cmd_expectlist:
42 |             run_re = self.cmd_expectlist.pop(0)
43 |             if run_re:
44 |                 print("run:", run_re)
45 |                 assert (
46 |                     run_re.search(cmd) is not None
47 |                 ), "Failed test: re={0} cmd={1}".format(run_re, cmd)
48 |         # Run the command normally
49 |         out = await super().run_command(*args, **kwargs)
50 |         # Test that the output matches the expectations
51 |         if self.out_expectlist:
52 |             out_re = self.out_expectlist.pop(0)
53 |             if out_re:
54 |                 print("out:", out_re)
55 |                 assert (
56 |                     out_re.search(out) is not None
57 |                 ), "Failed output: re={0} cmd={1} out={2}".format(out_re, cmd, out)
58 |         return out
59 |
60 |
61 | def new_spawner(db, spawner_class=BatchDummy, **kwargs):
62 | kwargs.setdefault("cmd", ["singleuser_command"])
63 | user = db.query(orm.User).first()
64 | hub = Hub()
65 | user = User(user, {})
66 | server = Server()
67 |     # Set these after construction because they aren't traitlets.
68 | kwargs.setdefault("hub", hub)
69 | kwargs.setdefault("user", user)
70 | kwargs.setdefault("poll_interval", 1)
71 |
72 | # These are not traitlets so we have to set them here
73 | spawner = user._new_spawner("", spawner_class=spawner_class, **kwargs)
74 | spawner.server = server
75 | spawner.mock_port = testport
76 | return spawner
77 |
78 |
79 | @pytest.mark.slow
80 | def test_stress_submit(db, io_loop):
81 | for i in range(200):
82 | time.sleep(0.01)
83 | test_spawner_start_stop_poll(db, io_loop)
84 |
85 |
86 | def check_ip(spawner, value):
87 | assert spawner.ip == value
88 |
89 |
90 | def test_spawner_start_stop_poll(db, io_loop):
91 | spawner = new_spawner(db=db)
92 |
93 | status = io_loop.run_sync(spawner.poll, timeout=5)
94 | assert status == 1
95 | assert spawner.job_id == ""
96 | assert spawner.get_state() == {}
97 |
98 | io_loop.run_sync(spawner.start, timeout=5)
99 | check_ip(spawner, testhost)
100 | assert spawner.job_id == testjob
101 |
102 | status = io_loop.run_sync(spawner.poll, timeout=5)
103 | assert status is None
104 | spawner.batch_query_cmd = "echo NOPE"
105 | io_loop.run_sync(spawner.stop, timeout=5)
106 | status = io_loop.run_sync(spawner.poll, timeout=5)
107 | assert status == 1
108 | assert spawner.get_state() == {}
109 |
110 |
111 | def test_spawner_state_reload(db, io_loop):
112 | spawner = new_spawner(db=db)
113 | assert spawner.get_state() == {}
114 |
115 | io_loop.run_sync(spawner.start, timeout=30)
116 | check_ip(spawner, testhost)
117 | assert spawner.job_id == testjob
118 |
119 | state = spawner.get_state()
120 | assert state == dict(job_id=testjob, job_status="RUN " + testhost)
121 | spawner = new_spawner(db=db)
122 | spawner.clear_state()
123 | assert spawner.get_state() == {}
124 | spawner.load_state(state)
125 | # We used to check IP here, but that is actually only computed on start(),
126 | # and is not part of the spawner's persistent state
127 | assert spawner.job_id == testjob
128 |
129 |
130 | def test_submit_failure(db, io_loop):
131 | spawner = new_spawner(db=db)
132 | assert spawner.get_state() == {}
133 | spawner.batch_submit_cmd = "cat > /dev/null; true"
134 | with pytest.raises(RuntimeError) as e_info:
135 | io_loop.run_sync(spawner.start, timeout=30)
136 | assert spawner.job_id == ""
137 | assert spawner.job_status == ""
138 |
139 |
140 | def test_submit_pending_fails(db, io_loop):
141 | """Submission works, but the batch query command immediately fails"""
142 | spawner = new_spawner(db=db)
143 | assert spawner.get_state() == {}
144 | spawner.batch_query_cmd = "echo xyz"
145 | with pytest.raises(RuntimeError) as e_info:
146 | io_loop.run_sync(spawner.start, timeout=30)
147 | status = io_loop.run_sync(spawner.query_job_status, timeout=30)
148 | assert status == JobStatus.NOTFOUND
149 | assert spawner.job_id == ""
150 | assert spawner.job_status == ""
151 |
152 |
153 | def test_poll_fails(db, io_loop):
154 | """Submission works, but a later .poll() fails"""
155 | spawner = new_spawner(db=db)
156 | assert spawner.get_state() == {}
157 | # The start is successful:
158 | io_loop.run_sync(spawner.start, timeout=30)
159 | spawner.batch_query_cmd = "echo xyz"
160 | # Now, the poll fails:
161 | io_loop.run_sync(spawner.poll, timeout=30)
162 | # .poll() will run self.clear_state() if it's not found:
163 | assert spawner.job_id == ""
164 | assert spawner.job_status == ""
165 |
166 |
167 | def test_unknown_status(db, io_loop):
168 | """Polling returns an unknown status"""
169 | spawner = new_spawner(db=db)
170 | assert spawner.get_state() == {}
171 | # The start is successful:
172 | io_loop.run_sync(spawner.start, timeout=30)
173 | spawner.batch_query_cmd = "echo UNKNOWN"
174 | # This poll should not fail:
175 | io_loop.run_sync(spawner.poll, timeout=30)
176 | status = io_loop.run_sync(spawner.query_job_status, timeout=30)
177 | assert status == JobStatus.UNKNOWN
178 | assert spawner.job_id == "12345"
179 | assert spawner.job_status != ""
180 |
181 |
182 | def test_templates(db, io_loop):
183 | """Test templates in the run_command commands"""
184 | spawner = new_spawner(db=db)
185 |
186 | # Test when not running
187 | spawner.cmd_expectlist = [
188 | re.compile(".*RUN"),
189 | ]
190 | status = io_loop.run_sync(spawner.poll, timeout=5)
191 | assert status == 1
192 | assert spawner.job_id == ""
193 | assert spawner.get_state() == {}
194 |
195 | # Test starting
196 | spawner.cmd_expectlist = [
197 | re.compile(".*echo"),
198 | re.compile(".*RUN"),
199 | ]
200 | io_loop.run_sync(spawner.start, timeout=5)
201 | check_ip(spawner, testhost)
202 | assert spawner.job_id == testjob
203 |
204 | # Test poll - running
205 | spawner.cmd_expectlist = [
206 | re.compile(".*RUN"),
207 | ]
208 | status = io_loop.run_sync(spawner.poll, timeout=5)
209 | assert status is None
210 |
211 | # Test stopping
212 | spawner.batch_query_cmd = "echo NOPE"
213 | spawner.cmd_expectlist = [
214 | re.compile(".*STOP"),
215 | re.compile(".*NOPE"),
216 | ]
217 | io_loop.run_sync(spawner.stop, timeout=5)
218 | status = io_loop.run_sync(spawner.poll, timeout=5)
219 | assert status == 1
220 | assert spawner.get_state() == {}
221 |
222 |
223 | def test_batch_script(db, io_loop):
224 | """Test that the batch script substitutes {cmd}"""
225 |
226 | class BatchDummyTestScript(BatchDummy):
227 | @gen.coroutine
228 | def _get_batch_script(self, **subvars):
229 | script = yield super()._get_batch_script(**subvars)
230 | assert "singleuser_command" in script
231 | return script
232 |
233 | spawner = new_spawner(db=db, spawner_class=BatchDummyTestScript)
234 | # status = io_loop.run_sync(spawner.poll, timeout=5)
235 | io_loop.run_sync(spawner.start, timeout=5)
236 | # status = io_loop.run_sync(spawner.poll, timeout=5)
237 | # io_loop.run_sync(spawner.stop, timeout=5)
238 |
239 |
240 | def test_exec_prefix(db, io_loop):
241 | """Test that all run_commands have exec_prefix"""
242 |
243 | class BatchDummyTestScript(BatchDummy):
244 | exec_prefix = "PREFIX"
245 |
246 | @gen.coroutine
247 | def run_command(self, cmd, *args, **kwargs):
248 | assert cmd.startswith("PREFIX ")
249 | cmd = cmd[7:]
250 | print(cmd)
251 | out = yield super().run_command(cmd, *args, **kwargs)
252 | return out
253 |
254 | spawner = new_spawner(db=db, spawner_class=BatchDummyTestScript)
255 | # Not running
256 | status = io_loop.run_sync(spawner.poll, timeout=5)
257 | assert status == 1
258 | # Start
259 | io_loop.run_sync(spawner.start, timeout=5)
260 | assert spawner.job_id == testjob
261 | # Poll
262 | status = io_loop.run_sync(spawner.poll, timeout=5)
263 | assert status is None
264 | # Stop
265 | spawner.batch_query_cmd = "echo NOPE"
266 | io_loop.run_sync(spawner.stop, timeout=5)
267 | status = io_loop.run_sync(spawner.poll, timeout=5)
268 | assert status == 1
269 |
270 |
271 | def run_spawner_script(
272 | db, io_loop, spawner, script, batch_script_re_list=None, spawner_kwargs={}
273 | ):
274 | """Run a spawner script and test that the output and behavior is as expected.
275 |
276 | db: same as in this module
277 | io_loop: same as in this module
278 | spawner: the BatchSpawnerBase subclass to test
279 | script: list of (input_re_to_match, output)
280 | batch_script_re_list: if given, assert batch script matches all of these
281 | """
282 | # Create the expected scripts
283 | cmd_expectlist, out_list = zip(*script)
284 | cmd_expectlist = list(cmd_expectlist)
285 | out_list = list(out_list)
286 |
287 | class BatchDummyTestScript(spawner):
288 | @gen.coroutine
289 | def run_command(self, cmd, input=None, env=None):
290 | # Test the input
291 | run_re = cmd_expectlist.pop(0)
292 | if run_re:
293 | print('run: "{}" [{}]'.format(cmd, run_re))
294 | assert (
295 | run_re.search(cmd) is not None
296 | ), "Failed test: re={0} cmd={1}".format(run_re, cmd)
297 | # Test the stdin - will only be the batch script. For
298 | # each regular expression in batch_script_re_list, assert that
299 | # each re in that list matches the batch script.
300 | if batch_script_re_list and input:
301 | batch_script = input
302 | for match_re in batch_script_re_list:
303 | assert (
304 | match_re.search(batch_script) is not None
305 | ), "Batch script does not match {}".format(match_re)
306 | # Return expected output.
307 | out = out_list.pop(0)
308 | print(" --> " + out)
309 | return out
310 |
311 | spawner = new_spawner(db=db, spawner_class=BatchDummyTestScript, **spawner_kwargs)
312 | # Not running at beginning (no command run)
313 | status = io_loop.run_sync(spawner.poll, timeout=5)
314 | assert status == 1
315 | # batch_submit_cmd
316 | # batch_query_cmd (result=pending)
317 | # batch_query_cmd (result=running)
318 | io_loop.run_sync(spawner.start, timeout=5)
319 | assert spawner.job_id == testjob
320 | check_ip(spawner, testhost)
321 | # batch_query_cmd
322 | status = io_loop.run_sync(spawner.poll, timeout=5)
323 | assert status is None
324 | # batch_cancel_cmd
325 | io_loop.run_sync(spawner.stop, timeout=5)
326 | # batch_poll_cmd
327 | status = io_loop.run_sync(spawner.poll, timeout=5)
328 | assert status == 1
329 |
330 |
331 | def test_torque(db, io_loop):
332 | spawner_kwargs = {
333 | "req_nprocs": "5",
334 | "req_memory": "5678",
335 | "req_options": "some_option_asdf",
336 | "req_prologue": "PROLOGUE",
337 | "req_epilogue": "EPILOGUE",
338 | }
339 | batch_script_re_list = [
340 | re.compile(
341 | r"^PROLOGUE.*^batchspawner-singleuser singleuser_command.*^EPILOGUE",
342 | re.S | re.M,
343 | ),
344 | re.compile(r"mem=5678"),
345 | re.compile(r"ppn=5"),
346 | re.compile(r"^#PBS some_option_asdf", re.M),
347 | ]
348 | script = [
349 | (re.compile(r"sudo.*qsub"), str(testjob)),
350 | (
351 | re.compile(r"sudo.*qstat"),
352 | "Q",
353 | ), # pending
354 | (
355 | re.compile(r"sudo.*qstat"),
356 | "R{}/1".format(testhost),
357 | ), # running
358 | (
359 | re.compile(r"sudo.*qstat"),
360 | "R{}/1".format(testhost),
361 | ), # running
362 | (re.compile(r"sudo.*qdel"), "STOP"),
363 | (re.compile(r"sudo.*qstat"), ""),
364 | ]
365 | from .. import TorqueSpawner
366 |
367 | run_spawner_script(
368 | db,
369 | io_loop,
370 | TorqueSpawner,
371 | script,
372 | batch_script_re_list=batch_script_re_list,
373 | spawner_kwargs=spawner_kwargs,
374 | )
375 |
376 |
377 | def test_moab(db, io_loop):
378 | spawner_kwargs = {
379 | "req_nprocs": "5",
380 | "req_memory": "5678",
381 | "req_options": "some_option_asdf",
382 | "req_prologue": "PROLOGUE",
383 | "req_epilogue": "EPILOGUE",
384 | }
385 | batch_script_re_list = [
386 | re.compile(
387 | r"^PROLOGUE.*^batchspawner-singleuser singleuser_command.*^EPILOGUE",
388 | re.S | re.M,
389 | ),
390 | re.compile(r"mem=5678"),
391 | re.compile(r"ppn=5"),
392 | re.compile(r"^#PBS some_option_asdf", re.M),
393 | ]
394 | script = [
395 | (re.compile(r"sudo.*msub"), str(testjob)),
396 | (re.compile(r"sudo.*mdiag"), 'State="Idle"'), # pending
397 | (
398 | re.compile(r"sudo.*mdiag"),
399 | 'State="Running" AllocNodeList="{}"'.format(testhost),
400 | ), # running
401 | (
402 | re.compile(r"sudo.*mdiag"),
403 | 'State="Running" AllocNodeList="{}"'.format(testhost),
404 | ), # running
405 | (re.compile(r"sudo.*mjobctl.*-c"), "STOP"),
406 | (re.compile(r"sudo.*mdiag"), ""),
407 | ]
408 | from .. import MoabSpawner
409 |
410 | run_spawner_script(
411 | db,
412 | io_loop,
413 | MoabSpawner,
414 | script,
415 | batch_script_re_list=batch_script_re_list,
416 | spawner_kwargs=spawner_kwargs,
417 | )
418 |
419 |
420 | def test_pbs(db, io_loop):
421 | spawner_kwargs = {
422 | "req_nprocs": "4",
423 | "req_memory": "10256",
424 | "req_options": "some_option_asdf",
425 | "req_host": "some_pbs_admin_node",
426 | "req_runtime": "08:00:00",
427 | }
428 | batch_script_re_list = [
429 | re.compile(r"singleuser_command"),
430 | re.compile(r"select=1"),
431 | re.compile(r"ncpus=4"),
432 | re.compile(r"mem=10256"),
433 | re.compile(r"walltime=08:00:00"),
434 | re.compile(r"@some_pbs_admin_node"),
435 | re.compile(r"^#PBS some_option_asdf", re.M),
436 | ]
437 | script = [
438 | (re.compile(r"sudo.*qsub"), str(testjob)),
439 | (re.compile(r"sudo.*qstat"), "job_state = Q"), # pending
440 | (
441 | re.compile(r"sudo.*qstat"),
442 | "job_state = R\nexec_host = {}/2*1".format(testhost),
443 | ), # running
444 | (
445 | re.compile(r"sudo.*qstat"),
446 | "job_state = R\nexec_host = {}/2*1".format(testhost),
447 | ), # running
448 | (re.compile(r"sudo.*qdel"), "STOP"),
449 | (re.compile(r"sudo.*qstat"), ""),
450 | ]
451 | from .. import PBSSpawner
452 |
453 | run_spawner_script(
454 | db,
455 | io_loop,
456 | PBSSpawner,
457 | script,
458 | batch_script_re_list=batch_script_re_list,
459 | spawner_kwargs=spawner_kwargs,
460 | )
461 |
462 |
463 | def test_slurm(db, io_loop):
464 | spawner_kwargs = {
465 | "req_runtime": "3-05:10:10",
466 | "req_nprocs": "5",
467 | "req_memory": "5678",
468 | "req_options": "some_option_asdf",
469 | "req_prologue": "PROLOGUE",
470 | "req_epilogue": "EPILOGUE",
471 | "req_reservation": "RES123",
472 | "req_gres": "GRES123",
473 | }
474 | batch_script_re_list = [
475 | re.compile(
476 | r"PROLOGUE.*srun batchspawner-singleuser singleuser_command.*EPILOGUE", re.S
477 | ),
478 | re.compile(r"^#SBATCH \s+ --cpus-per-task=5", re.X | re.M),
479 | re.compile(r"^#SBATCH \s+ --time=3-05:10:10", re.X | re.M),
480 | re.compile(r"^#SBATCH \s+ some_option_asdf", re.X | re.M),
481 | re.compile(r"^#SBATCH \s+ --reservation=RES123", re.X | re.M),
482 | re.compile(r"^#SBATCH \s+ --gres=GRES123", re.X | re.M),
483 | ]
484 | from .. import SlurmSpawner
485 |
486 | run_spawner_script(
487 | db,
488 | io_loop,
489 | SlurmSpawner,
490 | normal_slurm_script,
491 | batch_script_re_list=batch_script_re_list,
492 | spawner_kwargs=spawner_kwargs,
493 | )
494 |
495 |
496 | # We tend to use slurm as our typical example job. These allow quick
497 | # Slurm examples.
498 | normal_slurm_script = [
499 | (re.compile(r"sudo.*sbatch"), str(testjob)),
500 | (re.compile(r"sudo.*squeue"), "PENDING "), # pending
501 | (
502 | re.compile(r"sudo.*squeue"),
503 | "slurm_load_jobs error: Unable to contact slurm controller",
504 | ), # unknown
505 | (re.compile(r"sudo.*squeue"), "RUNNING " + testhost), # running
506 | (re.compile(r"sudo.*squeue"), "RUNNING " + testhost),
507 | (re.compile(r"sudo.*scancel"), "STOP"),
508 | (re.compile(r"sudo.*squeue"), ""),
509 | ]
510 | from .. import SlurmSpawner
511 |
512 |
513 | def run_typical_slurm_spawner(
514 | db,
515 | io_loop,
516 | spawner=SlurmSpawner,
517 | script=normal_slurm_script,
518 | batch_script_re_list=None,
519 | spawner_kwargs={},
520 | ):
521 | """Run a full slurm job with default (overrideable) parameters.
522 |
523 | This is useful, for example, for changing options and testing effect
524 | of batch scripts.
525 | """
526 | return run_spawner_script(
527 | db,
528 | io_loop,
529 | spawner,
530 | script,
531 | batch_script_re_list=batch_script_re_list,
532 | spawner_kwargs=spawner_kwargs,
533 | )
534 |
535 |
536 | # def test_gridengine(db, io_loop):
537 | # spawner_kwargs = {
538 | # 'req_options': 'some_option_asdf',
539 | # }
540 | # batch_script_re_list = [
541 | # re.compile(r'singleuser_command'),
542 | # re.compile(r'#$\s+some_option_asdf'),
543 | # ]
544 | # script = [
545 | # (re.compile(r'sudo.*qsub'), 'x x '+str(testjob)),
546 | # (re.compile(r'sudo.*qstat'), 'PENDING '),
547 | # (re.compile(r'sudo.*qstat'), 'RUNNING '+testhost),
548 | # (re.compile(r'sudo.*qstat'), 'RUNNING '+testhost),
549 | # (re.compile(r'sudo.*qdel'), 'STOP'),
550 | # (re.compile(r'sudo.*qstat'), ''),
551 | # ]
552 | # from .. import GridengineSpawner
553 | # run_spawner_script(db, io_loop, GridengineSpawner, script,
554 | # batch_script_re_list=batch_script_re_list,
555 | # spawner_kwargs=spawner_kwargs)
556 |
557 |
558 | def test_condor(db, io_loop):
559 | spawner_kwargs = {
560 | "req_nprocs": "5",
561 | "req_memory": "5678",
562 | "req_options": "some_option_asdf",
563 | }
564 | batch_script_re_list = [
565 | re.compile(r"exec batchspawner-singleuser singleuser_command"),
566 | re.compile(r"RequestCpus = 5"),
567 | re.compile(r"RequestMemory = 5678"),
568 | re.compile(r"^some_option_asdf", re.M),
569 | ]
570 | script = [
571 | (
572 | re.compile(r"sudo.*condor_submit"),
573 | "submitted to cluster {}".format(str(testjob)),
574 | ),
575 | (re.compile(r"sudo.*condor_q"), "1,"), # pending
576 |         (re.compile(r"sudo.*condor_q"), "2, @{}".format(testhost)),  # running
577 | (re.compile(r"sudo.*condor_q"), "2, @{}".format(testhost)),
578 | (re.compile(r"sudo.*condor_rm"), "STOP"),
579 | (re.compile(r"sudo.*condor_q"), ""),
580 | ]
581 | from .. import CondorSpawner
582 |
583 | run_spawner_script(
584 | db,
585 | io_loop,
586 | CondorSpawner,
587 | script,
588 | batch_script_re_list=batch_script_re_list,
589 | spawner_kwargs=spawner_kwargs,
590 | )
591 |
592 |
593 | def test_lsf(db, io_loop):
594 | spawner_kwargs = {
595 | "req_nprocs": "5",
596 | "req_memory": "5678",
597 | "req_options": "some_option_asdf",
598 | "req_queue": "some_queue",
599 | "req_prologue": "PROLOGUE",
600 | "req_epilogue": "EPILOGUE",
601 | }
602 | batch_script_re_list = [
603 | re.compile(
604 | r"^PROLOGUE.*^batchspawner-singleuser singleuser_command.*^EPILOGUE",
605 | re.S | re.M,
606 | ),
607 | re.compile(r"#BSUB\s+-q\s+some_queue", re.M),
608 | ]
609 | script = [
610 | (
611 | re.compile(r"sudo.*bsub"),
612 | "Job <{}> is submitted to default queue ".format(str(testjob)),
613 | ),
614 | (re.compile(r"sudo.*bjobs"), "PEND "), # pending
615 | (re.compile(r"sudo.*bjobs"), "RUN {}".format(testhost)), # running
616 | (re.compile(r"sudo.*bjobs"), "RUN {}".format(testhost)),
617 | (re.compile(r"sudo.*bkill"), "STOP"),
618 | (re.compile(r"sudo.*bjobs"), ""),
619 | ]
620 | from .. import LsfSpawner
621 |
622 | run_spawner_script(
623 | db,
624 | io_loop,
625 | LsfSpawner,
626 | script,
627 | batch_script_re_list=batch_script_re_list,
628 | spawner_kwargs=spawner_kwargs,
629 | )
630 |
631 |
632 | def test_keepvars(db, io_loop):
633 | # req_keepvars
634 | spawner_kwargs = {
635 | "req_keepvars": "ABCDE",
636 | }
637 | batch_script_re_list = [
638 | re.compile(r"--export=ABCDE", re.X | re.M),
639 | ]
640 | run_typical_slurm_spawner(
641 | db,
642 | io_loop,
643 | spawner_kwargs=spawner_kwargs,
644 | batch_script_re_list=batch_script_re_list,
645 | )
646 |
647 |     # req_keepvars AND req_keepvars_extra together
648 | spawner_kwargs = {
649 | "req_keepvars": "ABCDE",
650 | "req_keepvars_extra": "XYZ",
651 | }
652 | batch_script_re_list = [
653 | re.compile(r"--export=ABCDE,XYZ", re.X | re.M),
654 | ]
655 | run_typical_slurm_spawner(
656 | db,
657 | io_loop,
658 | spawner_kwargs=spawner_kwargs,
659 | batch_script_re_list=batch_script_re_list,
660 | )
661 |
--------------------------------------------------------------------------------
/batchspawner/batchspawner.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Regents of the University of Minnesota
2 | # Copyright (c) Michael Gilbert
3 | # Distributed under the terms of the Modified BSD License.
4 |
5 | """Batch spawners
6 |
7 | This file contains an abstraction layer for batch job queueing systems, and implements
8 | JupyterHub spawners for Torque, SLURM, and several other batch systems.
9 |
10 | Common attributes of batch submission / resource manager environments will include notions of:
11 | * queue names, resource manager addresses
12 | * resource limits including runtime, number of processes, memory
13 | * singleuser child process running on (usually remote) host not known until runtime
14 | * job submission and monitoring via resource manager utilities
15 | * remote execution via submission of templated scripts
16 | * job names instead of PIDs
17 | """
18 | import asyncio
19 | from async_generator import async_generator, yield_
20 | import pwd
21 | import os
22 | import re
23 |
24 | import xml.etree.ElementTree as ET
25 |
26 | from enum import Enum
27 |
28 | from jinja2 import Template
29 |
30 | from tornado import gen
31 |
32 | from jupyterhub.spawner import Spawner
33 | from traitlets import Integer, Unicode, Float, Dict, default
34 |
35 | from jupyterhub.spawner import set_user_setuid
36 |
37 |
38 | def format_template(template, *args, **kwargs):
39 | """Format a template, either using jinja2 or str.format().
40 |
41 | Use jinja2 if the template is a jinja2.Template, or contains '{{' or
42 |     '{%'. Otherwise, use str.format() for backwards compatibility with
43 | old scripts (but you can't mix them).
44 | """
45 | if isinstance(template, Template):
46 | return template.render(*args, **kwargs)
47 | elif "{{" in template or "{%" in template:
48 | return Template(template).render(*args, **kwargs)
49 | return template.format(*args, **kwargs)
50 |
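# Example (a sketch; values are illustrative): both template styles render the
# same result, but the two syntaxes cannot be mixed in one template:
#
#   format_template("sbatch --mem={memory}", memory="4G")    -> "sbatch --mem=4G"
#   format_template("sbatch --mem={{memory}}", memory="4G")  -> "sbatch --mem=4G"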
51 |
52 | class JobStatus(Enum):
53 | NOTFOUND = 0
54 | RUNNING = 1
55 | PENDING = 2
56 | UNKNOWN = 3
57 |
58 |
59 | class BatchSpawnerBase(Spawner):
60 | """Base class for spawners using resource manager batch job submission mechanisms
61 |
62 |     This base class is developed targeting the TorqueSpawner and SlurmSpawner, so by default
63 | assumes a qsub-like command that reads a script from its stdin for starting jobs,
64 | a qstat-like command that outputs some data that can be parsed to check if the job is running
65 | and on what remote node, and a qdel-like command to cancel a job. The goal is to be
66 | sufficiently general that a broad range of systems can be supported with minimal overrides.
67 |
68 | At minimum, subclasses should provide reasonable defaults for the traits:
69 | batch_script
70 | batch_submit_cmd
71 | batch_query_cmd
72 | batch_cancel_cmd
73 |
74 | and must provide implementations for the methods:
75 | state_ispending
76 | state_isrunning
77 | state_gethost
78 | """
79 |
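    # A minimal subclass is sketched below (names and states are hypothetical;
    # a real subclass would also provide a batch_script template):
    #
    #   class MyQueueSpawner(BatchSpawnerBase):
    #       batch_submit_cmd = Unicode("myqsub").tag(config=True)
    #       batch_query_cmd = Unicode("myqstat {job_id}").tag(config=True)
    #       batch_cancel_cmd = Unicode("myqdel {job_id}").tag(config=True)
    #
    #       def state_ispending(self):
    #           return self.job_status.startswith("PENDING")
    #
    #       def state_isrunning(self):
    #           return self.job_status.startswith("RUNNING")
    #
    #       def state_gethost(self):
    #           return self.job_status.split()[-1]
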
80 | # override default since batch systems typically need longer
81 | start_timeout = Integer(300).tag(config=True)
82 |
83 | # override default server ip since batch jobs normally running remotely
84 | ip = Unicode(
85 | "0.0.0.0",
86 |         help="Address for singleuser server to listen on",
87 | ).tag(config=True)
88 |
89 | exec_prefix = Unicode(
90 | "sudo -E -u {username}",
91 |         help="Standard execution prefix (e.g. the default sudo -E -u {username})",
92 | ).tag(config=True)
93 |
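    # Example (sketch, hypothetical user name): with the defaults, a submission
    # for user "alice" runs a command line such as
    # "sudo -E -u alice sbatch --parsable" (SlurmSpawner), with the formatted
    # batch script supplied on the command's stdin.
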
94 | # all these req_foo traits will be available as substvars for templated strings
95 | req_queue = Unicode(
96 | "",
97 | help="Queue name to submit job to resource manager",
98 | ).tag(config=True)
99 |
100 | req_host = Unicode(
101 | "",
102 | help="Host name of batch server to submit job to resource manager",
103 | ).tag(config=True)
104 |
105 | req_memory = Unicode(
106 | "",
107 | help="Memory to request from resource manager",
108 | ).tag(config=True)
109 |
110 | req_nprocs = Unicode(
111 | "",
112 | help="Number of processors to request from resource manager",
113 | ).tag(config=True)
114 |
115 | req_ngpus = Unicode(
116 | "",
117 | help="Number of GPUs to request from resource manager",
118 | ).tag(config=True)
119 |
120 | req_runtime = Unicode(
121 | "",
122 | help="Length of time for submitted job to run",
123 | ).tag(config=True)
124 |
125 | req_partition = Unicode(
126 | "",
127 | help="Partition name to submit job to resource manager",
128 | ).tag(config=True)
129 |
130 | req_account = Unicode(
131 | "",
132 | help="Account name string to pass to the resource manager",
133 | ).tag(config=True)
134 |
135 | req_options = Unicode(
136 | "",
137 | help="Other options to include into job submission script",
138 | ).tag(config=True)
139 |
140 | req_prologue = Unicode(
141 | "",
142 | help="Script to run before single user server starts.",
143 | ).tag(config=True)
144 |
145 | req_epilogue = Unicode(
146 | "",
147 | help="Script to run after single user server ends.",
148 | ).tag(config=True)
149 |
150 | req_username = Unicode()
151 |
152 | @default("req_username")
153 | def _req_username_default(self):
154 | return self.user.name
155 |
156 | # Useful IF getpwnam on submit host returns correct info for exec host
157 | req_homedir = Unicode()
158 |
159 | @default("req_homedir")
160 | def _req_homedir_default(self):
161 | return pwd.getpwnam(self.user.name).pw_dir
162 |
163 | req_keepvars = Unicode()
164 |
165 | @default("req_keepvars")
166 | def _req_keepvars_default(self):
167 | return ",".join(self.get_env().keys())
168 |
169 | req_keepvars_extra = Unicode(
170 | help="Extra environment variables which should be configured, "
171 | "added to the defaults in keepvars, "
172 | "comma separated list.",
173 | )
174 |
175 | batch_script = Unicode(
176 | "",
177 | help="Template for job submission script. Traits on this class named like req_xyz "
178 | "will be substituted in the template for {xyz} using string.Formatter. "
179 | "Must include {cmd} which will be replaced with the jupyterhub-singleuser command line.",
180 | ).tag(config=True)
181 |
182 | batchspawner_singleuser_cmd = Unicode(
183 | "batchspawner-singleuser",
184 | help="A wrapper which is capable of special batchspawner setup: currently sets the port on "
185 | "the remote host. Not needed to be set under normal circumstances, unless path needs "
186 | "specification.",
187 | ).tag(config=True)
188 |
189 | # Raw output of job submission command unless overridden
190 | job_id = Unicode()
191 |
192 | # Will get the raw output of the job status command unless overridden
193 | job_status = Unicode()
194 |
195 | # Prepare substitution variables for templates using req_xyz traits
196 | def get_req_subvars(self):
197 | reqlist = [t for t in self.trait_names() if t.startswith("req_")]
198 | subvars = {}
199 | for t in reqlist:
200 | subvars[t[4:]] = getattr(self, t)
201 | if subvars.get("keepvars_extra"):
202 | subvars["keepvars"] += "," + subvars["keepvars_extra"]
203 | return subvars
204 |
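    # Example (sketch, illustrative values): with req_queue = "gpu" and
    # req_runtime = "1:00:00", get_req_subvars() returns
    # {"queue": "gpu", "runtime": "1:00:00", ...}, so {queue} and {runtime}
    # in batch_script or batch_submit_cmd expand to those values.
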
205 | batch_submit_cmd = Unicode(
206 | "",
207 | help="Command to run to submit batch scripts. Formatted using req_xyz traits as {xyz}.",
208 | ).tag(config=True)
209 |
210 | def parse_job_id(self, output):
211 | "Parse output of submit command to get job id."
212 | return output
213 |
214 | def cmd_formatted_for_batch(self):
215 | """The command which is substituted inside of the batch script"""
216 | return " ".join([self.batchspawner_singleuser_cmd] + self.cmd + self.get_args())
217 |
218 | async def run_command(self, cmd, input=None, env=None):
219 | proc = await asyncio.create_subprocess_shell(
220 | cmd,
221 | env=env,
222 | stdin=asyncio.subprocess.PIPE,
223 | stdout=asyncio.subprocess.PIPE,
224 | stderr=asyncio.subprocess.PIPE,
225 | )
226 | inbytes = None
227 |
228 | if input:
229 | inbytes = input.encode()
230 |
231 | try:
232 | out, eout = await proc.communicate(input=inbytes)
233 | except:
234 | self.log.debug("Exception raised when trying to run command: %s" % cmd)
235 | proc.kill()
236 | self.log.debug("Running command failed, killed process.")
237 | try:
238 | out, eout = await asyncio.wait_for(proc.communicate(), timeout=2)
239 | out = out.decode().strip()
240 | eout = eout.decode().strip()
241 | self.log.error("Subprocess returned exitcode %s" % proc.returncode)
242 | self.log.error("Stdout:")
243 | self.log.error(out)
244 | self.log.error("Stderr:")
245 | self.log.error(eout)
246 | raise RuntimeError(
247 | "{} exit status {}: {}".format(cmd, proc.returncode, eout)
248 | )
249 | except asyncio.TimeoutError:
250 | self.log.error(
251 | "Encountered timeout trying to clean up command, process probably killed already: %s"
252 | % cmd
253 | )
254 | return ""
255 | except:
256 | self.log.error(
257 | "Encountered exception trying to clean up command: %s" % cmd
258 | )
259 | raise
260 | else:
261 | eout = eout.decode().strip()
262 | err = proc.returncode
263 | if err != 0:
264 | self.log.error("Subprocess returned exitcode %s" % err)
265 | self.log.error(eout)
266 | raise RuntimeError(eout)
267 |
268 | out = out.decode().strip()
269 | return out
270 |
271 | async def _get_batch_script(self, **subvars):
272 | """Format batch script from vars"""
273 | # Could be overridden by subclasses, but mainly useful for testing
274 | return format_template(self.batch_script, **subvars)
275 |
276 | async def submit_batch_script(self):
277 | subvars = self.get_req_subvars()
278 | # `cmd` is submitted to the batch system
279 | cmd = " ".join(
280 | (
281 | format_template(self.exec_prefix, **subvars),
282 | format_template(self.batch_submit_cmd, **subvars),
283 | )
284 | )
285 | # `subvars['cmd']` is what is run _inside_ the batch script,
286 | # put into the template.
287 | subvars["cmd"] = self.cmd_formatted_for_batch()
288 | if hasattr(self, "user_options"):
289 | subvars.update(self.user_options)
290 | script = await self._get_batch_script(**subvars)
291 | self.log.info("Spawner submitting job using " + cmd)
292 | self.log.info("Spawner submitted script:\n" + script)
293 | out = await self.run_command(cmd, input=script, env=self.get_env())
294 | try:
295 | self.log.info("Job submitted. cmd: " + cmd + " output: " + out)
296 | self.job_id = self.parse_job_id(out)
297 |         except Exception:
298 |             self.log.error("Job submission failed; command output: " + out)
299 | self.job_id = ""
300 | return self.job_id
301 |
302 | # Override if your batch system needs something more elaborate to query the job status
303 | batch_query_cmd = Unicode(
304 | "",
305 | help="Command to run to query job status. Formatted using req_xyz traits as {xyz} "
306 | "and self.job_id as {job_id}.",
307 | ).tag(config=True)
308 |
309 | async def query_job_status(self):
310 | """Check job status, return JobStatus object."""
311 | if self.job_id is None or len(self.job_id) == 0:
312 | self.job_status = ""
313 | return JobStatus.NOTFOUND
314 | subvars = self.get_req_subvars()
315 | subvars["job_id"] = self.job_id
316 | cmd = " ".join(
317 | (
318 | format_template(self.exec_prefix, **subvars),
319 | format_template(self.batch_query_cmd, **subvars),
320 | )
321 | )
322 | self.log.debug("Spawner querying job: " + cmd)
323 | try:
324 | self.job_status = await self.run_command(cmd)
325 | except RuntimeError as e:
326 | # e.args[0] is stderr from the process
327 | self.job_status = e.args[0]
328 |         except Exception as e:
329 |             self.log.error("Error querying job %s: %s" % (self.job_id, e))
330 |             self.job_status = ""
331 |
332 | if self.state_isrunning():
333 | return JobStatus.RUNNING
334 | elif self.state_ispending():
335 | return JobStatus.PENDING
336 | elif self.state_isunknown():
337 | return JobStatus.UNKNOWN
338 | else:
339 | return JobStatus.NOTFOUND
340 |
341 | batch_cancel_cmd = Unicode(
342 | "",
343 | help="Command to stop/cancel a previously submitted job. Formatted like batch_query_cmd.",
344 | ).tag(config=True)
345 |
346 | async def cancel_batch_job(self):
347 | subvars = self.get_req_subvars()
348 | subvars["job_id"] = self.job_id
349 | cmd = " ".join(
350 | (
351 | format_template(self.exec_prefix, **subvars),
352 | format_template(self.batch_cancel_cmd, **subvars),
353 | )
354 | )
355 | self.log.info("Cancelling job " + self.job_id + ": " + cmd)
356 | await self.run_command(cmd)
357 |
358 | def load_state(self, state):
359 | """load job_id from state"""
360 |         super().load_state(state)
361 | self.job_id = state.get("job_id", "")
362 | self.job_status = state.get("job_status", "")
363 |
364 | def get_state(self):
365 | """add job_id to state"""
366 |         state = super().get_state()
367 | if self.job_id:
368 | state["job_id"] = self.job_id
369 | if self.job_status:
370 | state["job_status"] = self.job_status
371 | return state
372 |
373 | def clear_state(self):
374 | """clear job_id state"""
375 |         super().clear_state()
376 | self.job_id = ""
377 | self.job_status = ""
378 |
379 | def make_preexec_fn(self, name):
380 | """make preexec fn to change uid (if running as root) before job submission"""
381 | return set_user_setuid(name)
382 |
383 | def state_ispending(self):
384 | "Return boolean indicating if job is still waiting to run, likely by parsing self.job_status"
385 | raise NotImplementedError("Subclass must provide implementation")
386 |
387 | def state_isrunning(self):
388 | "Return boolean indicating if job is running, likely by parsing self.job_status"
389 | raise NotImplementedError("Subclass must provide implementation")
390 |
391 | def state_isunknown(self):
392 | "Return boolean indicating if job state retrieval failed because of the resource manager"
393 | return None
394 |
395 | def state_gethost(self):
396 | "Return string, hostname or addr of running job, likely by parsing self.job_status"
397 | raise NotImplementedError("Subclass must provide implementation")
398 |
399 | async def poll(self):
400 | """Poll the process"""
401 | status = await self.query_job_status()
402 | if status in (JobStatus.PENDING, JobStatus.RUNNING, JobStatus.UNKNOWN):
403 | return None
404 | else:
405 | self.clear_state()
406 | return 1
407 |
408 | startup_poll_interval = Float(
409 | 0.5,
410 | help="Polling interval (seconds) to check job state during startup",
411 | ).tag(config=True)
412 |
413 | async def start(self):
414 | """Start the process"""
415 | self.ip = self.traits()["ip"].default_value
416 | self.port = self.traits()["port"].default_value
417 |
418 | if self.server:
419 | self.server.port = self.port
420 |
421 | job = await self.submit_batch_script()
422 |
423 | # We are called with a timeout, and if the timeout expires this function will
424 | # be interrupted at the next yield, and self.stop() will be called.
425 | # So this function should not return unless successful, and if unsuccessful
426 |         # should either raise an Exception or loop forever.
427 | if len(self.job_id) == 0:
428 | raise RuntimeError(
429 | "Jupyter batch job submission failure (no jobid in output)"
430 | )
431 | while True:
432 | status = await self.query_job_status()
433 | if status == JobStatus.RUNNING:
434 | break
435 | elif status == JobStatus.PENDING:
436 | self.log.debug("Job " + self.job_id + " still pending")
437 | elif status == JobStatus.UNKNOWN:
438 | self.log.debug("Job " + self.job_id + " still unknown")
439 | else:
440 | self.log.warning(
441 | "Job "
442 | + self.job_id
443 | + " neither pending nor running.\n"
444 | + self.job_status
445 | )
446 | self.clear_state()
447 | raise RuntimeError(
448 | "The Jupyter batch job has disappeared"
449 | " while pending in the queue or died immediately"
450 | " after starting."
451 | )
452 | await gen.sleep(self.startup_poll_interval)
453 |
454 | self.ip = self.state_gethost()
455 | while self.port == 0:
456 | await gen.sleep(self.startup_poll_interval)
457 | # Test framework: For testing, mock_port is set because we
458 | # don't actually run the single-user server yet.
459 | if hasattr(self, "mock_port"):
460 | self.port = self.mock_port
461 |
462 | self.db.commit()
463 | self.log.info(
464 | "Notebook server job {0} started at {1}:{2}".format(
465 | self.job_id, self.ip, self.port
466 | )
467 | )
468 |
469 | return self.ip, self.port
470 |
471 | async def stop(self, now=False):
472 | """Stop the singleuser server job.
473 |
474 | Returns immediately after sending job cancellation command if now=True, otherwise
475 | tries to confirm that job is no longer running."""
476 |
477 | self.log.info("Stopping server job " + self.job_id)
478 | await self.cancel_batch_job()
479 | if now:
480 | return
481 | for i in range(10):
482 | status = await self.query_job_status()
483 | if status not in (JobStatus.RUNNING, JobStatus.UNKNOWN):
484 | return
485 | await gen.sleep(1.0)
486 | if self.job_id:
487 | self.log.warning(
488 | "Notebook server job {0} at {1}:{2} possibly failed to terminate".format(
489 | self.job_id, self.ip, self.port
490 | )
491 | )
492 |
493 | @async_generator
494 | async def progress(self):
495 | while True:
496 | if self.state_ispending():
497 | await yield_(
498 | {
499 | "message": "Pending in queue...",
500 | }
501 | )
502 | elif self.state_isrunning():
503 | await yield_(
504 | {
505 | "message": "Cluster job running... waiting to connect",
506 | }
507 | )
508 | return
509 | else:
510 | await yield_(
511 | {
512 | "message": "Unknown status...",
513 | }
514 | )
515 | await gen.sleep(1)
516 |
517 |
518 | class BatchSpawnerRegexStates(BatchSpawnerBase):
519 | """Subclass of BatchSpawnerBase that uses config-supplied regular expressions
520 | to interact with batch submission system state. Provides implementations of
521 | state_ispending
522 | state_isrunning
523 | state_gethost
524 |
525 | In their place, the user should supply the following configuration:
526 | state_pending_re - regex that matches job_status if job is waiting to run
527 | state_running_re - regex that matches job_status if job is running
528 | state_exechost_re - regex with at least one capture group that extracts
529 | execution host from job_status
530 | state_exechost_exp - if empty, notebook IP will be set to the contents of the
531 | first capture group. If this variable is set, the match object
532 | will be expanded using this string to obtain the notebook IP.
533 | See Python docs: re.match.expand
534 | """
535 |
536 | state_pending_re = Unicode(
537 | "",
538 | help="Regex that matches job_status if job is waiting to run",
539 | ).tag(config=True)
540 | state_running_re = Unicode(
541 | "",
542 | help="Regex that matches job_status if job is running",
543 | ).tag(config=True)
544 | state_exechost_re = Unicode(
545 | "",
546 | help="Regex with at least one capture group that extracts "
547 | "the execution host from job_status output",
548 | ).tag(config=True)
549 | state_exechost_exp = Unicode(
550 | "",
551 | help="""If empty, notebook IP will be set to the contents of the first capture group.
552 |
553 | If this variable is set, the match object will be expanded using this string
554 | to obtain the notebook IP.
555 | See Python docs: re.match.expand""",
556 | ).tag(config=True)
557 | state_unknown_re = Unicode(
558 | "",
559 |         help="Regex that matches job_status if the resource manager is not answering. "
560 |         "Blank indicates not used.",
561 | ).tag(config=True)
562 |
563 | def state_ispending(self):
564 |         assert self.state_pending_re, "Misconfigured: define state_pending_re"
565 | return self.job_status and re.search(self.state_pending_re, self.job_status)
566 |
567 | def state_isrunning(self):
568 | assert self.state_running_re, "Misconfigured: define state_running_re"
569 | return self.job_status and re.search(self.state_running_re, self.job_status)
570 |
571 | def state_isunknown(self):
572 |         # If state_unknown_re is blank (not set), fall through and return None.
573 | if self.state_unknown_re:
574 | return self.job_status and re.search(self.state_unknown_re, self.job_status)
575 |
576 | def state_gethost(self):
577 | assert self.state_exechost_re, "Misconfigured: define state_exechost_re"
578 | match = re.search(self.state_exechost_re, self.job_status)
579 | if not match:
580 | self.log.error(
581 | "Spawner unable to match host addr in job status: " + self.job_status
582 | )
583 | return
584 | if not self.state_exechost_exp:
585 | return match.groups()[0]
586 | else:
587 | return match.expand(self.state_exechost_exp)
588 |
589 |
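# Example (sketch, hypothetical host naming): if job_status contains
# "host=node-12 port=4242", then configuring
#
#   state_exechost_re  = r"host=(\w+)-(\d+)"
#   state_exechost_exp = r"\g<1>\g<2>.cluster.example.com"
#
# yields "node12.cluster.example.com" via re.Match.expand().
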
590 | class TorqueSpawner(BatchSpawnerRegexStates):
591 | batch_script = Unicode(
592 | """#!/bin/sh
593 | #PBS -q {queue}@{host}
594 | #PBS -l walltime={runtime}
595 | #PBS -l nodes=1:ppn={nprocs}
596 | #PBS -l mem={memory}
597 | #PBS -N jupyterhub-singleuser
598 | #PBS -v {keepvars}
599 | #PBS {options}
600 |
601 | set -eu
602 |
603 | {prologue}
604 | {cmd}
605 | {epilogue}
606 | """
607 | ).tag(config=True)
608 |
609 | # outputs job id string
610 | batch_submit_cmd = Unicode("qsub").tag(config=True)
611 | # outputs job data XML string
612 | batch_query_cmd = Unicode("qstat -x {job_id}").tag(config=True)
613 | batch_cancel_cmd = Unicode("qdel {job_id}").tag(config=True)
614 | # search XML string for job_state - [QH] = pending, R = running, [CE] = done
615 | state_pending_re = Unicode(r"[QH]").tag(config=True)
616 | state_running_re = Unicode(r"R").tag(config=True)
617 | state_exechost_re = Unicode(r"((?:[\w_-]+\.?)+)/\d+").tag(config=True)
618 |
619 |
620 | class MoabSpawner(TorqueSpawner):
621 | # outputs job id string
622 | batch_submit_cmd = Unicode("msub").tag(config=True)
623 | # outputs job data XML string
624 | batch_query_cmd = Unicode("mdiag -j {job_id} --xml").tag(config=True)
625 | batch_cancel_cmd = Unicode("mjobctl -c {job_id}").tag(config=True)
626 | state_pending_re = Unicode(r'State="Idle"').tag(config=True)
627 | state_running_re = Unicode(r'State="Running"').tag(config=True)
628 | state_exechost_re = Unicode(r'AllocNodeList="([^\r\n\t\f :"]*)').tag(config=True)
629 |
630 |
631 | class PBSSpawner(TorqueSpawner):
632 | batch_script = Unicode(
633 | """#!/bin/sh
634 | {% if queue or host %}#PBS -q {% if queue %}{{queue}}{% endif %}\
635 | {% if host %}@{{host}}{% endif %}{% endif %}
636 | #PBS -l walltime={{runtime}}
637 | #PBS -l select=1:ncpus={{nprocs}}:mem={{memory}}
638 | #PBS -N jupyterhub-singleuser
639 | #PBS -o {{homedir}}/.jupyterhub.pbs.out
640 | #PBS -e {{homedir}}/.jupyterhub.pbs.err
641 | #PBS -v {{keepvars}}
642 | {% if options %}#PBS {{options}}{% endif %}
643 |
644 | set -eu
645 |
646 | {{prologue}}
647 | {{cmd}}
648 | {{epilogue}}
649 | """
650 | ).tag(config=True)
651 |
652 |     # outputs full job status as text (not XML), including finished jobs
653 | batch_query_cmd = Unicode("qstat -fx {job_id}").tag(config=True)
654 |
655 | state_pending_re = Unicode(r"job_state = [QH]").tag(config=True)
656 | state_running_re = Unicode(r"job_state = R").tag(config=True)
657 |     # state_exechost_re = Unicode(r"exec_host = ([\w_-]+)/").tag(config=True)
658 | state_exechost_re = Unicode(r"exec_host = ([\w_-]+)").tag(config=True)
659 |
660 |
661 | class UserEnvMixin:
662 | """Mixin class that computes values for USER, SHELL and HOME in the environment passed to
663 | the job submission subprocess in case the batch system needs these for the batch script.
664 | """
665 |
666 | def user_env(self, env):
667 | """get user environment"""
668 | env["USER"] = self.user.name
669 | home = pwd.getpwnam(self.user.name).pw_dir
670 | shell = pwd.getpwnam(self.user.name).pw_shell
671 | if home:
672 | env["HOME"] = home
673 | if shell:
674 | env["SHELL"] = shell
675 | return env
676 |
677 | def get_env(self):
678 | """Get user environment variables to be passed to the user's job
679 |
680 | Everything here should be passed to the user's job as
681 | environment. Caution: If these variables are used for
682 | authentication to the batch system commands as an admin, be
683 | aware that the user will receive access to these as well.
684 | """
685 | env = super().get_env()
686 | env = self.user_env(env)
687 | return env
688 |
689 |
690 | class SlurmSpawner(UserEnvMixin, BatchSpawnerRegexStates):
691 | batch_script = Unicode(
692 | """#!/bin/bash
693 | #SBATCH --output={{homedir}}/jupyterhub_slurmspawner_%j.log
694 | #SBATCH --job-name=spawner-jupyterhub
695 | #SBATCH --chdir={{homedir}}
696 | #SBATCH --export={{keepvars}}
697 | #SBATCH --get-user-env=L
698 | {% if partition %}#SBATCH --partition={{partition}}
699 | {% endif %}{% if runtime %}#SBATCH --time={{runtime}}
700 | {% endif %}{% if memory %}#SBATCH --mem={{memory}}
701 | {% endif %}{% if gres %}#SBATCH --gres={{gres}}
702 | {% endif %}{% if nprocs %}#SBATCH --cpus-per-task={{nprocs}}
703 | {% endif %}{% if reservation%}#SBATCH --reservation={{reservation}}
704 | {% endif %}{% if options %}#SBATCH {{options}}{% endif %}
705 |
706 | set -euo pipefail
707 |
708 | trap 'echo SIGTERM received' TERM
709 | {{prologue}}
710 | which jupyterhub-singleuser
711 | {% if srun %}{{srun}} {% endif %}{{cmd}}
712 | echo "jupyterhub-singleuser ended gracefully"
713 | {{epilogue}}
714 | """
715 | ).tag(config=True)
716 |
717 | # all these req_foo traits will be available as substvars for templated strings
718 | req_cluster = Unicode(
719 | "",
720 | help="Cluster name to submit job to resource manager",
721 | ).tag(config=True)
722 |
723 | req_qos = Unicode(
724 | "",
725 | help="QoS name to submit job to resource manager",
726 | ).tag(config=True)
727 |
728 | req_srun = Unicode(
729 | "srun",
730 | help="Set req_srun='' to disable running in job step, and note that "
731 | "this affects environment handling. This is effectively a "
732 | "prefix for the singleuser command.",
733 | ).tag(config=True)
734 |
735 | req_reservation = Unicode(
736 | "",
737 | help="Reservation name to submit to resource manager",
738 | ).tag(config=True)
739 |
740 | req_gres = Unicode(
741 | "",
742 | help="Additional resources (e.g. GPUs) requested",
743 | ).tag(config=True)
744 |
745 |     # with --parsable, outputs the job id alone, e.g. "209" or "209;cluster"
746 | batch_submit_cmd = Unicode("sbatch --parsable").tag(config=True)
747 | # outputs status and exec node like "RUNNING hostname"
748 | batch_query_cmd = Unicode("squeue -h -j {job_id} -o '%T %B'").tag(config=True)
749 | batch_cancel_cmd = Unicode("scancel {job_id}").tag(config=True)
750 | # use long-form states: PENDING, CONFIGURING = pending
751 | # RUNNING, COMPLETING = running
752 | state_pending_re = Unicode(r"^(?:PENDING|CONFIGURING)").tag(config=True)
753 | state_running_re = Unicode(r"^(?:RUNNING|COMPLETING)").tag(config=True)
754 | state_unknown_re = Unicode(
755 | r"^slurm_load_jobs error: (?:Socket timed out on send/recv|Unable to contact slurm controller)"
756 | ).tag(config=True)
757 | state_exechost_re = Unicode(r"\s+((?:[\w_-]+\.?)+)$").tag(config=True)
758 |
759 |     def parse_job_id(self, output):
760 |         # make sure the job id is really a number
761 |         try:
762 |             # use only the last line to circumvent a slurm bug
763 |             output = output.splitlines()[-1]
764 |             job_id = output.split(";")[0]
765 |             int(job_id)
766 |         except Exception:
767 |             self.log.error("SlurmSpawner unable to parse job ID from text: " + output)
768 |             raise
769 |         return job_id
770 |
771 |
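# Example (sketch): with "sbatch --parsable" the submit output is the job id,
# optionally followed by the cluster name, e.g. "209" or "209;cluster"; in
# both cases parse_job_id returns "209".
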
772 | class MultiSlurmSpawner(SlurmSpawner):
773 | """When slurm has been compiled with --enable-multiple-slurmd, the
774 | administrator sets the name of the slurmd instance via the slurmd -N
775 | option. This node name is usually different from the hostname and may
776 | not be resolvable by JupyterHub. Here we enable the administrator to
777 | map the node names onto the real hostnames via a traitlet."""
778 |
779 | daemon_resolver = Dict(
780 | {},
781 | help="Map node names to hostnames",
782 | ).tag(config=True)
783 |
784 | def state_gethost(self):
785 | host = SlurmSpawner.state_gethost(self)
786 | return self.daemon_resolver.get(host, host)
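
    # Example (sketch, hypothetical names): with
    #   c.MultiSlurmSpawner.daemon_resolver = {"slurmd-1": "node1.example.com"}
    # a job reported on node name "slurmd-1" is reached at "node1.example.com".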
787 |
788 |
789 | class GridengineSpawner(BatchSpawnerBase):
790 | batch_script = Unicode(
791 | """#!/bin/bash
792 | #$ -j yes
793 | #$ -N spawner-jupyterhub
794 | #$ -o {homedir}/.jupyterhub.sge.out
795 | #$ -e {homedir}/.jupyterhub.sge.err
796 | #$ -v {keepvars}
797 | #$ {options}
798 |
799 | set -euo pipefail
800 |
801 | {prologue}
802 | {cmd}
803 | {epilogue}
804 | """
805 | ).tag(config=True)
806 |
807 | # outputs job id string
808 | batch_submit_cmd = Unicode("qsub").tag(config=True)
809 | # outputs job data XML string
810 | batch_query_cmd = Unicode("qstat -xml").tag(config=True)
811 | batch_cancel_cmd = Unicode("qdel {job_id}").tag(config=True)
812 |
813 | def parse_job_id(self, output):
814 | return output.split(" ")[2]
815 |
816 | def state_ispending(self):
817 | if self.job_status:
818 | job_info = ET.fromstring(self.job_status).find(
819 | ".//job_list[JB_job_number='{0}']".format(self.job_id)
820 | )
821 | if job_info is not None:
822 | return job_info.attrib.get("state") == "pending"
823 | return False
824 |
825 | def state_isrunning(self):
826 | if self.job_status:
827 | job_info = ET.fromstring(self.job_status).find(
828 | ".//job_list[JB_job_number='{0}']".format(self.job_id)
829 | )
830 | if job_info is not None:
831 | return job_info.attrib.get("state") == "running"
832 | return False
833 |
834 | def state_gethost(self):
835 | if self.job_status:
836 | queue_name = ET.fromstring(self.job_status).find(
837 | ".//job_list[JB_job_number='{0}']/queue_name".format(self.job_id)
838 | )
839 | if queue_name is not None and queue_name.text:
840 | return queue_name.text.split("@")[1]
841 |
842 | self.log.error(
843 | "Spawner unable to match host addr in job {0} with status {1}".format(
844 | self.job_id, self.job_status
845 | )
846 | )
847 | return
848 |
849 | def get_env(self):
850 | env = super().get_env()
851 |
852 | # SGE relies on environment variables to launch local jobs. Ensure that these values are included
853 | # in the environment used to run the spawner.
854 | for key in [
855 | "SGE_CELL",
856 | "SGE_EXECD",
857 | "SGE_ROOT",
858 | "SGE_CLUSTER_NAME",
859 | "SGE_QMASTER_PORT",
860 | "SGE_EXECD_PORT",
861 | "PATH",
862 | ]:
863 | if key in os.environ and key not in env:
864 | env[key] = os.environ[key]
865 | return env
866 |
867 |
868 | class CondorSpawner(UserEnvMixin, BatchSpawnerRegexStates):
869 | batch_script = Unicode(
870 | """
871 | Executable = /bin/sh
872 | RequestMemory = {memory}
873 | RequestCpus = {nprocs}
874 | Arguments = \"-c 'exec {cmd}'\"
875 | Remote_Initialdir = {homedir}
876 | Output = {homedir}/.jupyterhub.condor.out
877 | Error = {homedir}/.jupyterhub.condor.err
878 | ShouldTransferFiles = False
879 | GetEnv = True
880 | {options}
881 | Queue
882 | """
883 | ).tag(config=True)
884 |
885 | # outputs job id string
886 | batch_submit_cmd = Unicode("condor_submit").tag(config=True)
887 |     # outputs job status and remote host like "2, @hostname"
888 | batch_query_cmd = Unicode(
889 | 'condor_q {job_id} -format "%s, " JobStatus -format "%s" RemoteHost -format "\n" True'
890 | ).tag(config=True)
891 | batch_cancel_cmd = Unicode("condor_rm {job_id}").tag(config=True)
892 | # job status: 1 = pending, 2 = running
893 | state_pending_re = Unicode(r"^1,").tag(config=True)
894 | state_running_re = Unicode(r"^2,").tag(config=True)
895 | state_exechost_re = Unicode(r"^\w*, .*@([^ ]*)").tag(config=True)
896 |
897 | def parse_job_id(self, output):
898 | match = re.search(r".*submitted to cluster ([0-9]+)", output)
899 | if match:
900 | return match.groups()[0]
901 |
902 | error_msg = "CondorSpawner unable to parse jobID from text: " + output
903 | self.log.error(error_msg)
904 | raise Exception(error_msg)
905 |
906 | def cmd_formatted_for_batch(self):
907 | return (
908 |             super()
909 | .cmd_formatted_for_batch()
910 | .replace('"', '""')
911 | .replace("'", "''")
912 | )
913 |
914 |
915 | class LsfSpawner(BatchSpawnerBase):
916 | """A Spawner that uses IBM's Platform Load Sharing Facility (LSF) to launch notebooks."""
917 |
918 | batch_script = Unicode(
919 | """#!/bin/sh
920 | #BSUB -R "select[type==any]" # Allow spawning on non-uniform hardware
921 | #BSUB -R "span[hosts=1]" # Only spawn job on one server
922 | #BSUB -q {queue}
923 | #BSUB -J spawner-jupyterhub
924 | #BSUB -o {homedir}/.jupyterhub.lsf.out
925 | #BSUB -e {homedir}/.jupyterhub.lsf.err
926 |
927 | set -eu
928 |
929 | {prologue}
930 | {cmd}
931 | {epilogue}
932 | """
933 | ).tag(config=True)
934 |
935 | batch_submit_cmd = Unicode("bsub").tag(config=True)
936 | batch_query_cmd = Unicode('bjobs -a -noheader -o "STAT EXEC_HOST" {job_id}').tag(
937 | config=True
938 | )
939 | batch_cancel_cmd = Unicode("bkill {job_id}").tag(config=True)
940 |
941 | def get_env(self):
942 | env = super().get_env()
943 |
944 | # LSF relies on environment variables to launch local jobs. Ensure that these values are included
945 | # in the environment used to run the spawner.
946 | for key in [
947 | "LSF_ENVDIR",
948 | "LSF_SERVERDIR",
949 | "LSF_FULL_VERSION",
950 | "LSF_LIBDIR",
951 | "LSF_BINDIR",
952 | ]:
953 | if key in os.environ and key not in env:
954 | env[key] = os.environ[key]
955 | return env
956 |
957 | def parse_job_id(self, output):
958 | # Assumes output in the following form:
959 | # "Job <1815> is submitted to default queue ."
960 | return output.split(" ")[1].strip("<>")
961 |
962 | def state_ispending(self):
963 |         # Parse the status field of the batch_query_cmd output ("STAT EXEC_HOST")
965 | if self.job_status:
966 |             return self.job_status.split(" ")[0].upper() in {"PEND", "PSUSP"}
967 |
968 | def state_isrunning(self):
969 | if self.job_status:
970 | return self.job_status.split(" ")[0].upper() == "RUN"
971 |
972 | def state_gethost(self):
973 | if self.job_status:
974 | return self.job_status.split(" ")[1].strip().split(":")[0]
975 |
976 | self.log.error(
977 | "Spawner unable to match host addr in job {0} with status {1}".format(
978 | self.job_id, self.job_status
979 | )
980 | )
981 | return
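
    # Example (sketch): with batch_query_cmd output "RUN node1:node2",
    # state_isrunning() is True and state_gethost() returns "node1".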
982 |
983 |
984 | # vim: set ai expandtab softtabstop=4:
985 |
--------------------------------------------------------------------------------