├── .ci_support ├── check.py ├── environment-docs.yml ├── environment-integration.yml ├── environment-mini.yml ├── environment-mpich.yml ├── environment-notebooks.yml ├── environment-old.yml ├── environment-openmpi.yml ├── environment-uml.yml ├── environment-win.yml └── release.py ├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ ├── dependabot.yml │ ├── deploy.yml │ └── pipeline.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── README.md ├── binder ├── environment.yml ├── kernel.json └── postBuild ├── codemeta.json ├── docs ├── _config.yml ├── _templates │ ├── custom-class-template.rst │ └── custom-module-template.rst ├── _toc.yml ├── api.rst ├── application.md ├── images │ └── pyiron-logo.png ├── installation.md ├── paper │ ├── paper.bib │ ├── paper.md │ └── process.png └── trouble_shooting.md ├── executorlib ├── __init__.py ├── _version.py ├── api.py ├── backend │ ├── __init__.py │ ├── cache_parallel.py │ ├── cache_serial.py │ ├── interactive_parallel.py │ └── interactive_serial.py ├── executor │ ├── __init__.py │ ├── base.py │ ├── flux.py │ ├── single.py │ └── slurm.py ├── standalone │ ├── __init__.py │ ├── cache.py │ ├── command.py │ ├── inputcheck.py │ ├── interactive │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── backend.py │ │ ├── communication.py │ │ └── spawner.py │ ├── plot.py │ ├── queue.py │ └── serialize.py └── task_scheduler │ ├── __init__.py │ ├── base.py │ ├── file │ ├── __init__.py │ ├── backend.py │ ├── hdf.py │ ├── queue_spawner.py │ ├── shared.py │ ├── subprocess_spawner.py │ └── task_scheduler.py │ └── interactive │ ├── __init__.py │ ├── blockallocation.py │ ├── dependency.py │ ├── fluxspawner.py │ ├── onetoone.py │ ├── shared.py │ └── slurmspawner.py ├── notebooks ├── 1-single-node.ipynb ├── 2-hpc-cluster.ipynb ├── 3-hpc-job.ipynb ├── 4-developer.ipynb ├── 5-1-gpaw.ipynb ├── 5-2-quantum-espresso.ipynb └── images │ ├── uml_executor.png │ └── uml_spawner.png ├── pyproject.toml └── tests ├── __init__.py ├── benchmark ├── llh.py └── test_results.py ├── executables └── count.py ├── integration └── Al.pbe-n-kjpaw_psl.1.0.0.UPF ├── test_backend_interactive_serial.py ├── test_base_executor_queue.py ├── test_cache_backend_execute.py ├── test_cache_fileexecutor_mpi.py ├── test_cache_fileexecutor_serial.py ├── test_fluxclusterexecutor.py ├── test_fluxjobexecutor.py ├── test_fluxjobexecutor_plot.py ├── test_fluxpythonspawner.py ├── test_integration_pyiron_workflow.py ├── test_interactive_dependencies.py ├── test_interactive_slurmspawner.py ├── test_mpiexecspawner.py ├── test_singlenodeexecutor_cache.py ├── test_singlenodeexecutor_dependencies.py ├── test_singlenodeexecutor_mpi.py ├── test_singlenodeexecutor_noblock.py ├── test_singlenodeexecutor_plot_dependency.py ├── test_singlenodeexecutor_resize.py ├── test_singlenodeexecutor_shell_executor.py ├── test_singlenodeexecutor_shell_interactive.py ├── test_standalone_hdf.py ├── test_standalone_inputcheck.py ├── test_standalone_interactive_arguments.py ├── test_standalone_interactive_backend.py └── test_standalone_interactive_communication.py /.ci_support/check.py: -------------------------------------------------------------------------------- 1 | import tomlkit 2 | 3 | 4 | if __name__ == "__main__": 5 | with open("pyproject.toml", "r") as f: 6 | data = tomlkit.load(f) 7 | 8 | lst = list(data['build-system']['requires']) 9 | for sub_lst in data["project"]["optional-dependencies"].values(): 10 | for el in sub_lst: 11 | lst.append(el) 12 | 13 | 
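    # extend the runtime dependencies with the deduplicated union of the
    # build-system requirements and all optional dependencies collected above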
data["project"]["dependencies"] += list(set(lst)) 14 | 15 | with open("pyproject.toml", "w") as f: 16 | f.writelines(tomlkit.dumps(data)) -------------------------------------------------------------------------------- /.ci_support/environment-docs.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - hatchling 5 | - hatch-vcs 6 | - nbsphinx 7 | - sphinx 8 | - sphinx_rtd_theme 9 | - myst-parser 10 | - numpy 11 | - openmpi 12 | - cloudpickle =3.1.1 13 | - mpi4py =4.0.1 14 | - pyzmq =26.4.0 15 | - flux-core 16 | - jupyter-book =1.0.0 17 | - h5py =3.13.0 18 | - python =3.12 19 | -------------------------------------------------------------------------------- /.ci_support/environment-integration.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - jupyter 5 | - papermill 6 | - numpy 7 | - openmpi 8 | - cloudpickle =3.1.1 9 | - mpi4py 10 | - pyzmq =26.3.0 11 | - flux-core =0.59.0 12 | - hatchling =1.27.0 13 | - hatch-vcs =0.5.0 14 | - h5py 15 | - matplotlib =3.10.0 16 | - networkx =3.4.2 17 | - pygraphviz =1.14 18 | - pysqa =0.2.4 19 | - ipython =9.0.2 20 | - atomistics =0.2.4 21 | - qe =7.2 22 | - gpaw =24.6.0 23 | -------------------------------------------------------------------------------- /.ci_support/environment-mini.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - numpy 6 | - cloudpickle =3.1.0 7 | - pyzmq =26.2.0 8 | - hatchling =1.27.0 9 | - hatch-vcs =0.5.0 10 | -------------------------------------------------------------------------------- /.ci_support/environment-mpich.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - numpy 6 | - mpich 7 | - cloudpickle =3.1.1 8 | - mpi4py =4.0.1 9 | - pyzmq =26.4.0 10 | - h5py =3.13.0 11 | - networkx =3.4.2 12 | - pygraphviz =1.14 13 | - ipython =9.0.2 14 | - pysqa =0.2.6 15 | - hatchling =1.27.0 16 | - hatch-vcs =0.5.0 17 | -------------------------------------------------------------------------------- /.ci_support/environment-notebooks.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - jupyter 5 | - papermill -------------------------------------------------------------------------------- /.ci_support/environment-old.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - numpy =1.23.5 6 | - openmpi =4.1.4 7 | - cloudpickle =2.0.0 8 | - mpi4py =3.1.4 9 | - pyzmq =25.0.0 10 | - h5py =3.6.0 11 | - networkx =2.8.8 12 | - ipython =7.33.0 13 | - pygraphviz =1.10 14 | - hatchling =1.27.0 15 | - hatch-vcs =0.4.0 16 | -------------------------------------------------------------------------------- /.ci_support/environment-openmpi.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - numpy 6 | - openmpi 7 | - cloudpickle =3.1.1 8 | - mpi4py =4.0.1 9 | - pyzmq =26.4.0 10 | - h5py =3.13.0 11 | - networkx =3.4.2 12 | - pygraphviz =1.14 13 | - pysqa =0.2.6 14 | - ipython =9.0.2 15 | - hatchling =1.27.0 16 | - hatch-vcs =0.5.0 17 | 
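# Note: the version pins in this file are kept in sync automatically by
# .github/workflows/dependabot.yml, which rewrites the pinned versions in the
# .ci_support environment files whenever Dependabot bumps a dependency.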
-------------------------------------------------------------------------------- /.ci_support/environment-uml.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - pylint 6 | - graphviz -------------------------------------------------------------------------------- /.ci_support/environment-win.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - numpy 6 | - msmpi 7 | - cloudpickle =3.1.1 8 | - mpi4py =4.0.1 9 | - pyzmq =26.4.0 10 | - h5py =3.13.0 11 | - networkx =3.4.2 12 | - pygraphviz =1.14 13 | - ipython =9.0.2 14 | - hatchling =1.27.0 15 | - hatch-vcs =0.5.0 16 | -------------------------------------------------------------------------------- /.ci_support/release.py: -------------------------------------------------------------------------------- 1 | def get_setup_version_and_pattern(setup_content): 2 | depend_lst, version_lst = [], [] 3 | for l in setup_content: 4 | if "==" in l: 5 | lst = ( 6 | l.split("= [")[-1] 7 | .split("]\n")[0] 8 | .replace(" ", "") 9 | .replace('"', "") 10 | .replace("'", "") 11 | .split(",") 12 | ) 13 | for dep in lst: 14 | if dep != "\n": 15 | version_lst.append(dep.split("==")[1]) 16 | depend_lst.append(dep.split("==")[0]) 17 | 18 | version_high_dict = {d: v for d, v in zip(depend_lst, version_lst)} 19 | return version_high_dict 20 | 21 | 22 | def get_env_version(env_content): 23 | read_flag = False 24 | depend_lst, version_lst = [], [] 25 | for l in env_content: 26 | if "dependencies:" in l: 27 | read_flag = True 28 | elif read_flag: 29 | lst = l.replace("-", "").replace(" ", "").replace("\n", "").split("=") 30 | if len(lst) == 2: 31 | depend_lst.append(lst[0]) 32 | version_lst.append(lst[1]) 33 | return {d: v for d, v in zip(depend_lst, version_lst)} 34 | 35 | 36 | def update_dependencies(setup_content, version_low_dict, version_high_dict): 37 | version_combo_dict = {} 38 | for dep, ver in version_high_dict.items(): 39 | if dep in version_low_dict.keys() and version_low_dict[dep] != ver: 40 | version_combo_dict[dep] = dep + ">=" + version_low_dict[dep] + ",<=" + ver 41 | else: 42 | version_combo_dict[dep] = dep + "==" + ver 43 | 44 | setup_content_new = "" 45 | pattern_dict = {d: d + "==" + v for d, v in version_high_dict.items()} 46 | for l in setup_content: 47 | for k, v in pattern_dict.items(): 48 | if v in l: 49 | l = l.replace(v, version_combo_dict[k]) 50 | setup_content_new += l 51 | return setup_content_new 52 | 53 | 54 | if __name__ == "__main__": 55 | with open("pyproject.toml", "r") as f: 56 | setup_content = f.readlines() 57 | 58 | with open("environment.yml", "r") as f: 59 | env_content = f.readlines() 60 | 61 | setup_content_new = update_dependencies( 62 | setup_content=setup_content[2:], 63 | version_low_dict=get_env_version(env_content=env_content), 64 | version_high_dict=get_setup_version_and_pattern( 65 | setup_content=setup_content[2:] 66 | ), 67 | ) 68 | 69 | with open("pyproject.toml", "w") as f: 70 | f.writelines("".join(setup_content[:2]) + setup_content_new) 71 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @jan-janssen 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 -------------------------------------------------------------------------------- /.github/workflows/dependabot.yml: -------------------------------------------------------------------------------- 1 | name: Update Dependabot 2 | 3 | on: 4 | pull_request_target: 5 | branches: [ main ] 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | if: (github.actor == 'dependabot[bot]') 11 | steps: 12 | - uses: actions/checkout@v4 13 | with: 14 | ref: ${{ github.event.pull_request.head.ref }} # Check out the head of the actual branch, not the PR 15 | fetch-depth: 0 # otherwise, you will fail to push refs to dest repo 16 | token: ${{ secrets.DEPENDABOT_WORKFLOW_TOKEN }} 17 | - name: UpdateEnvironmentFile 18 | env: 19 | PR_TITLE: ${{ github.event.pull_request.title }} 20 | shell: bash -l {0} 21 | run: | 22 | package=$(echo "$PR_TITLE" | awk '{print $2}') 23 | from=$(echo "$PR_TITLE" | awk '{print $4}') 24 | to=$(echo "$PR_TITLE" | awk '{print $6}') 25 | sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-mpich.yml 26 | sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-openmpi.yml 27 | sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-win.yml 28 | sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-docs.yml 29 | sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-mini.yml 30 | sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-integration.yml 31 | sed -i "/${package}/s/${from}/${to}/g" binder/environment.yml 32 | - name: UpdateDependabotPR commit 33 | run: | 34 | git config --local user.email "pyiron@mpie.de" 35 | git config --local user.name "pyironrunner" 36 | git commit -m "[dependabot skip] Update environment" -a 37 | - name: UpdateDependabotPR push 38 | uses: ad-m/github-push-action@master 39 | with: 40 | github_token: ${{ secrets.DEPENDABOT_WORKFLOW_TOKEN }} 41 | branch: ${{ github.event.pull_request.head.ref }} 42 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | # This workflow is used to upload and deploy a new release to PyPi 2 | # Based on https://github.com/pypa/gh-action-pypi-publish 3 | 4 | name: PyPi Release 5 | 6 | on: 7 | push: 8 | pull_request: 9 | workflow_dispatch: 10 | 11 | jobs: 12 | build: 13 | if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' 14 | runs-on: ubuntu-latest 15 | environment: 16 | name: pypi 17 | url: https://pypi.org/p/${{ github.event.repository.name }} 18 | permissions: 19 | id-token: write 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Conda config 23 | run: echo -e "channels:\n - conda-forge\n" > .condarc 24 | - uses: conda-incubator/setup-miniconda@v3 25 | with: 26 | python-version: "3.13" 27 | miniforge-version: latest 28 | condarc-file: .condarc 29 | environment-file: .ci_support/environment-openmpi.yml 30 | - name: Build 31 | shell: bash -l {0} 32 | run: | 33 | cp .ci_support/environment-old.yml environment.yml 34 | python .ci_support/release.py; cat pyproject.toml 35 | git update-index --assume-unchanged pyproject.toml executorlib/_version.py 36 | hatchling build -t sdist -t wheel 37 | - name: Publish distribution 📦 to PyPI 38 | uses: pypa/gh-action-pypi-publish@release/v1 39 | 
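      # Note: no PyPI API token is passed to this step; the id-token: write
      # permission above enables PyPI trusted publishing, which
      # pypa/gh-action-pypi-publish picks up automatically when no password is set.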
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | .coverage 4 | .idea/ 5 | .vscode/ 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.11.12 4 | hooks: 5 | - id: ruff 6 | name: ruff lint 7 | args: ["--fix"] 8 | files: ^executorlib/ 9 | - id: ruff-format 10 | name: ruff format 11 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | build: 9 | os: "ubuntu-24.04" 10 | tools: 11 | python: "mambaforge-23.11" 12 | jobs: 13 | pre_build: 14 | # Generate the Sphinx configuration for this Jupyter Book so it builds. 15 | - pip install . --no-deps --no-build-isolation 16 | - "cp README.md docs" 17 | - "cp notebooks/*.ipynb docs" 18 | - "cp -r notebooks/images docs" 19 | - "jupyter-book config sphinx docs/" 20 | 21 | # Build documentation in the docs/ directory with Sphinx 22 | sphinx: 23 | builder: html 24 | configuration: docs/conf.py 25 | 26 | # Optionally build your docs in additional formats such as PDF and ePub 27 | formats: [] 28 | 29 | # Install executorlib from conda 30 | conda: 31 | environment: .ci_support/environment-docs.yml 32 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: "1.2.0" 2 | authors: 3 | - family-names: Janssen 4 | given-names: Jan 5 | orcid: "https://orcid.org/0000-0001-9948-7119" 6 | - family-names: Taylor 7 | given-names: Michael Gilbert 8 | orcid: "https://orcid.org/0000-0003-4327-2746" 9 | - family-names: Yang 10 | given-names: Ping 11 | orcid: "https://orcid.org/0000-0003-4726-2860" 12 | - family-names: Neugebauer 13 | given-names: Joerg 14 | orcid: "https://orcid.org/0000-0002-7903-2472" 15 | - family-names: Perez 16 | given-names: Danny 17 | orcid: "https://orcid.org/0000-0003-3028-5249" 18 | doi: 10.5281/zenodo.15121422 19 | message: If you use this software, please cite our article in the 20 | Journal of Open Source Software. 
21 | preferred-citation: 22 | authors: 23 | - family-names: Janssen 24 | given-names: Jan 25 | orcid: "https://orcid.org/0000-0001-9948-7119" 26 | - family-names: Taylor 27 | given-names: Michael Gilbert 28 | orcid: "https://orcid.org/0000-0003-4327-2746" 29 | - family-names: Yang 30 | given-names: Ping 31 | orcid: "https://orcid.org/0000-0003-4726-2860" 32 | - family-names: Neugebauer 33 | given-names: Joerg 34 | orcid: "https://orcid.org/0000-0002-7903-2472" 35 | - family-names: Perez 36 | given-names: Danny 37 | orcid: "https://orcid.org/0000-0003-3028-5249" 38 | date-published: 2025-04-01 39 | doi: 10.21105/joss.07782 40 | issn: 2475-9066 41 | issue: 108 42 | journal: Journal of Open Source Software 43 | publisher: 44 | name: Open Journals 45 | start: 7782 46 | title: Executorlib -- Up-scaling Python workflows for hierarchical 47 | heterogenous high-performance computing 48 | type: article 49 | url: "https://joss.theoj.org/papers/10.21105/joss.07782" 50 | volume: 10 51 | title: Executorlib -- Up-scaling Python workflows for hierarchical 52 | heterogenous high-performance computing 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Jan Janssen 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE -------------------------------------------------------------------------------- /binder/environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - numpy 6 | - openmpi 7 | - cloudpickle =3.1.1 8 | - mpi4py =4.0.1 9 | - pyzmq =26.4.0 10 | - flux-core =0.59.0 11 | - flux-pmix =0.5.0 12 | - hatchling =1.27.0 13 | - hatch-vcs =0.5.0 14 | - h5py =3.12.1 15 | - matplotlib =3.10.0 16 | - networkx =3.4.2 17 | - pygraphviz =1.14 18 | - pysqa =0.2.6 19 | - ipython =9.0.2 20 | -------------------------------------------------------------------------------- /binder/kernel.json: -------------------------------------------------------------------------------- 1 | { 2 | "argv": [ 3 | "flux", 4 | "start", 5 | "/srv/conda/envs/notebook/bin/python", 6 | "-m", 7 | "ipykernel_launcher", 8 | "-f", 9 | "{connection_file}" 10 | ], 11 | "display_name": "Flux", 12 | "language": "python", 13 | "metadata": { 14 | "debugger": true 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /binder/postBuild: -------------------------------------------------------------------------------- 1 | # jupyter kernel 2 | mkdir -p /home/jovyan/.local/share/jupyter/kernels/flux 3 | cp binder/kernel.json /home/jovyan/.local/share/jupyter/kernels/flux 4 | 5 | # install executorlib 6 | pip install . --no-deps --no-build-isolation 7 | 8 | # copy notebooks 9 | mv notebooks/*.ipynb . 10 | mv notebooks/images . 11 | 12 | # clean up 13 | rm -rf .ci_support .github binder docs notebooks executorlib executorlib.egg-info tests .coveralls.yml .gitignore .readthedocs.yml LICENSE MANIFEST.in README.md pyproject.toml setup.py build 14 | -------------------------------------------------------------------------------- /codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld", 3 | "@type": "Code", 4 | "author": [ 5 | { 6 | "@id": "https://orcid.org/0000-0001-9948-7119", 7 | "@type": "Person", 8 | "email": "j.janssen@mpi-susmat.de", 9 | "name": "Jan Janssen", 10 | "affiliation": "Max Planck Institute for Sustainable Materials, Düsseldorf, Germany" 11 | }, 12 | { 13 | "@id": "https://orcid.org/0000-0003-4327-2746", 14 | "@type": "Person", 15 | "email": "mgt16@lanl.gov", 16 | "name": "Michael Gilbert Taylor", 17 | "affiliation": "Los Alamos National Laboratory, Los Alamos, NM, United States of America" 18 | }, 19 | { 20 | "@id": "https://orcid.org/0000-0003-4726-2860", 21 | "@type": "Person", 22 | "email": "pyang@lanl.gov", 23 | "name": "Ping Yang", 24 | "affiliation": "Los Alamos National Laboratory, Los Alamos, NM, United States of America" 25 | }, 26 | { 27 | "@id": "https://orcid.org/0000-0002-7903-2472", 28 | "@type": "Person", 29 | "email": "j.neugebauer@mpi-susmat.de", 30 | "name": "Joerg Neugebauer", 31 | "affiliation": "Max Planck Institute for Sustainable Materials, Düsseldorf, Germany" 32 | }, 33 | { 34 | "@id": "https://orcid.org/0000-0003-3028-5249", 35 | "@type": "Person", 36 | "email": "danny_perez@lanl.gov", 37 | "name": "Danny Perez", 38 | "affiliation": "Los Alamos National Laboratory, Los Alamos, NM, United States of America" 39 | } 40 | ], 41 | 
"identifier": "", 42 | "codeRepository": "https://github.com/pyiron/executorlib", 43 | "datePublished": "2025-02-14", 44 | "dateModified": "2025-02-14", 45 | "dateCreated": "2025-02-14", 46 | "description": "Up-scale python functions for high performance computing (HPC) with executorlib.", 47 | "keywords": "Python, High Performance Computing, Task Scheduling", 48 | "license": "BSD", 49 | "title": "executorlib", 50 | "version": "0.3.0" 51 | } 52 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | title: executorlib 2 | author: Jan Janssen 3 | 4 | execute: 5 | execute_notebooks : off 6 | 7 | repository: 8 | url : https://github.com/pyiron/executorlib 9 | html: 10 | use_repository_button: true 11 | 12 | launch_buttons: 13 | notebook_interface : jupyterlab 14 | binderhub_url : https://mybinder.org 15 | 16 | sphinx: 17 | extra_extensions: 18 | - 'sphinx.ext.autodoc' 19 | - 'sphinx.ext.napoleon' 20 | - 'sphinx.ext.viewcode' 21 | - 'sphinx.ext.autodoc' 22 | - 'sphinx.ext.autosummary' 23 | config: 24 | autosummary_generate: True 25 | templates_path: ['_templates'] 26 | -------------------------------------------------------------------------------- /docs/_templates/custom-class-template.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. autoclass:: {{ objname }} 6 | :members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | {% block methods %} 11 | .. automethod:: __init__ 12 | 13 | {% if methods %} 14 | .. rubric:: {{ _('Methods') }} 15 | 16 | .. autosummary:: 17 | {% for item in methods %} 18 | ~{{ name }}.{{ item }} 19 | {%- endfor %} 20 | {% endif %} 21 | {% endblock %} 22 | 23 | {% block attributes %} 24 | {% if attributes %} 25 | .. rubric:: {{ _('Attributes') }} 26 | 27 | .. autosummary:: 28 | {% for item in attributes %} 29 | ~{{ name }}.{{ item }} 30 | {%- endfor %} 31 | {% endif %} 32 | {% endblock %} -------------------------------------------------------------------------------- /docs/_templates/custom-module-template.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. automodule:: {{ fullname }} 4 | 5 | {% block attributes %} 6 | {% if attributes %} 7 | .. rubric:: Module Attributes 8 | 9 | .. autosummary:: 10 | :toctree: 11 | {% for item in attributes %} 12 | {{ item }} 13 | {%- endfor %} 14 | {% endif %} 15 | {% endblock %} 16 | 17 | {% block functions %} 18 | {% if functions %} 19 | .. rubric:: {{ _('Functions') }} 20 | 21 | .. autosummary:: 22 | :toctree: 23 | {% for item in functions %} 24 | {{ item }} 25 | {%- endfor %} 26 | {% endif %} 27 | {% endblock %} 28 | 29 | {% block classes %} 30 | {% if classes %} 31 | .. rubric:: {{ _('Classes') }} 32 | 33 | .. autosummary:: 34 | :toctree: 35 | :template: custom-class-template.rst 36 | {% for item in classes %} 37 | {{ item }} 38 | {%- endfor %} 39 | {% endif %} 40 | {% endblock %} 41 | 42 | {% block exceptions %} 43 | {% if exceptions %} 44 | .. rubric:: {{ _('Exceptions') }} 45 | 46 | .. autosummary:: 47 | :toctree: 48 | {% for item in exceptions %} 49 | {{ item }} 50 | {%- endfor %} 51 | {% endif %} 52 | {% endblock %} 53 | 54 | {% block modules %} 55 | {% if modules %} 56 | .. rubric:: Modules 57 | 58 | .. 
autosummary:: 59 | :toctree: 60 | :template: custom-module-template.rst 61 | :recursive: 62 | {% for item in modules %} 63 | {{ item }} 64 | {%- endfor %} 65 | {% endif %} 66 | {% endblock %} -------------------------------------------------------------------------------- /docs/_toc.yml: -------------------------------------------------------------------------------- 1 | format: jb-book 2 | root: README 3 | chapters: 4 | - file: installation.md 5 | - file: 1-single-node.ipynb 6 | - file: 2-hpc-cluster.ipynb 7 | - file: 3-hpc-job.ipynb 8 | - file: application.md 9 | sections: 10 | - file: 5-1-gpaw.ipynb 11 | - file: 5-2-quantum-espresso.ipynb 12 | - file: trouble_shooting.md 13 | - file: 4-developer.ipynb 14 | - file: api.rst 15 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | Interface 2 | ========= 3 | 4 | Documentation of the classes and functions defined in the :code:`executorlib` package. 5 | 6 | .. autosummary:: 7 | :toctree: _autosummary 8 | :template: custom-module-template.rst 9 | :recursive: 10 | 11 | executorlib -------------------------------------------------------------------------------- /docs/application.md: -------------------------------------------------------------------------------- 1 | # Application 2 | While `executorlib` is designed to up-scale any Python function for high performance computing (HPC), it was initially 3 | developed to accelerate atomistic computational materials science simulation. To demonstrate the usage of `executorlib` 4 | in the context of atomistic simulation, it is combined with [atomistics](https://atomistics.readthedocs.io/) and the 5 | [atomic simulation environment (ASE)](https://wiki.fysik.dtu.dk/ase/) to calculate the bulk modulus with two density 6 | functional theory simulation codes [gpaw](https://gpaw.readthedocs.io/index.html) and [quantum espresso](https://www.quantum-espresso.org). 7 | The bulk modulus is calculated by uniformly deforming a supercell of atoms and measuring the change in total energy 8 | during compression and elongation. The first derivative of this curve is the pressure and the second derivative is 9 | proportional to the bulk modulus. Other material properties like the heat capacity, thermal expansion or thermal conductivity 10 | can be calculated in similar ways following the [atomistics](https://atomistics.readthedocs.io/) documentation. 11 | -------------------------------------------------------------------------------- /docs/images/pyiron-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/docs/images/pyiron-logo.png -------------------------------------------------------------------------------- /docs/paper/process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/docs/paper/process.png -------------------------------------------------------------------------------- /docs/trouble_shooting.md: -------------------------------------------------------------------------------- 1 | # Trouble Shooting 2 | Some of the most frequent issues are covered below, for everything else do not be shy and [open an issue on Github](https://github.com/pyiron/executorlib/issues). 
3 | 4 | ## Filesystem Usage 5 | The cache of executorlib is not removed after the Python process has completed, so it is the responsibility of the user to 6 | clean up the cache directories they created. This is easily forgotten, so it is important to check for remaining cache 7 | directories from time to time and remove them. 8 | 9 | ## Firewall Issues 10 | macOS comes with a rather strict firewall, which does not allow connecting to a macOS computer using its hostname, even 11 | if it is the hostname of the current computer. macOS only supports connections based on the hostname `localhost`. To use 12 | `localhost` rather than the hostname to connect to the Python processes executorlib uses for the execution of a Python 13 | function, executorlib provides the option to set `hostname_localhost=True`. For macOS this option is enabled by default; 14 | if other operating systems implement similarly strict firewall rules, the option can also be set manually to enable 15 | local mode on those computers as well. 16 | 17 | ## Message Passing Interface 18 | To use the message passing interface (MPI), executorlib requires [mpi4py](https://mpi4py.readthedocs.io/) as an optional 19 | dependency. The installation of this and other optional dependencies is covered in the [installation section](https://executorlib.readthedocs.io/en/latest/installation.html#mpi-support). 20 | 21 | ## Missing Dependencies 22 | The default installation of executorlib only comes with a limited number of dependencies, most notably the [zero message queue](https://zeromq.org) 23 | and [cloudpickle](https://github.com/cloudpipe/cloudpickle). Additional features like [caching](https://executorlib.readthedocs.io/en/latest/installation.html#caching), the [HPC Cluster Executors](https://executorlib.readthedocs.io/en/latest/installation.html#hpc-cluster-executor) 24 | and the [HPC Job Executors](https://executorlib.readthedocs.io/en/latest/installation.html#hpc-job-executor) require 25 | additional dependencies. The dependencies are explained in more detail in the 26 | [installation section](https://executorlib.readthedocs.io/en/latest/installation.html). 27 | 28 | ## Python Version 29 | Executorlib supports all current Python versions, ranging from 3.9 to 3.13. Still, some of the dependencies, especially 30 | the [flux](http://flux-framework.org) job scheduler, are currently limited to Python 3.12 and below. Consequently, for high 31 | performance computing installations Python 3.12 is the recommended Python version.
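To make the `hostname_localhost` option from the Firewall Issues section above concrete, here is a minimal sketch (the constructor argument is an assumption based on the description above; `SingleNodeExecutor` is the executor class exported by this package):

```python
from executorlib import SingleNodeExecutor

# connect to the worker processes via localhost instead of the machine
# hostname (enabled by default on macOS, optional on other systems)
with SingleNodeExecutor(hostname_localhost=True) as exe:
    future = exe.submit(sum, [1, 2, 3])
    print(future.result())  # prints 6
```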
32 | 33 | ## Resource Dictionary 34 | The resource dictionary parameter `resource_dict` can contain one or more of the following options: 35 | * `cores` (int): number of MPI cores to be used for each function call 36 | * `threads_per_core` (int): number of OpenMP threads to be used for each function call 37 | * `gpus_per_core` (int): number of GPUs per worker - defaults to 0 38 | * `cwd` (str/None): current working directory where the parallel python task is executed 39 | * `openmpi_oversubscribe` (bool): adds the `--oversubscribe` command line flag (OpenMPI and SLURM only) - default False 40 | * `slurm_cmd_args` (list): additional command line arguments for the srun call (SLURM only) 41 | 42 | For the special case of the [HPC Job Executor](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html), 43 | the resource dictionary parameter `resource_dict` can also include additional parameters defined in the submission script 44 | of the [Python simple queuing system adapter (pysqa)](https://pysqa.readthedocs.io); these include but are not limited to: 45 | * `run_time_max` (int): the maximum time the execution of the submitted Python function is allowed to take in seconds. 46 | * `memory_max` (int): the maximum amount of memory the Python function is allowed to use in Gigabytes. 47 | * `partition` (str): the partition of the queuing system the Python function is submitted to. 48 | * `queue` (str): the name of the queue the Python function is submitted to. 49 | 50 | All parameters in the resource dictionary `resource_dict` are optional; a short usage sketch is given at the end of this page. 51 | 52 | ## SSH Connection 53 | While the [Python simple queuing system adapter (pysqa)](https://pysqa.readthedocs.io) provides the option to connect to 54 | high performance computing (HPC) clusters via SSH, this functionality is not supported in executorlib. The reason 55 | is the use of [cloudpickle](https://github.com/cloudpipe/cloudpickle) for serialization inside executorlib, which requires 56 | the same Python version and dependencies on both computers connected via SSH. As tracking those parameters is rather 57 | complicated, the SSH connection functionality of [pysqa](https://pysqa.readthedocs.io) is not officially supported in 58 | executorlib. 59 | 
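As referenced in the Resource Dictionary section above, here is a minimal sketch of passing `resource_dict` to an individual function call (the keys are taken from the list above; the `resource_dict` keyword on `submit()` matches the `BaseExecutor.submit` signature elsewhere in this repository):

```python
from executorlib import SingleNodeExecutor

with SingleNodeExecutor() as exe:
    # request one MPI core and one OpenMP thread for this specific call
    future = exe.submit(
        sum, [1, 2, 3],
        resource_dict={"cores": 1, "threads_per_core": 1},
    )
    print(future.result())  # prints 6
```

All keys that are omitted fall back to their defaults, as all `resource_dict` entries are optional.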
--------------------------------------------------------------------------------------------------- /executorlib/__init__.py: --------------------------------------------------------------------------------------------------- 1 | from executorlib.executor.base import BaseExecutor 2 | from executorlib.executor.flux import ( 3 | FluxClusterExecutor, 4 | FluxJobExecutor, 5 | ) 6 | from executorlib.executor.single import SingleNodeExecutor 7 | from executorlib.executor.slurm import ( 8 | SlurmClusterExecutor, 9 | SlurmJobExecutor, 10 | ) 11 | from executorlib.standalone.cache import get_cache_data 12 | 13 | from . import _version 14 | 15 | __all__: list[str] = [ 16 | "get_cache_data", 17 | "BaseExecutor", 18 | "FluxJobExecutor", 19 | "FluxClusterExecutor", 20 | "SingleNodeExecutor", 21 | "SlurmJobExecutor", 22 | "SlurmClusterExecutor", 23 | ] 24 | 25 | __version__ = _version.__version__ 26 | --------------------------------------------------------------------------------------------------- /executorlib/_version.py: --------------------------------------------------------------------------------------------------- 1 | # file generated by setuptools-scm 2 | # don't change, don't track in version control 3 | 4 | __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"] 5 | 6 | TYPE_CHECKING = False 7 | if TYPE_CHECKING: 8 | from typing import Tuple 9 | from typing import Union 10 | 11 | VERSION_TUPLE = Tuple[Union[int, str], ...] 12 | else: 13 | VERSION_TUPLE = object 14 | 15 | version: str 16 | __version__: str 17 | __version_tuple__: VERSION_TUPLE 18 | version_tuple: VERSION_TUPLE 19 | 20 | __version__ = version = "0.0.1" 21 | __version_tuple__ = version_tuple = (0, 0, 1) 22 | --------------------------------------------------------------------------------------------------- /executorlib/api.py: --------------------------------------------------------------------------------------------------- 1 | """ 2 | External application programming interface (API). Following semantic versioning, this interface is promised to remain 3 | stable during minor releases, and any change in the interface leads to a major version bump. External libraries should 4 | only use the functionality in this API in combination with the user interface defined in the root __init__.py; all other 5 | functionality is considered internal and might change during minor releases. 6 | """ 7 | 8 | from executorlib.standalone.command import get_command_path 9 | from executorlib.standalone.interactive.communication import ( 10 | SocketInterface, 11 | interface_bootup, 12 | interface_connect, 13 | interface_receive, 14 | interface_send, 15 | interface_shutdown, 16 | ) 17 | from executorlib.standalone.interactive.spawner import MpiExecSpawner, SubprocessSpawner 18 | from executorlib.standalone.queue import cancel_items_in_queue 19 | from executorlib.standalone.serialize import cloudpickle_register 20 | 21 | __all__: list[str] = [ 22 | "cancel_items_in_queue", 23 | "cloudpickle_register", 24 | "get_command_path", 25 | "interface_bootup", 26 | "interface_connect", 27 | "interface_receive", 28 | "interface_send", 29 | "interface_shutdown", 30 | "MpiExecSpawner", 31 | "SocketInterface", 32 | "SubprocessSpawner", 33 | ] 34 | --------------------------------------------------------------------------------------------------- /executorlib/backend/__init__.py: --------------------------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/executorlib/backend/__init__.py --------------------------------------------------------------------------------------------------- /executorlib/backend/cache_parallel.py: --------------------------------------------------------------------------------------------------- 1 | import pickle 2 | import sys 3 | import time 4 | 5 | import cloudpickle 6 | 7 | from executorlib.task_scheduler.file.backend import ( 8 | backend_load_file, 9 | backend_write_file, 10 | ) 11 | 12 | 13 | def main() -> None: 14 | """ 15 | Main function for executing the cache_parallel script. 16 | 17 | This function uses MPI (Message Passing Interface) to distribute the execution of a function 18 | across multiple processes.
It loads a file, broadcasts the data to all processes, executes 19 | the function, gathers the results (if there are multiple processes), and writes the output 20 | to a file. 21 | 22 | Args: 23 | None 24 | 25 | Returns: 26 | None 27 | """ 28 | from mpi4py import MPI 29 | 30 | MPI.pickle.__init__( # type: ignore 31 | cloudpickle.dumps, 32 | cloudpickle.loads, 33 | pickle.HIGHEST_PROTOCOL, 34 | ) 35 | mpi_rank_zero = MPI.COMM_WORLD.Get_rank() == 0 36 | mpi_size_larger_one = MPI.COMM_WORLD.Get_size() > 1 37 | file_name = sys.argv[1] 38 | 39 | time_start = time.time() 40 | apply_dict = {} 41 | if mpi_rank_zero: 42 | apply_dict = backend_load_file(file_name=file_name) 43 | apply_dict = MPI.COMM_WORLD.bcast(apply_dict, root=0) 44 | output = apply_dict["fn"].__call__(*apply_dict["args"], **apply_dict["kwargs"]) 45 | try: 46 | result = ( 47 | MPI.COMM_WORLD.gather(output, root=0) if mpi_size_larger_one else output 48 | ) 49 | except Exception as error: 50 | if mpi_rank_zero: 51 | backend_write_file( 52 | file_name=file_name, 53 | output={"error": error}, 54 | runtime=time.time() - time_start, 55 | ) 56 | else: 57 | if mpi_rank_zero: 58 | backend_write_file( 59 | file_name=file_name, 60 | output={"result": result}, 61 | runtime=time.time() - time_start, 62 | ) 63 | MPI.COMM_WORLD.Barrier() 64 | 65 | 66 | if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /executorlib/backend/cache_serial.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from executorlib.task_scheduler.file.backend import backend_execute_task_in_file 4 | 5 | if __name__ == "__main__": 6 | backend_execute_task_in_file(file_name=sys.argv[1]) 7 | -------------------------------------------------------------------------------- /executorlib/backend/interactive_parallel.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import sys 3 | from os.path import abspath 4 | from typing import Optional 5 | 6 | import cloudpickle 7 | import zmq 8 | 9 | from executorlib.standalone.interactive.backend import call_funct, parse_arguments 10 | from executorlib.standalone.interactive.communication import ( 11 | interface_connect, 12 | interface_receive, 13 | interface_send, 14 | interface_shutdown, 15 | ) 16 | 17 | 18 | def main() -> None: 19 | """ 20 | Entry point of the program. 21 | 22 | This function initializes MPI, sets up the necessary communication, and executes the requested functions. 
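    Only MPI rank 0 exchanges messages with the scheduling process over the ZMQ socket; received task
    dictionaries are broadcast to the remaining ranks before execution (see the mpi_rank_zero guards below).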
23 | 24 | Returns: 25 | None 26 | """ 27 | from mpi4py import MPI 28 | 29 | MPI.pickle.__init__( # type: ignore 30 | cloudpickle.dumps, 31 | cloudpickle.loads, 32 | pickle.HIGHEST_PROTOCOL, 33 | ) 34 | mpi_rank_zero = MPI.COMM_WORLD.Get_rank() == 0 35 | mpi_size_larger_one = MPI.COMM_WORLD.Get_size() > 1 36 | 37 | argument_dict = parse_arguments(argument_lst=sys.argv) 38 | context: Optional[zmq.Context] = None 39 | socket: Optional[zmq.Socket] = None 40 | if mpi_rank_zero: 41 | context, socket = interface_connect( 42 | host=argument_dict["host"], port=argument_dict["zmqport"] 43 | ) 44 | 45 | memory = None 46 | 47 | # required for flux interface - otherwise the current path is not included in the python path 48 | cwd = abspath(".") 49 | if cwd not in sys.path: 50 | sys.path.insert(1, cwd) 51 | 52 | while True: 53 | # Read from socket 54 | input_dict: dict = {} 55 | if mpi_rank_zero: 56 | input_dict = interface_receive(socket=socket) 57 | input_dict = MPI.COMM_WORLD.bcast(input_dict, root=0) 58 | 59 | # Parse input 60 | if "shutdown" in input_dict and input_dict["shutdown"]: 61 | if mpi_rank_zero: 62 | interface_send(socket=socket, result_dict={"result": True}) 63 | interface_shutdown(socket=socket, context=context) 64 | MPI.COMM_WORLD.Barrier() 65 | break 66 | elif ( 67 | "fn" in input_dict 68 | and "init" not in input_dict 69 | and "args" in input_dict 70 | and "kwargs" in input_dict 71 | ): 72 | # Execute function 73 | try: 74 | output = call_funct(input_dict=input_dict, funct=None, memory=memory) 75 | if mpi_size_larger_one: 76 | output_reply = MPI.COMM_WORLD.gather(output, root=0) 77 | else: 78 | output_reply = output 79 | except Exception as error: 80 | if mpi_rank_zero: 81 | interface_send( 82 | socket=socket, 83 | result_dict={"error": error}, 84 | ) 85 | else: 86 | # Send output 87 | if mpi_rank_zero: 88 | interface_send(socket=socket, result_dict={"result": output_reply}) 89 | elif ( 90 | "init" in input_dict 91 | and input_dict["init"] 92 | and "args" in input_dict 93 | and "kwargs" in input_dict 94 | ): 95 | memory = call_funct(input_dict=input_dict, funct=None) 96 | 97 | 98 | if __name__ == "__main__": 99 | main() 100 | -------------------------------------------------------------------------------- /executorlib/backend/interactive_serial.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from os.path import abspath 3 | from typing import Optional 4 | 5 | from executorlib.standalone.interactive.backend import call_funct, parse_arguments 6 | from executorlib.standalone.interactive.communication import ( 7 | interface_connect, 8 | interface_receive, 9 | interface_send, 10 | interface_shutdown, 11 | ) 12 | 13 | 14 | def main(argument_lst: Optional[list[str]] = None): 15 | """ 16 | The main function of the program. 17 | 18 | Args: 19 | argument_lst (Optional[List[str]]): List of command line arguments. If None, sys.argv will be used. 
20 | 21 | Returns: 22 | None 23 | """ 24 | if argument_lst is None: 25 | argument_lst = sys.argv 26 | argument_dict = parse_arguments(argument_lst=argument_lst) 27 | context, socket = interface_connect( 28 | host=argument_dict["host"], port=argument_dict["zmqport"] 29 | ) 30 | 31 | memory = None 32 | 33 | # required for flux interface - otherwise the current path is not included in the python path 34 | cwd = abspath(".") 35 | if cwd not in sys.path: 36 | sys.path.insert(1, cwd) 37 | 38 | while True: 39 | # Read from socket 40 | input_dict = interface_receive(socket=socket) 41 | 42 | # Parse input 43 | if "shutdown" in input_dict and input_dict["shutdown"]: 44 | interface_send(socket=socket, result_dict={"result": True}) 45 | interface_shutdown(socket=socket, context=context) 46 | break 47 | elif ( 48 | "fn" in input_dict 49 | and "init" not in input_dict 50 | and "args" in input_dict 51 | and "kwargs" in input_dict 52 | ): 53 | # Execute function 54 | try: 55 | output = call_funct(input_dict=input_dict, funct=None, memory=memory) 56 | except Exception as error: 57 | interface_send( 58 | socket=socket, 59 | result_dict={"error": error}, 60 | ) 61 | else: 62 | # Send output 63 | interface_send(socket=socket, result_dict={"result": output}) 64 | elif ( 65 | "init" in input_dict 66 | and input_dict["init"] 67 | and "args" in input_dict 68 | and "kwargs" in input_dict 69 | ): 70 | memory = call_funct(input_dict=input_dict, funct=None) 71 | 72 | 73 | if __name__ == "__main__": 74 | main(argument_lst=sys.argv) 75 | -------------------------------------------------------------------------------- /executorlib/executor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/executorlib/executor/__init__.py -------------------------------------------------------------------------------- /executorlib/executor/base.py: -------------------------------------------------------------------------------- 1 | import queue 2 | from abc import ABC 3 | from concurrent.futures import ( 4 | Executor as FutureExecutor, 5 | ) 6 | from concurrent.futures import ( 7 | Future, 8 | ) 9 | from typing import Callable, Optional 10 | 11 | from executorlib.task_scheduler.base import TaskSchedulerBase 12 | 13 | 14 | class BaseExecutor(FutureExecutor, ABC): 15 | """ 16 | Interface class for the executor. 17 | 18 | Args: 19 | executor (TaskSchedulerBase): internal executor 20 | """ 21 | 22 | def __init__(self, executor: TaskSchedulerBase): 23 | self._task_scheduler = executor 24 | 25 | @property 26 | def max_workers(self) -> Optional[int]: 27 | return self._task_scheduler.max_workers 28 | 29 | @max_workers.setter 30 | def max_workers(self, max_workers: int): 31 | self._task_scheduler.max_workers = max_workers 32 | 33 | @property 34 | def info(self) -> Optional[dict]: 35 | """ 36 | Get the information about the executor. 37 | 38 | Returns: 39 | Optional[dict]: Information about the executor. 40 | """ 41 | return self._task_scheduler.info 42 | 43 | @property 44 | def future_queue(self) -> Optional[queue.Queue]: 45 | """ 46 | Get the future queue. 47 | 48 | Returns: 49 | queue.Queue: The future queue. 50 | """ 51 | return self._task_scheduler.future_queue 52 | 53 | def submit( # type: ignore 54 | self, 55 | fn: Callable, 56 | /, 57 | *args, 58 | resource_dict: Optional[dict] = None, 59 | **kwargs, 60 | ) -> Future: 61 | """ 62 | Submits a callable to be executed with the given arguments. 
63 | 64 | Schedules the callable to be executed as fn(*args, **kwargs) and returns 65 | a Future instance representing the execution of the callable. 66 | 67 | Args: 68 | fn (callable): function to submit for execution 69 | args: arguments for the submitted function 70 | kwargs: keyword arguments for the submitted function 71 | resource_dict (dict): resource dictionary, which defines the resources used for the execution of the 72 | function. Example resource dictionary: { 73 | cores: 1, 74 | threads_per_core: 1, 75 | gpus_per_worker: 0, 76 | oversubscribe: False, 77 | cwd: None, 78 | executor: None, 79 | hostname_localhost: False, 80 | } 81 | 82 | Returns: 83 | Future: A Future representing the given call. 84 | """ 85 | return self._task_scheduler.submit( 86 | *([fn] + list(args)), resource_dict=resource_dict, **kwargs 87 | ) 88 | 89 | def shutdown(self, wait: bool = True, *, cancel_futures: bool = False): 90 | """ 91 | Clean-up the resources associated with the Executor. 92 | 93 | It is safe to call this method several times. Otherwise, no other 94 | methods can be called after this one. 95 | 96 | Args: 97 | wait (bool): If True then shutdown will not return until all running 98 | futures have finished executing and the resources used by the 99 | parallel_executors have been reclaimed. 100 | cancel_futures (bool): If True then shutdown will cancel all pending 101 | futures. Futures that are completed or running will not be 102 | cancelled. 103 | """ 104 | self._task_scheduler.shutdown(wait=wait, cancel_futures=cancel_futures) 105 | 106 | def __len__(self) -> int: 107 | """ 108 | Get the length of the executor. 109 | 110 | Returns: 111 | int: The length of the executor. 112 | """ 113 | return len(self._task_scheduler) 114 | 115 | def __bool__(self): 116 | """ 117 | Overwrite the boolean evaluation to always return True 118 | 119 | Returns: 120 | bool: Always return True 121 | """ 122 | return True 123 | 124 | def __exit__(self, *args, **kwargs) -> None: 125 | """ 126 | Exit method called when exiting the context manager. 127 | """ 128 | self._task_scheduler.__exit__(*args, **kwargs) 129 | --------------------------------------------------------------------------------------------------- /executorlib/standalone/__init__.py: --------------------------------------------------------------------------------------------------- 1 | """ 2 | Submodules in the executorlib.standalone module do not depend on other modules of the executorlib package. This strict 3 | separation simplifies the development, testing and debugging. The functionality in executorlib.standalone is designed 4 | to be used independently in other libraries. 5 | """ 6 | --------------------------------------------------------------------------------------------------- /executorlib/standalone/cache.py: --------------------------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cloudpickle 4 | 5 | group_dict = { 6 | "fn": "function", 7 | "args": "input_args", 8 | "kwargs": "input_kwargs", 9 | "output": "output", 10 | "error": "error", 11 | "runtime": "runtime", 12 | "queue_id": "queue_id", 13 | } 14 | 15 | 16 | def get_cache_data(cache_directory: str) -> list[dict]: 17 | """ 18 | Collect all HDF5 files in the cache directory 19 | 20 | Args: 21 | cache_directory (str): The directory to store cache files. 22 | 23 | Returns: 24 | list[dict]: List of dictionaries, each representing one of the HDF5 files in the cache directory.
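        Example (a sketch; assumes results were previously written below ./cache
        by one of the cache-enabled executors):

            >>> entries = get_cache_data(cache_directory="./cache")
            >>> [entry["filename"] for entry in entries]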
25 | """ 26 | import h5py 27 | import numpy as np 28 | 29 | file_lst = [] 30 | for task_key in os.listdir(cache_directory): 31 | file_name = os.path.join(cache_directory, task_key, "cache.h5out") 32 | os.makedirs(os.path.join(cache_directory, task_key), exist_ok=True) 33 | if os.path.exists(file_name): 34 | with h5py.File(file_name, "r") as hdf: 35 | file_content_dict = { 36 | key: cloudpickle.loads(np.void(hdf["/" + key])) 37 | for key in group_dict.values() 38 | if key in hdf 39 | } 40 | file_content_dict["filename"] = file_name 41 | file_lst.append(file_content_dict) 42 | return file_lst 43 | --------------------------------------------------------------------------------------------------- /executorlib/standalone/command.py: --------------------------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def get_command_path(executable: str) -> str: 5 | """ 6 | Get path of the backend executable script 7 | 8 | Args: 9 | executable (str): Name of the backend executable script, e.g. interactive_serial.py or cache_parallel.py from the executorlib/backend directory 10 | 11 | Returns: 12 | str: absolute path to the executable script 13 | """ 14 | return os.path.abspath(os.path.join(__file__, "..", "..", "backend", executable)) 15 | --------------------------------------------------------------------------------------------------- /executorlib/standalone/inputcheck.py: --------------------------------------------------------------------------------------------------- 1 | import inspect 2 | import multiprocessing 3 | import os.path 4 | from concurrent.futures import Executor 5 | from typing import Callable, Optional 6 | 7 | 8 | def check_oversubscribe(oversubscribe: bool) -> None: 9 | """ 10 | Check if oversubscribe is True and raise a ValueError if it is. 11 | """ 12 | if oversubscribe: 13 | raise ValueError( 14 | "Oversubscribing is not supported for the executorlib.flux.PyFLuxExecutor backend. " 15 | "Please use oversubscribe=False instead of oversubscribe=True." 16 | ) 17 | 18 | 19 | def check_command_line_argument_lst(command_line_argument_lst: list[str]) -> None: 20 | """ 21 | Raise a ValueError if command_line_argument_lst is not empty. 22 | """ 23 | if len(command_line_argument_lst) > 0: 24 | raise ValueError( 25 | "The command_line_argument_lst parameter is not supported for the SLURM backend." 26 | ) 27 | 28 | 29 | def check_gpus_per_worker(gpus_per_worker: int) -> None: 30 | """ 31 | Raise a TypeError if gpus_per_worker is not 0. 32 | """ 33 | if gpus_per_worker != 0: 34 | raise TypeError( 35 | "GPU assignment is not supported for the executorlib.mpi.PyMPIExecutor backend. " 36 | "Please use gpus_per_worker=0 instead of gpus_per_worker=" 37 | + str(gpus_per_worker) 38 | + "." 39 | ) 40 | 41 | 42 | def check_executor(executor: Executor) -> None: 43 | """ 44 | Check if executor is not None and raise a ValueError if it is. 45 | """ 46 | if executor is not None: 47 | raise ValueError( 48 | "The executor parameter is only supported for the flux framework backend." 49 | ) 50 | 51 | 52 | def check_nested_flux_executor(nested_flux_executor: bool) -> None: 53 | """ 54 | Check if nested_flux_executor is True and raise a ValueError if it is. 55 | """ 56 | if nested_flux_executor: 57 | raise ValueError( 58 | "The nested_flux_executor parameter is only supported for the flux framework backend." 59 | ) 60 | 61 | 62 | def check_resource_dict(function: Callable) -> None: 63 | """ 64 | Check if the function has a parameter named 'resource_dict' and raise a ValueError if it does.
65 | """ 66 | if "resource_dict" in inspect.signature(function).parameters: 67 | raise ValueError( 68 | "The parameter resource_dict is used internally in executorlib, " 69 | "so it cannot be used as a parameter in the submitted functions." 70 | ) 71 | 72 | 73 | def check_resource_dict_is_empty(resource_dict: dict) -> None: 74 | """ 75 | Raise a ValueError if resource_dict is not empty. 76 | """ 77 | if len(resource_dict) > 0: 78 | raise ValueError( 79 | "When block_allocation is enabled, the resource requirements have to be defined on the executor level." 80 | ) 81 | 82 | 83 | def check_refresh_rate(refresh_rate: float) -> None: 84 | """ 85 | Raise a ValueError if refresh_rate deviates from the default value of 0.01. 86 | """ 87 | if refresh_rate != 0.01: 88 | raise ValueError( 89 | "The sleep_interval parameter is only used when disable_dependencies=False." 90 | ) 91 | 92 | 93 | def check_plot_dependency_graph(plot_dependency_graph: bool) -> None: 94 | """ 95 | Check if plot_dependency_graph is True and raise a ValueError if it is. 96 | """ 97 | if plot_dependency_graph: 98 | raise ValueError( 99 | "The plot_dependency_graph parameter is only used when disable_dependencies=False." 100 | ) 101 | 102 | 103 | def check_pmi(backend: Optional[str], pmi: Optional[str]) -> None: 104 | """ 105 | Check if pmi is valid for the selected backend and raise a ValueError if it is not. 106 | """ 107 | if backend is not None: 108 | if backend != "flux_allocation" and pmi is not None: 109 | raise ValueError( 110 | "The pmi parameter is currently only implemented for flux." 111 | ) 112 | elif backend == "flux_allocation" and pmi not in ["pmix", "pmi1", "pmi2", None]: 113 | raise ValueError( 114 | "The pmi parameter supports [pmix, pmi1, pmi2], but not: " + str(pmi) 115 | ) 116 | 117 | 118 | def check_init_function( 119 | block_allocation: bool, init_function: Optional[Callable] 120 | ) -> None: 121 | """ 122 | Raise a ValueError if init_function is set while block_allocation is disabled. 123 | """ 124 | if not block_allocation and init_function is not None: 125 | raise ValueError("The init_function parameter is only supported when block_allocation is enabled.") 126 | 127 | 128 | def check_max_workers_and_cores( 129 | max_workers: Optional[int], max_cores: Optional[int] 130 | ) -> None: 131 | if max_workers is not None: 132 | raise ValueError( 133 | "The number of workers cannot be controlled with the pysqa based backend." 134 | ) 135 | if max_cores is not None: 136 | raise ValueError( 137 | "The number of cores cannot be controlled with the pysqa based backend." 138 | ) 139 | 140 | 141 | def check_hostname_localhost(hostname_localhost: Optional[bool]) -> None: 142 | if hostname_localhost is not None: 143 | raise ValueError( 144 | "The option to connect to hosts based on their hostname is not available with the pysqa based backend." 145 | ) 146 | 147 | 148 | def check_flux_executor_pmi_mode(flux_executor_pmi_mode: Optional[str]) -> None: 149 | if flux_executor_pmi_mode is not None: 150 | raise ValueError( 151 | "The option to specify the flux pmi mode is not available with the pysqa based backend." 152 | ) 153 | 154 | 155 | def check_flux_log_files(flux_log_files: Optional[bool]) -> None: 156 | """ 157 | Check if flux_log_files is True and raise a ValueError if it is. 158 | """ 159 | if flux_log_files: 160 | raise ValueError( 161 | "The flux_log_files parameter is only supported for the flux framework backend."
162 | ) 163 | 164 | 165 | def check_pysqa_config_directory(pysqa_config_directory: Optional[str]) -> None: 166 | """ 167 | Check if pysqa_config_directory is None and raise a ValueError if it is not. 168 | """ 169 | if pysqa_config_directory is not None: 170 | raise ValueError( 171 | "The pysqa_config_directory parameter is only supported for the pysqa based backend." 172 | ) 173 | 174 | 175 | def validate_number_of_cores( 176 | max_cores: Optional[int] = None, 177 | max_workers: Optional[int] = None, 178 | cores_per_worker: Optional[int] = 1, 179 | set_local_cores: bool = False, 180 | ) -> int: 181 | """ 182 | Validate the number of cores and return the appropriate value. 183 | """ 184 | if max_cores is not None and max_workers is None and cores_per_worker is not None: 185 | return int(max_cores / cores_per_worker) 186 | elif max_workers is not None: 187 | return int(max_workers) 188 | elif max_cores is None and max_workers is None and not set_local_cores: 189 | raise ValueError( 190 | "Block allocation requires a fixed set of computational resources. Neither max_cores nor max_workers is defined." 191 | ) 192 | else: 193 | return multiprocessing.cpu_count() 194 | 195 | 196 | def check_file_exists(file_name: Optional[str]): 197 | if file_name is None: 198 | raise ValueError("file_name is not set.") 199 | if not os.path.exists(file_name): 200 | raise ValueError("file_name does not exist on the file system.") 201 | -------------------------------------------------------------------------------- /executorlib/standalone/interactive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/executorlib/standalone/interactive/__init__.py -------------------------------------------------------------------------------- /executorlib/standalone/interactive/arguments.py: -------------------------------------------------------------------------------- 1 | from asyncio.exceptions import CancelledError 2 | from concurrent.futures import Future, TimeoutError 3 | from typing import Any, Union 4 | 5 | 6 | def get_future_objects_from_input(args: tuple, kwargs: dict): 7 | """ 8 | Check the input parameters for future objects and determine whether all of these future objects have completed 9 | 10 | Args: 11 | args (tuple): function arguments 12 | kwargs (dict): function keyword arguments 13 | 14 | Returns: 15 | list, boolean: list of future objects and a boolean flag indicating whether all future objects are already done 16 | """ 17 | future_lst = [] 18 | 19 | def find_future_in_list(lst): 20 | for el in lst: 21 | if isinstance(el, Future): 22 | future_lst.append(el) 23 | elif isinstance(el, list): 24 | find_future_in_list(lst=el) 25 | elif isinstance(el, dict): 26 | find_future_in_list(lst=el.values()) 27 | 28 | find_future_in_list(lst=args) 29 | find_future_in_list(lst=kwargs.values()) 30 | boolean_flag = len([future for future in future_lst if future.done()]) == len( 31 | future_lst 32 | ) 33 | return future_lst, boolean_flag 34 | 35 | 36 | def get_exception_lst(future_lst: list[Future]) -> list: 37 | """ 38 | Get the list of exceptions raised by the future objects in the given list 39 | 40 | Args: 41 | future_lst (list): list of future objects 42 | 43 | Returns: 44 | list: list of exceptions raised by the future objects. Returns an empty list if no 45 | exception was raised.
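
        Example (an illustrative sketch built from plain Future objects):

        >>> from concurrent.futures import Future
        >>> f_done, f_failed = Future(), Future()
        >>> f_done.set_result(1)
        >>> f_failed.set_exception(RuntimeError("failed"))
        >>> get_exception_lst(future_lst=[f_done, f_failed])
        [RuntimeError('failed')]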
46 | """ 47 | return [ 48 | f.exception() for f in future_lst if check_exception_was_raised(future_obj=f) 49 | ] 50 | 51 | 52 | def check_exception_was_raised(future_obj: Future) -> bool: 53 | """ 54 | Check if exception was raised by future object 55 | 56 | Args: 57 | future_obj (Future): future object 58 | 59 | Returns: 60 | bool: True if exception was raised, False otherwise 61 | """ 62 | try: 63 | excp = future_obj.exception(timeout=10**-10) 64 | return excp is not None and not isinstance(excp, CancelledError) 65 | except TimeoutError: 66 | return False 67 | 68 | 69 | def update_futures_in_input(args: tuple, kwargs: dict) -> tuple[tuple, dict]: 70 | """ 71 | Evaluate future objects in the arguments and keyword arguments by calling future.result() 72 | 73 | Args: 74 | args (tuple): function arguments 75 | kwargs (dict): function keyword arguments 76 | 77 | Returns: 78 | tuple, dict: arguments and keyword arguments with each future object in them being evaluated 79 | """ 80 | 81 | def get_result(arg: Union[list[Future], Future]) -> Any: 82 | if isinstance(arg, Future): 83 | return arg.result() 84 | elif isinstance(arg, list): 85 | return [get_result(arg=el) for el in arg] 86 | elif isinstance(arg, dict): 87 | return {k: get_result(arg=v) for k, v in arg.items()} 88 | else: 89 | return arg 90 | 91 | args = tuple([get_result(arg=arg) for arg in args]) 92 | kwargs = {key: get_result(arg=value) for key, value in kwargs.items()} 93 | return args, kwargs 94 | -------------------------------------------------------------------------------- /executorlib/standalone/interactive/backend.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from typing import Any, Callable, Optional 3 | 4 | 5 | def call_funct( 6 | input_dict: dict, funct: Optional[Callable] = None, memory: Optional[dict] = None 7 | ) -> Any: 8 | """ 9 | Call function from dictionary 10 | 11 | Args: 12 | input_dict (dict): dictionary containing the function 'fn', its arguments 'args' and keyword arguments 'kwargs' 13 | funct (Callable, optional): function to be evaluated if it is not included in the input dictionary 14 | memory (dict, optional): variables stored in memory which can be used as keyword arguments 15 | 16 | Returns: 17 | Any: Result of the function 18 | """ 19 | if funct is None: 20 | 21 | def funct(*args, **kwargs): 22 | return args[0].__call__(*args[1:], **kwargs) 23 | 24 | funct_args = inspect.getfullargspec(input_dict["fn"]).args 25 | if memory is not None: 26 | input_dict["kwargs"].update( 27 | _update_dict_delta( 28 | dict_input=memory, 29 | dict_output=input_dict["kwargs"], 30 | keys_possible_lst=funct_args, 31 | ) 32 | ) 33 | return funct(input_dict["fn"], *input_dict["args"], **input_dict["kwargs"]) 34 | 35 | 36 | def parse_arguments(argument_lst: list[str]) -> dict: 37 | """ 38 | Simple function to parse command line arguments 39 | 40 | Args: 41 | argument_lst (list): list of arguments as strings 42 | 43 | Returns: 44 | dict: dictionary with the parsed arguments and their corresponding values 45 | """ 46 | return update_default_dict_from_arguments( 47 | argument_lst=argument_lst, 48 | argument_dict={ 49 | "zmqport": "--zmqport", 50 | "host": "--host", 51 | }, 52 | default_dict={"host": "localhost"}, 53 | ) 54 | 55 | 56 | def update_default_dict_from_arguments( 57 | argument_lst: list[str], argument_dict: dict, default_dict: dict 58 | ) -> dict: 59 | """ 60 | Update default dictionary with values from command line arguments 61 | 62 | Args: 63 | argument_lst 
(list[str]): List of arguments as strings 64 | argument_dict (dict): Dictionary mapping argument names to their corresponding command line flags 65 | default_dict (dict): Default dictionary to be updated 66 | 67 | Returns: 68 | dict: Updated default dictionary 69 | """ 70 | default_dict.update( 71 | { 72 | k: argument_lst[argument_lst.index(v) + 1] 73 | for k, v in argument_dict.items() 74 | if v in argument_lst 75 | } 76 | ) 77 | return default_dict 78 | 79 | 80 | def _update_dict_delta( 81 | dict_input: dict, dict_output: dict, keys_possible_lst: list[str] 82 | ) -> dict: 83 | """ 84 | Update dictionary with values from another dictionary, only if the keys are present in a given list and not yet contained in the output dictionary 85 | 86 | Args: 87 | dict_input (dict): Input dictionary 88 | dict_output (dict): Output dictionary to be updated 89 | keys_possible_lst (list[str]): List of possible keys to be updated 90 | 91 | Returns: 92 | dict: Updated dictionary 93 | """ 94 | return { 95 | k: v 96 | for k, v in dict_input.items() 97 | if k in keys_possible_lst and k not in dict_output 98 | } 99 | -------------------------------------------------------------------------------- /executorlib/standalone/interactive/spawner.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from abc import ABC, abstractmethod 3 | from typing import Optional 4 | 5 | MPI_COMMAND = "mpiexec" 6 | 7 | 8 | class BaseSpawner(ABC): 9 | def __init__( 10 | self, 11 | cwd: Optional[str] = None, 12 | cores: int = 1, 13 | openmpi_oversubscribe: bool = False, 14 | ): 15 | """ 16 | Base class for interface implementations. 17 | 18 | Args: 19 | cwd (str, optional): The current working directory. Defaults to None. 20 | cores (int, optional): The number of cores to use. Defaults to 1. 21 | openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False. 22 | """ 23 | self._cwd = cwd 24 | self._cores = cores 25 | self._openmpi_oversubscribe = openmpi_oversubscribe 26 | 27 | @abstractmethod 28 | def bootup( 29 | self, 30 | command_lst: list[str], 31 | ): 32 | """ 33 | Method to start the interface. 34 | 35 | Args: 36 | command_lst (list[str]): The command list to execute. 37 | """ 38 | raise NotImplementedError 39 | 40 | @abstractmethod 41 | def shutdown(self, wait: bool = True): 42 | """ 43 | Method to shutdown the interface. 44 | 45 | Args: 46 | wait (bool, optional): Whether to wait for the interface to shutdown. Defaults to True. 47 | """ 48 | raise NotImplementedError 49 | 50 | @abstractmethod 51 | def poll(self): 52 | """ 53 | Method to check if the interface is running. 54 | 55 | Returns: 56 | bool: True if the interface is running, False otherwise. 57 | """ 58 | raise NotImplementedError 59 | 60 | 61 | class SubprocessSpawner(BaseSpawner): 62 | def __init__( 63 | self, 64 | cwd: Optional[str] = None, 65 | cores: int = 1, 66 | openmpi_oversubscribe: bool = False, 67 | threads_per_core: int = 1, 68 | ): 69 | """ 70 | Subprocess interface implementation. 71 | 72 | Args: 73 | cwd (str, optional): The current working directory. Defaults to None. 74 | cores (int, optional): The number of cores to use. Defaults to 1. 75 | threads_per_core (int, optional): The number of threads per core. Defaults to 1. 76 | openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
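
        Example (a minimal sketch; the echo command is illustrative only):

        >>> spawner = SubprocessSpawner(cwd=None, cores=1)
        >>> spawner.bootup(command_lst=["echo", "hello"])  # doctest: +SKIP
        >>> spawner.shutdown(wait=True)  # doctest: +SKIP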
77 | """ 78 | super().__init__( 79 | cwd=cwd, 80 | cores=cores, 81 | openmpi_oversubscribe=openmpi_oversubscribe, 82 | ) 83 | self._process: Optional[subprocess.Popen] = None 84 | self._threads_per_core = threads_per_core 85 | 86 | def bootup( 87 | self, 88 | command_lst: list[str], 89 | ): 90 | """ 91 | Method to start the subprocess interface. 92 | 93 | Args: 94 | command_lst (list[str]): The command list to execute. 95 | """ 96 | self._process = subprocess.Popen( 97 | args=self.generate_command(command_lst=command_lst), 98 | cwd=self._cwd, 99 | stdin=subprocess.DEVNULL, 100 | ) 101 | 102 | def generate_command(self, command_lst: list[str]) -> list[str]: 103 | """ 104 | Method to generate the command list. 105 | 106 | Args: 107 | command_lst (list[str]): The command list. 108 | 109 | Returns: 110 | list[str]: The generated command list. 111 | """ 112 | return command_lst 113 | 114 | def shutdown(self, wait: bool = True): 115 | """ 116 | Method to shutdown the subprocess interface. 117 | 118 | Args: 119 | wait (bool, optional): Whether to wait for the interface to shutdown. Defaults to True. 120 | """ 121 | if self._process is not None: 122 | self._process.communicate() 123 | self._process.terminate() 124 | if wait: 125 | self._process.wait() 126 | self._process = None 127 | 128 | def poll(self) -> bool: 129 | """ 130 | Method to check if the subprocess interface is running. 131 | 132 | Returns: 133 | bool: True if the interface is running, False otherwise. 134 | """ 135 | return self._process is not None and self._process.poll() is None 136 | 137 | 138 | class MpiExecSpawner(SubprocessSpawner): 139 | def generate_command(self, command_lst: list[str]) -> list[str]: 140 | """ 141 | Generate the command list for the MPIExec interface. 142 | 143 | Args: 144 | command_lst (list[str]): The command list. 145 | 146 | Returns: 147 | list[str]: The generated command list. 148 | """ 149 | command_prepend_lst = generate_mpiexec_command( 150 | cores=self._cores, 151 | openmpi_oversubscribe=self._openmpi_oversubscribe, 152 | ) 153 | return super().generate_command( 154 | command_lst=command_prepend_lst + command_lst, 155 | ) 156 | 157 | 158 | def generate_mpiexec_command( 159 | cores: int, openmpi_oversubscribe: bool = False 160 | ) -> list[str]: 161 | """ 162 | Generate the command list for the MPIExec interface. 163 | 164 | Args: 165 | cores (int): The number of cores. 166 | openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False. 167 | 168 | Returns: 169 | list[str]: The generated command list. 170 | """ 171 | if cores == 1: 172 | return [] 173 | else: 174 | command_prepend_lst = [MPI_COMMAND, "-n", str(cores)] 175 | if openmpi_oversubscribe: 176 | command_prepend_lst += ["--oversubscribe"] 177 | return command_prepend_lst 178 | -------------------------------------------------------------------------------- /executorlib/standalone/plot.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from concurrent.futures import Future 3 | from typing import Optional 4 | 5 | import cloudpickle 6 | 7 | 8 | def generate_nodes_and_edges( 9 | task_hash_dict: dict, future_hash_inverse_dict: dict 10 | ) -> tuple[list, list]: 11 | """ 12 | Generate nodes and edges for visualization. 13 | 14 | Args: 15 | task_hash_dict (dict): Dictionary mapping task hash to task information. 16 | future_hash_inverse_dict (dict): Dictionary mapping future hash to future object. 
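
        Example (illustrative layout of the returned entries, assuming a task
        task_b which consumes the Future of a task task_a as keyword argument x):

        node_lst = [{"name": "task_a", "id": 0, "shape": "box"},
                    {"name": "task_b", "id": 1, "shape": "box"}]
        edge_lst = [{"start": 0, "end": 1, "label": "x"}]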
17 | 18 | Returns: 19 | Tuple[list, list]: Tuple containing the list of nodes and the list of edges. 20 | """ 21 | node_lst: list = [] 22 | edge_lst: list = [] 23 | hash_id_dict: dict = {} 24 | 25 | def add_element(arg, link_to, label=""): 26 | """ 27 | Add element to the node and edge lists. 28 | 29 | Args: 30 | arg: Argument to be added. 31 | link_to: ID of the node to link the element to. 32 | label (str, optional): Label for the edge. Defaults to "". 33 | """ 34 | if isinstance(arg, Future): 35 | edge_lst.append( 36 | { 37 | "start": hash_id_dict[future_hash_inverse_dict[arg]], 38 | "end": link_to, 39 | "label": label, 40 | } 41 | ) 42 | elif isinstance(arg, list) and any(isinstance(a, Future) for a in arg): 43 | lst_no_future = [a if not isinstance(a, Future) else "$" for a in arg] 44 | node_id = len(node_lst) 45 | node_lst.append( 46 | {"name": str(lst_no_future), "id": node_id, "shape": "circle"} 47 | ) 48 | edge_lst.append({"start": node_id, "end": link_to, "label": label}) 49 | for i, a in enumerate(arg): 50 | if isinstance(a, Future): 51 | add_element(arg=a, link_to=node_id, label="ind: " + str(i)) 52 | elif isinstance(arg, dict) and any(isinstance(a, Future) for a in arg.values()): 53 | dict_no_future = { 54 | kt: vt if not isinstance(vt, Future) else "$" for kt, vt in arg.items() 55 | } 56 | node_id = len(node_lst) 57 | node_lst.append( 58 | {"name": str(dict_no_future), "id": node_id, "shape": "circle"} 59 | ) 60 | edge_lst.append({"start": node_id, "end": link_to, "label": label}) 61 | for kt, vt in arg.items(): 62 | if isinstance(vt, Future): 63 | add_element(arg=vt, link_to=node_id, label="key: " + kt) 64 | else: 65 | node_id = len(node_lst) 66 | node_lst.append({"name": str(arg), "id": node_id, "shape": "circle"}) 67 | edge_lst.append({"start": node_id, "end": link_to, "label": label}) 68 | 69 | for k, v in task_hash_dict.items(): 70 | hash_id_dict[k] = len(node_lst) 71 | node_lst.append( 72 | {"name": v["fn"].__name__, "id": hash_id_dict[k], "shape": "box"} 73 | ) 74 | for k, task_dict in task_hash_dict.items(): 75 | for arg in task_dict["args"]: 76 | add_element(arg=arg, link_to=hash_id_dict[k], label="") 77 | 78 | for kw, v in task_dict["kwargs"].items(): 79 | add_element(arg=v, link_to=hash_id_dict[k], label=str(kw)) 80 | 81 | return node_lst, edge_lst 82 | 83 | 84 | def generate_task_hash(task_dict: dict, future_hash_inverse_dict: dict) -> bytes: 85 | """ 86 | Generate a hash for a task dictionary. 87 | 88 | Args: 89 | task_dict (dict): Dictionary containing task information. 90 | future_hash_inverse_dict (dict): Dictionary mapping future hash to future object. 91 | 92 | Returns: 93 | bytes: Hash generated for the task dictionary. 94 | """ 95 | 96 | def convert_arg(arg, future_hash_inverse_dict): 97 | """ 98 | Convert an argument to its hash representation. 99 | 100 | Args: 101 | arg: Argument to be converted. 102 | future_hash_inverse_dict (dict): Dictionary mapping future hash to future object. 103 | 104 | Returns: 105 | The hash representation of the argument. 
106 | """ 107 | if isinstance(arg, Future): 108 | return future_hash_inverse_dict[arg] 109 | elif isinstance(arg, list): 110 | return [ 111 | convert_arg(arg=a, future_hash_inverse_dict=future_hash_inverse_dict) 112 | for a in arg 113 | ] 114 | elif isinstance(arg, dict): 115 | return { 116 | k: convert_arg(arg=v, future_hash_inverse_dict=future_hash_inverse_dict) 117 | for k, v in arg.items() 118 | } 119 | else: 120 | return arg 121 | 122 | args_for_hash = [ 123 | convert_arg(arg=arg, future_hash_inverse_dict=future_hash_inverse_dict) 124 | for arg in task_dict["args"] 125 | ] 126 | kwargs_for_hash = { 127 | k: convert_arg(arg=v, future_hash_inverse_dict=future_hash_inverse_dict) 128 | for k, v in task_dict["kwargs"].items() 129 | } 130 | return cloudpickle.dumps( 131 | {"fn": task_dict["fn"], "args": args_for_hash, "kwargs": kwargs_for_hash} 132 | ) 133 | 134 | 135 | def draw(node_lst: list, edge_lst: list, filename: Optional[str] = None): 136 | """ 137 | Draw the graph visualization of nodes and edges. 138 | 139 | Args: 140 | node_lst (list): List of nodes. 141 | edge_lst (list): List of edges. 142 | filename (str): Name of the file to store the plotted graph in. 143 | """ 144 | import networkx as nx # noqa 145 | 146 | graph = nx.DiGraph() 147 | for node in node_lst: 148 | graph.add_node(node["id"], label=node["name"], shape=node["shape"]) 149 | for edge in edge_lst: 150 | graph.add_edge(edge["start"], edge["end"], label=edge["label"]) 151 | if filename is not None: 152 | file_format = os.path.splitext(filename)[-1][1:] 153 | with open(filename, "wb") as f: 154 | f.write(nx.nx_agraph.to_agraph(graph).draw(prog="dot", format=file_format)) 155 | else: 156 | from IPython.display import SVG, display # noqa 157 | 158 | display(SVG(nx.nx_agraph.to_agraph(graph).draw(prog="dot", format="svg"))) 159 | -------------------------------------------------------------------------------- /executorlib/standalone/queue.py: -------------------------------------------------------------------------------- 1 | import queue 2 | 3 | 4 | def cancel_items_in_queue(que: queue.Queue): 5 | """ 6 | Cancel items which are still waiting in the queue. If the executor is busy tasks remain in the queue, so the future 7 | objects have to be cancelled when the executor shuts down. 8 | 9 | Args: 10 | que (queue.Queue): Queue with task objects which should be executed 11 | """ 12 | while True: 13 | try: 14 | item = que.get_nowait() 15 | if isinstance(item, dict) and "future" in item: 16 | item["future"].cancel() 17 | que.task_done() 18 | except queue.Empty: 19 | break 20 | -------------------------------------------------------------------------------- /executorlib/standalone/serialize.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import inspect 3 | import re 4 | from typing import Callable, Optional 5 | 6 | import cloudpickle 7 | 8 | 9 | def cloudpickle_register(ind: int = 2): 10 | """ 11 | Cloudpickle can either pickle by value or pickle by reference. The functions which are communicated have to 12 | be pickled by value rather than by reference, so the module which calls the map function is pickled by value. 
13 | https://github.com/cloudpipe/cloudpickle#overriding-pickles-serialization-mechanism-for-importable-constructs 14 | inspect can help to find the module which is calling executorlib 15 | https://docs.python.org/3/library/inspect.html 16 | to learn more about inspect another good read is: 17 | http://pymotw.com/2/inspect/index.html#module-inspect 18 | 1 refers to 1 level higher than the map function 19 | 20 | Args: 21 | ind (int): index of the level at which pickle by value starts while for the rest pickle by reference is used 22 | """ 23 | try: # When executed in a jupyter notebook this can cause a ValueError - in this case we just ignore it. 24 | cloudpickle.register_pickle_by_value(inspect.getmodule(inspect.stack()[ind][0])) 25 | except IndexError: 26 | cloudpickle_register(ind=ind - 1) 27 | except ValueError: 28 | pass 29 | 30 | 31 | def serialize_funct_h5( 32 | fn: Callable, 33 | fn_args: Optional[list] = None, 34 | fn_kwargs: Optional[dict] = None, 35 | resource_dict: Optional[dict] = None, 36 | ) -> tuple[str, dict]: 37 | """ 38 | Serialize a function and its arguments and keyword arguments into an HDF5 file. 39 | 40 | Args: 41 | fn (Callable): The function to be serialized. 42 | fn_args (list): The arguments of the function. 43 | fn_kwargs (dict): The keyword arguments of the function. 44 | resource_dict (dict): resource dictionary, which defines the resources used for the execution of the function. 45 | Example resource dictionary: { 46 | cores: 1, 47 | threads_per_core: 1, 48 | gpus_per_worker: 0, 49 | oversubscribe: False, 50 | cwd: None, 51 | executor: None, 52 | hostname_localhost: False, 53 | } 54 | 55 | Returns: 56 | Tuple[str, dict]: A tuple containing the task key and the serialized data. 57 | 58 | """ 59 | if fn_args is None: 60 | fn_args = [] 61 | if fn_kwargs is None: 62 | fn_kwargs = {} 63 | if resource_dict is None: 64 | resource_dict = {} 65 | binary_all = cloudpickle.dumps( 66 | {"fn": fn, "args": fn_args, "kwargs": fn_kwargs, "resource_dict": resource_dict} 67 | ) 68 | task_key = fn.__name__ + _get_hash(binary=binary_all) 69 | data = { 70 | "fn": fn, 71 | "args": fn_args, 72 | "kwargs": fn_kwargs, 73 | "resource_dict": resource_dict, 74 | } 75 | return task_key, data 76 | 77 | 78 | def _get_hash(binary: bytes) -> str: 79 | """ 80 | Get the hash of a binary. 81 | 82 | Args: 83 | binary (bytes): The binary to be hashed. 84 | 85 | Returns: 86 | str: The hash of the binary. 
87 | 88 | """ 89 | # Remove specification of jupyter kernel from hash to be deterministic 90 | binary_no_ipykernel = re.sub(b"(?<=/ipykernel_)(.*)(?=/)", b"", binary) 91 | return str(hashlib.md5(binary_no_ipykernel).hexdigest()) 92 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/executorlib/task_scheduler/__init__.py -------------------------------------------------------------------------------- /executorlib/task_scheduler/base.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import queue 3 | from concurrent.futures import ( 4 | Executor as FutureExecutor, 5 | ) 6 | from concurrent.futures import ( 7 | Future, 8 | ) 9 | from threading import Thread 10 | from typing import Callable, Optional, Union 11 | 12 | from executorlib.standalone.inputcheck import check_resource_dict 13 | from executorlib.standalone.queue import cancel_items_in_queue 14 | from executorlib.standalone.serialize import cloudpickle_register 15 | 16 | 17 | class TaskSchedulerBase(FutureExecutor): 18 | """ 19 | Base class for the executor. 20 | 21 | Args: 22 | max_cores (int): defines the number of cores which can be used in parallel 23 | """ 24 | 25 | def __init__(self, max_cores: Optional[int] = None): 26 | """ 27 | Initialize the TaskSchedulerBase class. 28 | """ 29 | cloudpickle_register(ind=3) 30 | self._process_kwargs: dict = {} 31 | self._max_cores = max_cores 32 | self._future_queue: Optional[queue.Queue] = queue.Queue() 33 | self._process: Optional[Union[Thread, list[Thread]]] = None 34 | 35 | @property 36 | def max_workers(self) -> Optional[int]: 37 | return self._process_kwargs.get("max_workers") 38 | 39 | @max_workers.setter 40 | def max_workers(self, max_workers: int): 41 | raise NotImplementedError("The max_workers setter is not implemented.") 42 | 43 | @property 44 | def info(self) -> Optional[dict]: 45 | """ 46 | Get the information about the executor. 47 | 48 | Returns: 49 | Optional[dict]: Information about the executor. 50 | """ 51 | meta_data_dict = self._process_kwargs.copy() 52 | if "future_queue" in meta_data_dict: 53 | del meta_data_dict["future_queue"] 54 | if self._process is not None and isinstance(self._process, list): 55 | meta_data_dict["max_workers"] = len(self._process) 56 | return meta_data_dict 57 | elif self._process is not None: 58 | return meta_data_dict 59 | else: 60 | return None 61 | 62 | @property 63 | def future_queue(self) -> Optional[queue.Queue]: 64 | """ 65 | Get the future queue. 66 | 67 | Returns: 68 | queue.Queue: The future queue. 69 | """ 70 | return self._future_queue 71 | 72 | def submit( # type: ignore 73 | self, 74 | fn: Callable, 75 | /, 76 | *args, 77 | resource_dict: Optional[dict] = None, 78 | **kwargs, 79 | ) -> Future: 80 | """ 81 | Submits a callable to be executed with the given arguments. 82 | 83 | Schedules the callable to be executed as fn(*args, **kwargs) and returns 84 | a Future instance representing the execution of the callable. 85 | 86 | Args: 87 | fn (callable): function to submit for execution 88 | args: arguments for the submitted function 89 | kwargs: keyword arguments for the submitted function 90 | resource_dict (dict): resource dictionary, which defines the resources used for the execution of the 91 | function.
Example resource dictionary: { 92 | cores: 1, 93 | threads_per_core: 1, 94 | gpus_per_worker: 0, 95 | oversubscribe: False, 96 | cwd: None, 97 | executor: None, 98 | hostname_localhost: False, 99 | } 100 | 101 | Returns: 102 | Future: A Future representing the given call. 103 | """ 104 | if resource_dict is None: 105 | resource_dict = {} 106 | cores = resource_dict.get("cores") 107 | if ( 108 | cores is not None 109 | and self._max_cores is not None 110 | and cores > self._max_cores 111 | ): 112 | raise ValueError( 113 | "The specified number of cores is larger than the available number of cores." 114 | ) 115 | check_resource_dict(function=fn) 116 | f: Future = Future() 117 | if self._future_queue is not None: 118 | self._future_queue.put( 119 | { 120 | "fn": fn, 121 | "args": args, 122 | "kwargs": kwargs, 123 | "future": f, 124 | "resource_dict": resource_dict, 125 | } 126 | ) 127 | return f 128 | 129 | def shutdown(self, wait: bool = True, *, cancel_futures: bool = False): 130 | """ 131 | Clean-up the resources associated with the Executor. 132 | 133 | It is safe to call this method several times. Otherwise, no other 134 | methods can be called after this one. 135 | 136 | Args: 137 | wait (bool): If True then shutdown will not return until all running 138 | futures have finished executing and the resources used by the 139 | executor have been reclaimed. 140 | cancel_futures (bool): If True then shutdown will cancel all pending 141 | futures. Futures that are completed or running will not be 142 | cancelled. 143 | """ 144 | if cancel_futures and self._future_queue is not None: 145 | cancel_items_in_queue(que=self._future_queue) 146 | if self._process is not None and self._future_queue is not None: 147 | self._future_queue.put({"shutdown": True, "wait": wait}) 148 | if wait and isinstance(self._process, Thread): 149 | self._process.join() 150 | self._future_queue.join() 151 | self._process = None 152 | self._future_queue = None 153 | 154 | def _set_process(self, process: Thread): 155 | """ 156 | Set the process for the executor. 157 | 158 | Args: 159 | process (Thread): The process for the executor. 160 | """ 161 | self._process = process 162 | self._process.start() 163 | 164 | def __len__(self) -> int: 165 | """ 166 | Get the number of tasks currently waiting in the future queue. 167 | 168 | Returns: 169 | int: The number of waiting tasks. 170 | """ 171 | queue_size = 0 172 | if self._future_queue is not None: 173 | queue_size = self._future_queue.qsize() 174 | return queue_size 175 | 176 | def __del__(self): 177 | """ 178 | Clean-up the resources associated with the Executor.
179 | """ 180 | with contextlib.suppress(AttributeError, RuntimeError): 181 | self.shutdown(wait=False) 182 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/file/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/executorlib/task_scheduler/file/__init__.py -------------------------------------------------------------------------------- /executorlib/task_scheduler/file/backend.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from typing import Any 4 | 5 | from executorlib.task_scheduler.file.hdf import dump, load 6 | from executorlib.task_scheduler.file.shared import FutureItem 7 | 8 | 9 | def backend_load_file(file_name: str) -> dict: 10 | """ 11 | Load the data from an HDF5 file and convert FutureItem objects to their results. 12 | 13 | Args: 14 | file_name (str): The name of the HDF5 file. 15 | 16 | Returns: 17 | dict: The loaded data from the file. 18 | 19 | """ 20 | apply_dict = load(file_name=file_name) 21 | apply_dict["args"] = [ 22 | arg if not _isinstance(arg, FutureItem) else arg.result() 23 | for arg in apply_dict["args"] 24 | ] 25 | apply_dict["kwargs"] = { 26 | key: arg if not _isinstance(arg, FutureItem) else arg.result() 27 | for key, arg in apply_dict["kwargs"].items() 28 | } 29 | return apply_dict 30 | 31 | 32 | def backend_write_file(file_name: str, output: Any, runtime: float) -> None: 33 | """ 34 | Write the output to an HDF5 file. 35 | 36 | Args: 37 | file_name (str): The name of the HDF5 file. 38 | output (Any): The output to be written. 39 | runtime (float): Time for executing function. 40 | 41 | Returns: 42 | None 43 | 44 | """ 45 | file_name_out = os.path.splitext(file_name)[0] 46 | os.rename(file_name, file_name_out + ".h5ready") 47 | if "result" in output: 48 | dump( 49 | file_name=file_name_out + ".h5ready", 50 | data_dict={"output": output["result"], "runtime": runtime}, 51 | ) 52 | else: 53 | dump( 54 | file_name=file_name_out + ".h5ready", 55 | data_dict={"error": output["error"], "runtime": runtime}, 56 | ) 57 | os.rename(file_name_out + ".h5ready", file_name_out + ".h5out") 58 | 59 | 60 | def backend_execute_task_in_file(file_name: str) -> None: 61 | """ 62 | Execute the task stored in a given HDF5 file. 63 | 64 | Args: 65 | file_name (str): The file name of the HDF5 file as an absolute path. 
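
    Example (a minimal sketch; the absolute path is illustrative only and the
    file has to be written by the submitting process beforehand):

    >>> backend_execute_task_in_file(file_name="/absolute/path/task.h5")  # doctest: +SKIP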
66 | 67 | Returns: 68 | None 69 | """ 70 | apply_dict = backend_load_file(file_name=file_name) 71 | time_start = time.time() 72 | try: 73 | result = { 74 | "result": apply_dict["fn"].__call__( 75 | *apply_dict["args"], **apply_dict["kwargs"] 76 | ) 77 | } 78 | except Exception as error: 79 | result = {"error": error} 80 | 81 | backend_write_file( 82 | file_name=file_name, 83 | output=result, 84 | runtime=time.time() - time_start, 85 | ) 86 | 87 | 88 | def _isinstance(obj: Any, cls: type) -> bool: 89 | return str(obj.__class__) == str(cls) 90 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/file/hdf.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | import cloudpickle 4 | import h5py 5 | import numpy as np 6 | 7 | from executorlib.standalone.cache import group_dict 8 | 9 | 10 | def dump(file_name: Optional[str], data_dict: dict) -> None: 11 | """ 12 | Dump data dictionary into HDF5 file 13 | 14 | Args: 15 | file_name (str): file name of the HDF5 file as absolute path 16 | data_dict (dict): dictionary containing the python function to be executed {"fn": ..., "args": (), "kwargs": {}} 17 | """ 18 | if file_name is not None: 19 | with h5py.File(file_name, "a") as fname: 20 | for data_key, data_value in data_dict.items(): 21 | if data_key in group_dict: 22 | fname.create_dataset( 23 | name="/" + group_dict[data_key], 24 | data=np.void(cloudpickle.dumps(data_value)), 25 | ) 26 | 27 | 28 | def load(file_name: str) -> dict: 29 | """ 30 | Load data dictionary from HDF5 file 31 | 32 | Args: 33 | file_name (str): file name of the HDF5 file as absolute path 34 | 35 | Returns: 36 | dict: dictionary containing the python function to be executed {"fn": ..., "args": (), "kwargs": {}} 37 | """ 38 | with h5py.File(file_name, "r") as hdf: 39 | data_dict = {} 40 | if "function" in hdf: 41 | data_dict["fn"] = cloudpickle.loads(np.void(hdf["/function"])) 42 | else: 43 | raise TypeError("Function not found in HDF5 file.") 44 | if "input_args" in hdf: 45 | data_dict["args"] = cloudpickle.loads(np.void(hdf["/input_args"])) 46 | else: 47 | data_dict["args"] = () 48 | if "input_kwargs" in hdf: 49 | data_dict["kwargs"] = cloudpickle.loads(np.void(hdf["/input_kwargs"])) 50 | else: 51 | data_dict["kwargs"] = {} 52 | return data_dict 53 | 54 | 55 | def get_output(file_name: str) -> tuple[bool, bool, Any]: 56 | """ 57 | Check if output is available in the HDF5 file 58 | 59 | Args: 60 | file_name (str): file name of the HDF5 file as absolute path 61 | 62 | Returns: 63 | Tuple[bool, bool, object]: boolean flag indicating if output is available and the output object itself 64 | """ 65 | with h5py.File(file_name, "r") as hdf: 66 | if "output" in hdf: 67 | return True, True, cloudpickle.loads(np.void(hdf["/output"])) 68 | elif "error" in hdf: 69 | return True, False, cloudpickle.loads(np.void(hdf["/error"])) 70 | else: 71 | return False, False, None 72 | 73 | 74 | def get_runtime(file_name: str) -> float: 75 | """ 76 | Get run time from HDF5 file 77 | 78 | Args: 79 | file_name (str): file name of the HDF5 file as absolute path 80 | 81 | Returns: 82 | float: run time from the execution of the python function 83 | """ 84 | with h5py.File(file_name, "r") as hdf: 85 | if "runtime" in hdf: 86 | return cloudpickle.loads(np.void(hdf["/runtime"])) 87 | else: 88 | return 0.0 89 | 90 | 91 | def get_queue_id(file_name: Optional[str]) -> Optional[int]: 92 | """ 93 | Get queuing system id from HDF5 
file 94 | 95 | Args: 96 | file_name (str): file name of the HDF5 file as absolute path 97 | 98 | Returns: 99 | int: queuing system id from the execution of the python function 100 | """ 101 | if file_name is not None: 102 | with h5py.File(file_name, "r") as hdf: 103 | if "queue_id" in hdf: 104 | return cloudpickle.loads(np.void(hdf["/queue_id"])) 105 | return None 106 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/file/queue_spawner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | from typing import Optional, Union 4 | 5 | from pysqa import QueueAdapter 6 | 7 | from executorlib.standalone.inputcheck import check_file_exists 8 | from executorlib.task_scheduler.file.hdf import dump, get_queue_id 9 | 10 | 11 | def execute_with_pysqa( 12 | command: list, 13 | task_dependent_lst: Optional[list[int]] = None, 14 | file_name: Optional[str] = None, 15 | resource_dict: Optional[dict] = None, 16 | config_directory: Optional[str] = None, 17 | backend: Optional[str] = None, 18 | cache_directory: Optional[str] = None, 19 | ) -> Optional[int]: 20 | """ 21 | Execute a command by submitting it to the queuing system 22 | 23 | Args: 24 | command (list): The command to be executed. 25 | task_dependent_lst (list): A list of subprocesses that the current subprocess depends on. Defaults to []. 26 | file_name (str): Name of the HDF5 file which contains the Python function 27 | resource_dict (dict): resource dictionary, which defines the resources used for the execution of the function. 28 | Example resource dictionary: { 29 | cwd: None, 30 | } 31 | config_directory (str, optional): path to the config directory. 32 | backend (str, optional): name of the backend used to spawn tasks. 33 | cache_directory (str): The directory to store the HDF5 files. 
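
        Example (a minimal sketch; the command, file name and backend are
        illustrative only):

        >>> queue_id = execute_with_pysqa(
        ...     command=["python", "-m", "my_backend", "/absolute/path/task.h5"],
        ...     file_name="/absolute/path/task.h5",
        ...     backend="slurm",
        ...     cache_directory="/absolute/path",
        ... )  # doctest: +SKIP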
34 | 35 | Returns: 36 | int: queuing system ID 37 | """ 38 | if task_dependent_lst is None: 39 | task_dependent_lst = [] 40 | check_file_exists(file_name=file_name) 41 | queue_id = get_queue_id(file_name=file_name) 42 | qa = QueueAdapter( 43 | directory=config_directory, 44 | queue_type=backend, 45 | execute_command=_pysqa_execute_command, 46 | ) 47 | if queue_id is None or qa.get_status_of_job(process_id=queue_id) is None: 48 | if resource_dict is None: 49 | resource_dict = {} 50 | if "cwd" in resource_dict and resource_dict["cwd"] is not None: 51 | cwd = resource_dict["cwd"] 52 | else: 53 | cwd = cache_directory 54 | submit_kwargs = { 55 | "command": " ".join(command), 56 | "dependency_list": [str(qid) for qid in task_dependent_lst], 57 | "working_directory": os.path.abspath(cwd), 58 | } 59 | if "cwd" in resource_dict: 60 | del resource_dict["cwd"] 61 | if "threads_per_core" in resource_dict: 62 | resource_dict["cores"] *= resource_dict["threads_per_core"] 63 | del resource_dict["threads_per_core"] 64 | unsupported_keys = [ 65 | "gpus_per_core", 66 | "openmpi_oversubscribe", 67 | "slurm_cmd_args", 68 | ] 69 | for k in unsupported_keys: 70 | if k in resource_dict: 71 | del resource_dict[k] 72 | if "job_name" not in resource_dict: 73 | resource_dict["job_name"] = os.path.basename( 74 | os.path.dirname(os.path.abspath(cwd)) 75 | ) 76 | submit_kwargs.update(resource_dict) 77 | queue_id = qa.submit_job(**submit_kwargs) 78 | dump(file_name=file_name, data_dict={"queue_id": queue_id}) 79 | return queue_id 80 | 81 | 82 | def _pysqa_execute_command( 83 | commands: str, 84 | working_directory: Optional[str] = None, 85 | split_output: bool = True, 86 | shell: bool = False, 87 | error_filename: str = "pysqa.err", 88 | ) -> Union[str, list[str]]: 89 | """ 90 | A wrapper around the subprocess.check_output function. Modified from pysqa to raise an exception if the subprocess 91 | fails to submit the job to the queue. 92 | 93 | Args: 94 | commands (str): The command(s) to be executed on the command line 95 | working_directory (str, optional): The directory where the command is executed. Defaults to None. 96 | split_output (bool, optional): Boolean flag to split newlines in the output. Defaults to True. 97 | shell (bool, optional): Additional switch to convert commands to a single string. Defaults to False. 98 | error_filename (str, optional): In case the execution fails, the output is written to this file. Defaults to "pysqa.err". 
99 | 100 | Returns: 101 | Union[str, List[str]]: Output of the shell command either as a string or as a list of strings 102 | """ 103 | if shell and isinstance(commands, list): 104 | commands = " ".join(commands) 105 | out = subprocess.check_output( 106 | commands, 107 | cwd=working_directory, 108 | stderr=subprocess.STDOUT, 109 | universal_newlines=True, 110 | shell=not isinstance(commands, list), 111 | ) 112 | if out is not None and split_output: 113 | return out.split("\n") 114 | else: 115 | return out 116 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/file/subprocess_spawner.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import time 3 | from typing import Optional 4 | 5 | from executorlib.standalone.inputcheck import check_file_exists 6 | 7 | 8 | def execute_in_subprocess( 9 | command: list, 10 | task_dependent_lst: Optional[list] = None, 11 | file_name: Optional[str] = None, 12 | resource_dict: Optional[dict] = None, 13 | config_directory: Optional[str] = None, 14 | backend: Optional[str] = None, 15 | cache_directory: Optional[str] = None, 16 | ) -> subprocess.Popen: 17 | """ 18 | Execute a command in a subprocess. 19 | 20 | Args: 21 | command (list): The command to be executed. 22 | task_dependent_lst (list): A list of subprocesses that the current subprocess depends on. Defaults to []. 23 | file_name (str): Name of the HDF5 file which contains the Python function 24 | resource_dict (dict): resource dictionary, which defines the resources used for the execution of the function. 25 | Example resource dictionary: { 26 | cwd: None, 27 | } 28 | config_directory (str, optional): path to the config directory. 29 | backend (str, optional): name of the backend used to spawn tasks. 30 | cache_directory (str): The directory to store the HDF5 files. 31 | 32 | Returns: 33 | subprocess.Popen: The subprocess object. 34 | 35 | """ 36 | if task_dependent_lst is None: 37 | task_dependent_lst = [] 38 | check_file_exists(file_name=file_name) 39 | while len(task_dependent_lst) > 0: 40 | task_dependent_lst = [ 41 | task for task in task_dependent_lst if task.poll() is None 42 | ] 43 | if config_directory is not None: 44 | raise ValueError( 45 | "config_directory parameter is not supported for subprocess spawner." 46 | ) 47 | if backend is not None: 48 | raise ValueError("backend parameter is not supported for subprocess spawner.") 49 | if resource_dict is None: 50 | resource_dict = {} 51 | cwd = resource_dict.get("cwd", cache_directory) 52 | return subprocess.Popen(command, universal_newlines=True, cwd=cwd) 53 | 54 | 55 | def terminate_subprocess(task): 56 | """ 57 | Terminate a subprocess and wait for it to complete. 
58 | 59 | Args: 60 | task (subprocess.Popen): The subprocess.Popen instance to terminate 61 | """ 62 | task.terminate() 63 | while task.poll() is None: 64 | time.sleep(0.1) 65 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/file/task_scheduler.py: -------------------------------------------------------------------------------- 1 | import os 2 | from threading import Thread 3 | from typing import Callable, Optional 4 | 5 | from executorlib.standalone.inputcheck import ( 6 | check_executor, 7 | check_flux_executor_pmi_mode, 8 | check_flux_log_files, 9 | check_hostname_localhost, 10 | check_max_workers_and_cores, 11 | check_nested_flux_executor, 12 | ) 13 | from executorlib.task_scheduler.base import TaskSchedulerBase 14 | from executorlib.task_scheduler.file.shared import execute_tasks_h5 15 | from executorlib.task_scheduler.file.subprocess_spawner import ( 16 | execute_in_subprocess, 17 | terminate_subprocess, 18 | ) 19 | 20 | try: 21 | from executorlib.task_scheduler.file.queue_spawner import execute_with_pysqa 22 | except ImportError: 23 | # If pysqa is not available fall back to executing tasks in a subprocess 24 | execute_with_pysqa = execute_in_subprocess # type: ignore 25 | 26 | 27 | class FileTaskScheduler(TaskSchedulerBase): 28 | def __init__( 29 | self, 30 | cache_directory: str = "cache", 31 | resource_dict: Optional[dict] = None, 32 | execute_function: Callable = execute_with_pysqa, 33 | terminate_function: Optional[Callable] = None, 34 | pysqa_config_directory: Optional[str] = None, 35 | backend: Optional[str] = None, 36 | disable_dependencies: bool = False, 37 | ): 38 | """ 39 | Initialize the FileExecutor. 40 | 41 | Args: 42 | cache_directory (str, optional): The directory to store cache files. Defaults to "cache". 43 | resource_dict (dict): A dictionary of resources required by the task. With the following keys: 44 | - cores (int): number of MPI cores to be used for each function call 45 | - cwd (str/None): current working directory where the parallel python task is executed 46 | execute_function (Callable, optional): The function to execute tasks. Defaults to execute_in_subprocess. 47 | terminate_function (Callable, optional): The function to terminate the tasks. 48 | pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend). 49 | backend (str, optional): name of the backend used to spawn tasks. 50 | disable_dependencies (boolean): Disable resolving future objects during the submission. 
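
        Example (a minimal sketch; uses the subprocess spawner so no queuing
        system is required):

        >>> from executorlib.task_scheduler.file.subprocess_spawner import execute_in_subprocess
        >>> with FileTaskScheduler(execute_function=execute_in_subprocess) as exe:  # doctest: +SKIP
        ...     future = exe.submit(sum, [1, 2, 3])
        ...     print(future.result())
        6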
51 | """ 52 | super().__init__(max_cores=None) 53 | default_resource_dict = { 54 | "cores": 1, 55 | "cwd": None, 56 | } 57 | if resource_dict is None: 58 | resource_dict = {} 59 | resource_dict.update( 60 | {k: v for k, v in default_resource_dict.items() if k not in resource_dict} 61 | ) 62 | if execute_function == execute_in_subprocess and terminate_function is None: 63 | terminate_function = terminate_subprocess 64 | cache_directory_path = os.path.abspath(cache_directory) 65 | os.makedirs(cache_directory_path, exist_ok=True) 66 | self._process_kwargs = { 67 | "future_queue": self._future_queue, 68 | "execute_function": execute_function, 69 | "cache_directory": cache_directory_path, 70 | "resource_dict": resource_dict, 71 | "terminate_function": terminate_function, 72 | "pysqa_config_directory": pysqa_config_directory, 73 | "backend": backend, 74 | "disable_dependencies": disable_dependencies, 75 | } 76 | self._set_process( 77 | Thread( 78 | target=execute_tasks_h5, 79 | kwargs=self._process_kwargs, 80 | ) 81 | ) 82 | 83 | 84 | def create_file_executor( 85 | max_workers: Optional[int] = None, 86 | backend: str = "flux_submission", 87 | max_cores: Optional[int] = None, 88 | cache_directory: Optional[str] = None, 89 | resource_dict: Optional[dict] = None, 90 | flux_executor=None, 91 | flux_executor_pmi_mode: Optional[str] = None, 92 | flux_executor_nesting: bool = False, 93 | flux_log_files: bool = False, 94 | pysqa_config_directory: Optional[str] = None, 95 | hostname_localhost: Optional[bool] = None, 96 | block_allocation: bool = False, 97 | init_function: Optional[Callable] = None, 98 | disable_dependencies: bool = False, 99 | ): 100 | if cache_directory is None: 101 | cache_directory = "executorlib_cache" 102 | if block_allocation: 103 | raise ValueError( 104 | "The option block_allocation is not available with the pysqa based backend." 105 | ) 106 | if init_function is not None: 107 | raise ValueError( 108 | "The option to specify an init_function is not available with the pysqa based backend." 
109 | ) 110 | check_flux_executor_pmi_mode(flux_executor_pmi_mode=flux_executor_pmi_mode) 111 | check_max_workers_and_cores(max_cores=max_cores, max_workers=max_workers) 112 | check_hostname_localhost(hostname_localhost=hostname_localhost) 113 | check_executor(executor=flux_executor) 114 | check_nested_flux_executor(nested_flux_executor=flux_executor_nesting) 115 | check_flux_log_files(flux_log_files=flux_log_files) 116 | return FileTaskScheduler( 117 | cache_directory=cache_directory, 118 | resource_dict=resource_dict, 119 | pysqa_config_directory=pysqa_config_directory, 120 | backend=backend.split("_submission")[0], 121 | disable_dependencies=disable_dependencies, 122 | ) 123 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/interactive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/executorlib/task_scheduler/interactive/__init__.py -------------------------------------------------------------------------------- /executorlib/task_scheduler/interactive/fluxspawner.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | 4 | import flux 5 | import flux.job 6 | 7 | from executorlib.standalone.interactive.spawner import BaseSpawner 8 | 9 | 10 | def validate_max_workers(max_workers: int, cores: int, threads_per_core: int): 11 | handle = flux.Flux() 12 | cores_total = flux.resource.list.resource_list(handle).get().up.ncores 13 | cores_requested = max_workers * cores * threads_per_core 14 | if cores_total < cores_requested: 15 | raise ValueError( 16 | "The number of requested cores is larger than the available cores " 17 | + str(cores_total) 18 | + " < " 19 | + str(cores_requested) 20 | ) 21 | 22 | 23 | class FluxPythonSpawner(BaseSpawner): 24 | """ 25 | A class representing the FluxPythonInterface. 26 | 27 | Args: 28 | cwd (str, optional): The current working directory. Defaults to None. 29 | cores (int, optional): The number of cores. Defaults to 1. 30 | threads_per_core (int, optional): The number of threads per core. Defaults to 1. 31 | gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0. 32 | num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None. 33 | exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute nodes. Defaults to 34 | False. 35 | openmpi_oversubscribe (bool, optional): Whether to oversubscribe. Defaults to False. 36 | priority (int, optional): job urgency 0 (lowest) through 31 (highest) (default is 16). Priorities 0 through 15 37 | are restricted to the instance owner. 38 | flux_executor (flux.job.FluxExecutor, optional): The FluxExecutor instance. Defaults to None. 39 | flux_executor_pmi_mode (str, optional): The PMI option. Defaults to None. 40 | flux_executor_nesting (bool, optional): Whether to use nested FluxExecutor. Defaults to False. 41 | flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
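
        Example (a minimal sketch; requires a running flux instance and the
        command shown is illustrative only):

        >>> spawner = FluxPythonSpawner(cores=2)
        >>> spawner.bootup(command_lst=["python", "backend_script.py"])  # doctest: +SKIP
        >>> spawner.shutdown(wait=True)  # doctest: +SKIP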
42 | """ 43 | 44 | def __init__( 45 | self, 46 | cwd: Optional[str] = None, 47 | cores: int = 1, 48 | threads_per_core: int = 1, 49 | gpus_per_core: int = 0, 50 | num_nodes: Optional[int] = None, 51 | exclusive: bool = False, 52 | priority: Optional[int] = None, 53 | openmpi_oversubscribe: bool = False, 54 | flux_executor: Optional[flux.job.FluxExecutor] = None, 55 | flux_executor_pmi_mode: Optional[str] = None, 56 | flux_executor_nesting: bool = False, 57 | flux_log_files: bool = False, 58 | ): 59 | super().__init__( 60 | cwd=cwd, 61 | cores=cores, 62 | openmpi_oversubscribe=openmpi_oversubscribe, 63 | ) 64 | self._threads_per_core = threads_per_core 65 | self._gpus_per_core = gpus_per_core 66 | self._num_nodes = num_nodes 67 | self._exclusive = exclusive 68 | self._flux_executor = flux_executor 69 | self._flux_executor_pmi_mode = flux_executor_pmi_mode 70 | self._flux_executor_nesting = flux_executor_nesting 71 | self._flux_log_files = flux_log_files 72 | self._priority = priority 73 | self._future = None 74 | 75 | def bootup( 76 | self, 77 | command_lst: list[str], 78 | ): 79 | """ 80 | Boot up the client process to connect to the SocketInterface. 81 | 82 | Args: 83 | command_lst (list[str]): List of strings to start the client process. 84 | Raises: 85 | ValueError: If oversubscribing is not supported for the Flux adapter or if conda environments are not supported. 86 | """ 87 | if self._openmpi_oversubscribe: 88 | raise ValueError( 89 | "Oversubscribing is currently not supported for the Flux adapter." 90 | ) 91 | if self._flux_executor is None: 92 | self._flux_executor = flux.job.FluxExecutor() 93 | if not self._flux_executor_nesting: 94 | jobspec = flux.job.JobspecV1.from_command( 95 | command=command_lst, 96 | num_tasks=self._cores, 97 | cores_per_task=self._threads_per_core, 98 | gpus_per_task=self._gpus_per_core, 99 | num_nodes=self._num_nodes, 100 | exclusive=self._exclusive, 101 | ) 102 | else: 103 | jobspec = flux.job.JobspecV1.from_nest_command( 104 | command=command_lst, 105 | num_slots=self._cores, 106 | cores_per_slot=self._threads_per_core, 107 | gpus_per_slot=self._gpus_per_core, 108 | num_nodes=self._num_nodes, 109 | exclusive=self._exclusive, 110 | ) 111 | jobspec.environment = dict(os.environ) 112 | if self._flux_executor_pmi_mode is not None: 113 | jobspec.setattr_shell_option("pmi", self._flux_executor_pmi_mode) 114 | if self._cwd is not None: 115 | jobspec.cwd = self._cwd 116 | if self._flux_log_files and self._cwd is not None: 117 | jobspec.stderr = os.path.join(self._cwd, "flux.err") 118 | jobspec.stdout = os.path.join(self._cwd, "flux.out") 119 | elif self._flux_log_files: 120 | jobspec.stderr = os.path.abspath("flux.err") 121 | jobspec.stdout = os.path.abspath("flux.out") 122 | if self._priority is not None: 123 | self._future = self._flux_executor.submit( 124 | jobspec=jobspec, urgency=self._priority 125 | ) 126 | else: 127 | self._future = self._flux_executor.submit(jobspec=jobspec) 128 | 129 | def shutdown(self, wait: bool = True): 130 | """ 131 | Shutdown the FluxPythonInterface. 132 | 133 | Args: 134 | wait (bool, optional): Whether to wait for the execution to complete. Defaults to True. 135 | """ 136 | if self._future is not None: 137 | if self.poll(): 138 | self._future.cancel() 139 | # The flux future objects are not instantly updated, 140 | # still showing running after cancel was called, 141 | # so we wait until the execution is completed. 
142 | self._future.result() 143 | 144 | def poll(self): 145 | """ 146 | Check if the FluxPythonInterface is running. 147 | 148 | Returns: 149 | bool: True if the interface is running, False otherwise. 150 | """ 151 | return self._future is not None and not self._future.done() 152 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/interactive/slurmspawner.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | 4 | from executorlib.standalone.interactive.spawner import SubprocessSpawner 5 | 6 | SLURM_COMMAND = "srun" 7 | 8 | 9 | def validate_max_workers(max_workers: int, cores: int, threads_per_core: int): 10 | cores_total = int(os.environ["SLURM_NTASKS"]) * int( 11 | os.environ["SLURM_CPUS_PER_TASK"] 12 | ) 13 | cores_requested = max_workers * cores * threads_per_core 14 | if cores_total < cores_requested: 15 | raise ValueError( 16 | "The number of requested cores is larger than the available cores " 17 | + str(cores_total) 18 | + " < " 19 | + str(cores_requested) 20 | ) 21 | 22 | 23 | class SrunSpawner(SubprocessSpawner): 24 | def __init__( 25 | self, 26 | cwd: Optional[str] = None, 27 | cores: int = 1, 28 | threads_per_core: int = 1, 29 | gpus_per_core: int = 0, 30 | num_nodes: Optional[int] = None, 31 | exclusive: bool = False, 32 | openmpi_oversubscribe: bool = False, 33 | slurm_cmd_args: Optional[list[str]] = None, 34 | ): 35 | """ 36 | Srun interface implementation. 37 | 38 | Args: 39 | cwd (str, optional): The current working directory. Defaults to None. 40 | cores (int, optional): The number of cores to use. Defaults to 1. 41 | threads_per_core (int, optional): The number of threads per core. Defaults to 1. 42 | gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0. 43 | num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None. 44 | exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute nodes. Defaults to False. 45 | openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False. 46 | slurm_cmd_args (list[str], optional): Additional command line arguments. Defaults to []. 47 | """ 48 | super().__init__( 49 | cwd=cwd, 50 | cores=cores, 51 | openmpi_oversubscribe=openmpi_oversubscribe, 52 | threads_per_core=threads_per_core, 53 | ) 54 | self._gpus_per_core = gpus_per_core 55 | self._slurm_cmd_args = slurm_cmd_args 56 | self._num_nodes = num_nodes 57 | self._exclusive = exclusive 58 | 59 | def generate_command(self, command_lst: list[str]) -> list[str]: 60 | """ 61 | Generate the command list for the Srun interface. 62 | 63 | Args: 64 | command_lst (list[str]): The command list. 65 | 66 | Returns: 67 | list[str]: The generated command list.
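
        Example (illustrative values; the srun prefix is generated from the
        resource settings passed to the constructor):

        >>> spawner = SrunSpawner(cores=2, threads_per_core=2)
        >>> spawner.generate_command(command_lst=["python", "my_script.py"])
        ['srun', '-n', '2', '--cpus-per-task=2', 'python', 'my_script.py']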
68 | """ 69 | command_prepend_lst = generate_slurm_command( 70 | cores=self._cores, 71 | cwd=self._cwd, 72 | threads_per_core=self._threads_per_core, 73 | gpus_per_core=self._gpus_per_core, 74 | num_nodes=self._num_nodes, 75 | exclusive=self._exclusive, 76 | openmpi_oversubscribe=self._openmpi_oversubscribe, 77 | slurm_cmd_args=self._slurm_cmd_args, 78 | ) 79 | return super().generate_command( 80 | command_lst=command_prepend_lst + command_lst, 81 | ) 82 | 83 | 84 | def generate_slurm_command( 85 | cores: int, 86 | cwd: Optional[str], 87 | threads_per_core: int = 1, 88 | gpus_per_core: int = 0, 89 | num_nodes: Optional[int] = None, 90 | exclusive: bool = False, 91 | openmpi_oversubscribe: bool = False, 92 | slurm_cmd_args: Optional[list[str]] = None, 93 | ) -> list[str]: 94 | """ 95 | Generate the command list for the SLURM interface. 96 | 97 | Args: 98 | cores (int): The number of cores. 99 | cwd (str): The current working directory. 100 | threads_per_core (int, optional): The number of threads per core. Defaults to 1. 101 | gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0. 102 | num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None. 103 | exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute nodes. Defaults to False. 104 | openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False. 105 | slurm_cmd_args (list[str], optional): Additional command line arguments. Defaults to []. 106 | 107 | Returns: 108 | list[str]: The generated command list. 109 | """ 110 | command_prepend_lst = [SLURM_COMMAND, "-n", str(cores)] 111 | if cwd is not None: 112 | command_prepend_lst += ["-D", cwd] 113 | if num_nodes is not None: 114 | command_prepend_lst += ["-N", str(num_nodes)] 115 | if threads_per_core > 1: 116 | command_prepend_lst += ["--cpus-per-task=" + str(threads_per_core)] 117 | if gpus_per_core > 0: 118 | command_prepend_lst += ["--gpus-per-task=" + str(gpus_per_core)] 119 | if exclusive: 120 | command_prepend_lst += ["--exact"] 121 | if openmpi_oversubscribe: 122 | command_prepend_lst += ["--oversubscribe"] 123 | if slurm_cmd_args is not None and len(slurm_cmd_args) > 0: 124 | command_prepend_lst += slurm_cmd_args 125 | return command_prepend_lst 126 | -------------------------------------------------------------------------------- /notebooks/images/uml_executor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/notebooks/images/uml_executor.png -------------------------------------------------------------------------------- /notebooks/images/uml_spawner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/notebooks/images/uml_spawner.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "hatchling==1.27.0", 4 | "hatch-vcs==0.5.0", 5 | "cloudpickle==3.1.1", 6 | "pyzmq==26.4.0", 7 | ] 8 | build-backend = "hatchling.build" 9 | 10 | [project] 11 | name = "executorlib" 12 | description = "Up-scale python functions for high performance computing (HPC) with executorlib."
13 | authors = [ 14 | { name = "Jan Janssen", email = "janssen@lanl.gov" }, 15 | ] 16 | readme = "README.md" 17 | license = { file = "LICENSE" } 18 | keywords = ["high performance computing", "hpc", "task scheduler", "slurm", "flux-framework", "executor"] 19 | requires-python = ">=3.9, <3.14" 20 | classifiers = [ 21 | "Development Status :: 5 - Production/Stable", 22 | "Topic :: Scientific/Engineering :: Physics", 23 | "License :: OSI Approved :: BSD License", 24 | "Intended Audience :: Science/Research", 25 | "Operating System :: OS Independent", 26 | "Programming Language :: Python :: 3.9", 27 | "Programming Language :: Python :: 3.10", 28 | "Programming Language :: Python :: 3.11", 29 | "Programming Language :: Python :: 3.12", 30 | "Programming Language :: Python :: 3.13", 31 | ] 32 | dependencies = [ 33 | "cloudpickle==3.1.1", 34 | "pyzmq==26.4.0", 35 | ] 36 | dynamic = ["version"] 37 | 38 | [project.urls] 39 | Homepage = "https://github.com/pyiron/executorlib" 40 | Documentation = "https://executorlib.readthedocs.io" 41 | Repository = "https://github.com/pyiron/executorlib" 42 | 43 | [project.optional-dependencies] 44 | cache = ["h5py==3.13.0"] 45 | graph = [ 46 | "pygraphviz==1.14", 47 | "networkx==3.4.2", 48 | ] 49 | graphnotebook = [ 50 | "pygraphviz==1.14", 51 | "networkx==3.4.2", 52 | "ipython==9.0.2", 53 | ] 54 | mpi = ["mpi4py==4.0.1"] 55 | cluster = [ 56 | "pysqa==0.2.6", 57 | "h5py==3.13.0", 58 | ] 59 | all = [ 60 | "mpi4py==4.0.1", 61 | "pysqa==0.2.6", 62 | "h5py==3.13.0", 63 | "pygraphviz==1.14", 64 | "networkx==3.4.2", 65 | "ipython==9.0.2", 66 | ] 67 | 68 | [tool.ruff] 69 | exclude = [".ci_support", "docs", "notebooks", "tests", "setup.py", "_version.py"] 70 | 71 | [tool.ruff.lint] 72 | select = [ 73 | # pycodestyle 74 | "E", 75 | # Pyflakes 76 | "F", 77 | # pyupgrade 78 | "UP", 79 | # flake8-bugbear 80 | "B", 81 | # flake8-simplify 82 | "SIM", 83 | # isort 84 | "I", 85 | # flake8-comprehensions 86 | "C4", 87 | # eradicate 88 | "ERA", 89 | # pylint 90 | "PL", 91 | ] 92 | ignore = [ 93 | # ignore line-length violations 94 | "E501", 95 | # Too many arguments in function definition 96 | "PLR0913", 97 | # Magic value used in comparison 98 | "PLR2004", 99 | # Too many branches 100 | "PLR0912", 101 | ] 102 | 103 | [tool.hatch.build] 104 | include = [ 105 | "executorlib" 106 | ] 107 | 108 | [tool.hatch.build.hooks.vcs] 109 | version-file = "executorlib/_version.py" 110 | 111 | [tool.hatch.build.targets.sdist] 112 | include = [ 113 | "executorlib" 114 | ] 115 | 116 | [tool.hatch.build.targets.wheel] 117 | packages = [ 118 | "executorlib" 119 | ] 120 | 121 | [tool.hatch.version] 122 | source = "vcs" 123 | path = "executorlib/_version.py" -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/tests/__init__.py -------------------------------------------------------------------------------- /tests/benchmark/llh.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from time import time 3 | 4 | 5 | def llh_numpy(mean, sigma): 6 | import numpy 7 | 8 | data = numpy.random.normal(size=100000000).astype("float64") 9 | s = (data - mean) ** 2 / (2 * (sigma**2)) 10 | pdfs = numpy.exp(-s) 11 | pdfs /= numpy.sqrt(2 * numpy.pi) * sigma 12 | return numpy.log(pdfs).sum() 13 | 14 | 15 | def run_with_executor(executor=None, 
mean=0.1, sigma=1.1, runs=32, **kwargs): 16 | with executor(**kwargs) as exe: 17 | future_lst = [ 18 | exe.submit(llh_numpy, mean=mean, sigma=sigma) for i in range(runs) 19 | ] 20 | return [f.result() for f in future_lst] 21 | 22 | 23 | def run_static(mean=0.1, sigma=1.1, runs=32): 24 | return [llh_numpy(mean=mean, sigma=sigma) for i in range(runs)] 25 | 26 | 27 | if __name__ == "__main__": 28 | run_mode = sys.argv[1] 29 | start_time = time() 30 | if run_mode == "static": 31 | run_static(mean=0.1, sigma=1.1, runs=32) 32 | elif run_mode == "process": 33 | from concurrent.futures import ProcessPoolExecutor 34 | 35 | run_with_executor( 36 | executor=ProcessPoolExecutor, mean=0.1, sigma=1.1, runs=32, max_workers=4 37 | ) 38 | elif run_mode == "thread": 39 | from concurrent.futures import ThreadPoolExecutor 40 | 41 | run_with_executor( 42 | executor=ThreadPoolExecutor, mean=0.1, sigma=1.1, runs=32, max_workers=4 43 | ) 44 | elif run_mode == "block_allocation": 45 | from executorlib import SingleNodeExecutor 46 | 47 | run_with_executor( 48 | executor=SingleNodeExecutor, 49 | mean=0.1, 50 | sigma=1.1, 51 | runs=32, 52 | max_cores=4, 53 | block_allocation=True, 54 | ) 55 | elif run_mode == "executorlib": 56 | from executorlib import SingleNodeExecutor 57 | 58 | run_with_executor( 59 | executor=SingleNodeExecutor, 60 | mean=0.1, 61 | sigma=1.1, 62 | runs=32, 63 | max_cores=4, 64 | block_allocation=False, 65 | ) 66 | elif run_mode == "flux": 67 | from executorlib import FluxJobExecutor 68 | 69 | run_with_executor( 70 | executor=FluxJobExecutor, 71 | mean=0.1, 72 | sigma=1.1, 73 | runs=32, 74 | max_cores=4, 75 | block_allocation=True, 76 | ) 77 | elif run_mode == "mpi4py": 78 | from mpi4py.futures import MPIPoolExecutor 79 | 80 | run_with_executor( 81 | executor=MPIPoolExecutor, mean=0.1, sigma=1.1, runs=32, max_workers=4 82 | ) 83 | else: 84 | raise ValueError(run_mode) 85 | stop_time = time() 86 | print(run_mode, stop_time - start_time) 87 | -------------------------------------------------------------------------------- /tests/benchmark/test_results.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class TestResults(unittest.TestCase): 5 | def test_result(self): 6 | with open("timing.log") as f: 7 | content = f.readlines() 8 | timing_dict = {l.split()[0]: float(l.split()[1]) for l in content} 9 | self.assertEqual(min(timing_dict, key=timing_dict.get), "process") 10 | self.assertEqual(max(timing_dict, key=timing_dict.get), "static") 11 | self.assertTrue(timing_dict["process"] < timing_dict["executorlib"]) 12 | self.assertTrue(timing_dict["block_allocation"] < timing_dict["process"] * 1.1) 13 | self.assertTrue(timing_dict["executorlib"] < timing_dict["process"] * 1.35) 14 | self.assertTrue(timing_dict["process"] < timing_dict["mpi4py"]) 15 | self.assertTrue(timing_dict["block_allocation"] < timing_dict["mpi4py"]) 16 | self.assertTrue(timing_dict["mpi4py"] < timing_dict["process"] * 1.15) 17 | self.assertTrue(timing_dict["thread"] < timing_dict["static"]) 18 | self.assertTrue(timing_dict["mpi4py"] < timing_dict["thread"]) 19 | -------------------------------------------------------------------------------- /tests/executables/count.py: -------------------------------------------------------------------------------- 1 | def count(iterations): 2 | for i in range(int(iterations)): 3 | print(i) 4 | print("done") 5 | 6 | 7 | if __name__ == "__main__": 8 | while True: 9 | user_input = input() 10 | if "shutdown" in user_input: 11 | break 12 | 
else: 13 | count(iterations=int(user_input)) 14 | -------------------------------------------------------------------------------- /tests/test_backend_interactive_serial.py: -------------------------------------------------------------------------------- 1 | from threading import Thread 2 | import unittest 3 | 4 | import cloudpickle 5 | import zmq 6 | 7 | from executorlib.backend.interactive_serial import main 8 | 9 | 10 | def calc(i, j): 11 | return i + j 12 | 13 | 14 | def set_global(): 15 | return {"j": 5} 16 | 17 | 18 | def submit(socket): 19 | socket.send( 20 | cloudpickle.dumps({"init": True, "fn": set_global, "args": (), "kwargs": {}}) 21 | ) 22 | socket.send(cloudpickle.dumps({"fn": calc, "args": (), "kwargs": {"i": 2}})) 23 | socket.send(cloudpickle.dumps({"shutdown": True, "wait": True})) 24 | 25 | 26 | def submit_error(socket): 27 | socket.send( 28 | cloudpickle.dumps({"init": True, "fn": set_global, "args": (), "kwargs": {}}) 29 | ) 30 | socket.send(cloudpickle.dumps({"fn": calc, "args": (), "kwargs": {}})) 31 | socket.send(cloudpickle.dumps({"shutdown": True, "wait": True})) 32 | 33 | 34 | class TestSerial(unittest.TestCase): 35 | def test_main_as_thread(self): 36 | context = zmq.Context() 37 | socket = context.socket(zmq.PAIR) 38 | port = socket.bind_to_random_port("tcp://*") 39 | t = Thread(target=main, kwargs={"argument_lst": ["--zmqport", str(port)]}) 40 | t.start() 41 | submit(socket=socket) 42 | self.assertEqual(cloudpickle.loads(socket.recv()), {"result": 7}) 43 | self.assertEqual(cloudpickle.loads(socket.recv()), {"result": True}) 44 | socket.close() 45 | context.term() 46 | 47 | def test_main_as_thread_error(self): 48 | context = zmq.Context() 49 | socket = context.socket(zmq.PAIR) 50 | port = socket.bind_to_random_port("tcp://*") 51 | t = Thread(target=main, kwargs={"argument_lst": ["--zmqport", str(port)]}) 52 | t.start() 53 | submit_error(socket=socket) 54 | self.assertEqual( 55 | str(type(cloudpickle.loads(socket.recv())["error"])), "<class 'TypeError'>" 56 | ) 57 | self.assertEqual(cloudpickle.loads(socket.recv()), {"result": True}) 58 | socket.close() 59 | context.term() 60 | 61 | def test_submit_as_thread(self): 62 | context = zmq.Context() 63 | socket = context.socket(zmq.PAIR) 64 | port = socket.bind_to_random_port("tcp://*") 65 | t = Thread(target=submit, kwargs={"socket": socket}) 66 | t.start() 67 | main(argument_lst=["--zmqport", str(port)]) 68 | self.assertEqual(cloudpickle.loads(socket.recv()), {"result": 7}) 69 | self.assertEqual(cloudpickle.loads(socket.recv()), {"result": True}) 70 | socket.close() 71 | context.term() 72 | 73 | def test_submit_as_thread_error(self): 74 | context = zmq.Context() 75 | socket = context.socket(zmq.PAIR) 76 | port = socket.bind_to_random_port("tcp://*") 77 | t = Thread(target=submit_error, kwargs={"socket": socket}) 78 | t.start() 79 | main(argument_lst=["--zmqport", str(port)]) 80 | self.assertEqual( 81 | str(type(cloudpickle.loads(socket.recv())["error"])), "<class 'TypeError'>" 82 | ) 83 | self.assertEqual(cloudpickle.loads(socket.recv()), {"result": True}) 84 | socket.close() 85 | context.term() 86 | -------------------------------------------------------------------------------- /tests/test_base_executor_queue.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future, CancelledError 2 | from queue import Queue 3 | import unittest 4 | 5 | from executorlib.standalone.queue import cancel_items_in_queue 6 | 7 | 8 | class TestQueue(unittest.TestCase): 9 | def
test_cancel_items_in_queue(self): 10 | q = Queue() 11 | fs1 = Future() 12 | fs2 = Future() 13 | q.put({"future": fs1}) 14 | q.put({"future": fs2}) 15 | cancel_items_in_queue(que=q) 16 | self.assertEqual(q.qsize(), 0) 17 | self.assertTrue(fs1.done()) 18 | with self.assertRaises(CancelledError): 19 | self.assertTrue(fs1.result()) 20 | self.assertTrue(fs2.done()) 21 | with self.assertRaises(CancelledError): 22 | self.assertTrue(fs2.result()) 23 | q.join() 24 | -------------------------------------------------------------------------------- /tests/test_cache_backend_execute.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import os 3 | import shutil 4 | import unittest 5 | 6 | 7 | try: 8 | from executorlib.task_scheduler.file.backend import backend_execute_task_in_file 9 | from executorlib.task_scheduler.file.shared import _check_task_output, FutureItem 10 | from executorlib.task_scheduler.file.hdf import dump, get_runtime 11 | from executorlib.standalone.serialize import serialize_funct_h5 12 | 13 | skip_h5io_test = False 14 | except ImportError: 15 | skip_h5io_test = True 16 | 17 | 18 | def my_funct(a, b): 19 | return a + b 20 | 21 | 22 | def get_error(a): 23 | raise ValueError(a) 24 | 25 | 26 | @unittest.skipIf( 27 | skip_h5io_test, "h5io is not installed, so the h5io tests are skipped." 28 | ) 29 | class TestSharedFunctions(unittest.TestCase): 30 | def test_execute_function_mixed(self): 31 | cache_directory = os.path.abspath("cache") 32 | os.makedirs(cache_directory, exist_ok=True) 33 | task_key, data_dict = serialize_funct_h5( 34 | fn=my_funct, 35 | fn_args=[1], 36 | fn_kwargs={"b": 2}, 37 | ) 38 | file_name = os.path.join(cache_directory, task_key, "cache.h5in") 39 | os.makedirs(os.path.join(cache_directory, task_key), exist_ok=True) 40 | dump(file_name=file_name, data_dict=data_dict) 41 | backend_execute_task_in_file(file_name=file_name) 42 | future_obj = Future() 43 | _check_task_output( 44 | task_key=task_key, future_obj=future_obj, cache_directory=cache_directory 45 | ) 46 | self.assertTrue(future_obj.done()) 47 | self.assertEqual(future_obj.result(), 3) 48 | self.assertTrue( 49 | get_runtime(file_name=os.path.join(cache_directory, task_key, "cache.h5out")) 50 | > 0.0 51 | ) 52 | future_file_obj = FutureItem( 53 | file_name=os.path.join(cache_directory, task_key, "cache.h5out") 54 | ) 55 | self.assertTrue(future_file_obj.done()) 56 | self.assertEqual(future_file_obj.result(), 3) 57 | 58 | def test_execute_function_args(self): 59 | cache_directory = os.path.abspath("cache") 60 | os.makedirs(cache_directory, exist_ok=True) 61 | task_key, data_dict = serialize_funct_h5( 62 | fn=my_funct, 63 | fn_args=[1, 2], 64 | fn_kwargs={}, 65 | ) 66 | file_name = os.path.join(cache_directory, task_key, "cache.h5in") 67 | os.makedirs(os.path.join(cache_directory, task_key), exist_ok=True) 68 | dump(file_name=file_name, data_dict=data_dict) 69 | backend_execute_task_in_file(file_name=file_name) 70 | future_obj = Future() 71 | _check_task_output( 72 | task_key=task_key, future_obj=future_obj, cache_directory=cache_directory 73 | ) 74 | self.assertTrue(future_obj.done()) 75 | self.assertEqual(future_obj.result(), 3) 76 | self.assertTrue( 77 | get_runtime(file_name=os.path.join(cache_directory, task_key, "cache.h5out")) 78 | > 0.0 79 | ) 80 | future_file_obj = FutureItem( 81 | file_name=os.path.join(cache_directory, task_key, "cache.h5out") 82 | ) 83 | self.assertTrue(future_file_obj.done()) 84 | 
self.assertEqual(future_file_obj.result(), 3) 85 | 86 | def test_execute_function_kwargs(self): 87 | cache_directory = os.path.abspath("cache") 88 | os.makedirs(cache_directory, exist_ok=True) 89 | task_key, data_dict = serialize_funct_h5( 90 | fn=my_funct, 91 | fn_args=[], 92 | fn_kwargs={"a": 1, "b": 2}, 93 | ) 94 | file_name = os.path.join(cache_directory, task_key, "cache.h5in") 95 | os.makedirs(os.path.join(cache_directory, task_key), exist_ok=True) 96 | dump(file_name=file_name, data_dict=data_dict) 97 | backend_execute_task_in_file(file_name=file_name) 98 | future_obj = Future() 99 | _check_task_output( 100 | task_key=task_key, future_obj=future_obj, cache_directory=cache_directory 101 | ) 102 | self.assertTrue(future_obj.done()) 103 | self.assertEqual(future_obj.result(), 3) 104 | self.assertTrue( 105 | get_runtime(file_name=os.path.join(cache_directory, task_key, "cache.h5out")) 106 | > 0.0 107 | ) 108 | future_file_obj = FutureItem( 109 | file_name=os.path.join(cache_directory, task_key, "cache.h5out") 110 | ) 111 | self.assertTrue(future_file_obj.done()) 112 | self.assertEqual(future_file_obj.result(), 3) 113 | 114 | def test_execute_function_error(self): 115 | cache_directory = os.path.abspath("cache") 116 | os.makedirs(cache_directory, exist_ok=True) 117 | task_key, data_dict = serialize_funct_h5( 118 | fn=get_error, 119 | fn_args=[], 120 | fn_kwargs={"a": 1}, 121 | ) 122 | file_name = os.path.join(cache_directory, task_key, "cache.h5in") 123 | os.makedirs(os.path.join(cache_directory, task_key), exist_ok=True) 124 | dump(file_name=file_name, data_dict=data_dict) 125 | backend_execute_task_in_file(file_name=file_name) 126 | future_obj = Future() 127 | _check_task_output( 128 | task_key=task_key, future_obj=future_obj, cache_directory=cache_directory 129 | ) 130 | self.assertTrue(future_obj.done()) 131 | with self.assertRaises(ValueError): 132 | future_obj.result() 133 | self.assertTrue( 134 | get_runtime(file_name=os.path.join(cache_directory, task_key, "cache.h5out")) 135 | > 0.0 136 | ) 137 | future_file_obj = FutureItem( 138 | file_name=os.path.join(cache_directory, task_key, "cache.h5out") 139 | ) 140 | self.assertTrue(future_file_obj.done()) 141 | with self.assertRaises(ValueError): 142 | future_file_obj.result() 143 | 144 | def tearDown(self): 145 | if os.path.exists("cache"): 146 | shutil.rmtree("cache") 147 | -------------------------------------------------------------------------------- /tests/test_cache_fileexecutor_mpi.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | import os 3 | import shutil 4 | import unittest 5 | 6 | from executorlib.task_scheduler.file.subprocess_spawner import execute_in_subprocess 7 | 8 | 9 | try: 10 | from executorlib.task_scheduler.file.task_scheduler import FileTaskScheduler 11 | 12 | skip_h5py_test = False 13 | except ImportError: 14 | skip_h5py_test = True 15 | 16 | 17 | skip_mpi4py_test = importlib.util.find_spec("mpi4py") is None 18 | 19 | 20 | def mpi_funct(i): 21 | from mpi4py import MPI 22 | 23 | size = MPI.COMM_WORLD.Get_size() 24 | rank = MPI.COMM_WORLD.Get_rank() 25 | return i, size, rank 26 | 27 | 28 | @unittest.skipIf( 29 | skip_h5py_test or skip_mpi4py_test, 30 | "h5py or mpi4py are not installed, so the h5py and mpi4py tests are skipped.", 31 | ) 32 | class TestCacheExecutorMPI(unittest.TestCase): 33 | def test_executor(self): 34 | with FileTaskScheduler( 35 | resource_dict={"cores": 2}, execute_function=execute_in_subprocess 36 | ) as exe: 37 | fs1 = 
exe.submit(mpi_funct, 1) 38 | self.assertFalse(fs1.done()) 39 | self.assertEqual(fs1.result(), [(1, 2, 0), (1, 2, 1)]) 40 | self.assertTrue(fs1.done()) 41 | 42 | def tearDown(self): 43 | if os.path.exists("cache"): 44 | shutil.rmtree("cache") 45 | -------------------------------------------------------------------------------- /tests/test_cache_fileexecutor_serial.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import os 3 | from queue import Queue 4 | import shutil 5 | import unittest 6 | from threading import Thread 7 | 8 | from executorlib.task_scheduler.file.subprocess_spawner import ( 9 | execute_in_subprocess, 10 | terminate_subprocess, 11 | ) 12 | 13 | try: 14 | from executorlib.task_scheduler.file.task_scheduler import FileTaskScheduler, create_file_executor 15 | from executorlib.task_scheduler.file.shared import execute_tasks_h5 16 | 17 | skip_h5py_test = False 18 | except ImportError: 19 | skip_h5py_test = True 20 | 21 | 22 | def my_funct(a, b): 23 | return a + b 24 | 25 | 26 | def list_files_in_working_directory(): 27 | return os.listdir(os.getcwd()) 28 | 29 | 30 | def get_error(a): 31 | raise ValueError(a) 32 | 33 | 34 | @unittest.skipIf( 35 | skip_h5py_test, "h5py is not installed, so the h5py tests are skipped." 36 | ) 37 | class TestCacheExecutorSerial(unittest.TestCase): 38 | def test_executor_mixed(self): 39 | with FileTaskScheduler(execute_function=execute_in_subprocess) as exe: 40 | fs1 = exe.submit(my_funct, 1, b=2) 41 | self.assertFalse(fs1.done()) 42 | self.assertEqual(fs1.result(), 3) 43 | self.assertTrue(fs1.done()) 44 | 45 | def test_executor_dependence_mixed(self): 46 | with FileTaskScheduler(execute_function=execute_in_subprocess) as exe: 47 | fs1 = exe.submit(my_funct, 1, b=2) 48 | fs2 = exe.submit(my_funct, 1, b=fs1) 49 | self.assertFalse(fs2.done()) 50 | self.assertEqual(fs2.result(), 4) 51 | self.assertTrue(fs2.done()) 52 | 53 | def test_create_file_executor_error(self): 54 | with self.assertRaises(ValueError): 55 | create_file_executor(block_allocation=True) 56 | with self.assertRaises(ValueError): 57 | create_file_executor(init_function=True) 58 | 59 | def test_executor_dependence_error(self): 60 | with self.assertRaises(ValueError): 61 | with FileTaskScheduler( 62 | execute_function=execute_in_subprocess, disable_dependencies=True 63 | ) as exe: 64 | fs = exe.submit(my_funct, 1, b=exe.submit(my_funct, 1, b=2)) 65 | fs.result() 66 | 67 | def test_executor_working_directory(self): 68 | cwd = os.path.join(os.path.dirname(__file__), "executables") 69 | with FileTaskScheduler( 70 | resource_dict={"cwd": cwd}, execute_function=execute_in_subprocess 71 | ) as exe: 72 | fs1 = exe.submit(list_files_in_working_directory) 73 | self.assertEqual(fs1.result(), os.listdir(cwd)) 74 | 75 | def test_executor_error(self): 76 | cwd = os.path.join(os.path.dirname(__file__), "executables") 77 | with FileTaskScheduler( 78 | resource_dict={"cwd": cwd}, execute_function=execute_in_subprocess 79 | ) as exe: 80 | fs1 = exe.submit(get_error, a=1) 81 | with self.assertRaises(ValueError): 82 | fs1.result() 83 | 84 | def test_executor_function(self): 85 | fs1 = Future() 86 | q = Queue() 87 | q.put( 88 | { 89 | "fn": my_funct, 90 | "args": (), 91 | "kwargs": {"a": 1, "b": 2}, 92 | "future": fs1, 93 | "resource_dict": {}, 94 | } 95 | ) 96 | cache_dir = os.path.abspath("cache") 97 | os.makedirs(cache_dir, exist_ok=True) 98 | process = Thread( 99 | target=execute_tasks_h5, 100 | kwargs={ 101 | "future_queue": 
q, 102 | "cache_directory": cache_dir, 103 | "execute_function": execute_in_subprocess, 104 | "resource_dict": {"cores": 1, "cwd": None}, 105 | "terminate_function": terminate_subprocess, 106 | }, 107 | ) 108 | process.start() 109 | self.assertFalse(fs1.done()) 110 | self.assertEqual(fs1.result(), 3) 111 | self.assertTrue(fs1.done()) 112 | q.put({"shutdown": True, "wait": True}) 113 | process.join() 114 | 115 | def test_executor_function_dependence_kwargs(self): 116 | fs1 = Future() 117 | fs2 = Future() 118 | q = Queue() 119 | q.put( 120 | { 121 | "fn": my_funct, 122 | "args": (), 123 | "kwargs": {"a": 1, "b": 2}, 124 | "future": fs1, 125 | "resource_dict": {}, 126 | } 127 | ) 128 | q.put( 129 | { 130 | "fn": my_funct, 131 | "args": (), 132 | "kwargs": {"a": 1, "b": fs1}, 133 | "future": fs2, 134 | "resource_dict": {}, 135 | } 136 | ) 137 | cache_dir = os.path.abspath("cache") 138 | os.makedirs(cache_dir, exist_ok=True) 139 | process = Thread( 140 | target=execute_tasks_h5, 141 | kwargs={ 142 | "future_queue": q, 143 | "cache_directory": cache_dir, 144 | "execute_function": execute_in_subprocess, 145 | "resource_dict": {"cores": 1, "cwd": None}, 146 | "terminate_function": terminate_subprocess, 147 | }, 148 | ) 149 | process.start() 150 | self.assertFalse(fs2.done()) 151 | self.assertEqual(fs2.result(), 4) 152 | self.assertTrue(fs2.done()) 153 | q.put({"shutdown": True, "wait": True}) 154 | process.join() 155 | 156 | def test_executor_function_dependence_args(self): 157 | fs1 = Future() 158 | fs2 = Future() 159 | q = Queue() 160 | q.put( 161 | { 162 | "fn": my_funct, 163 | "args": (), 164 | "kwargs": {"a": 1, "b": 2}, 165 | "future": fs1, 166 | "resource_dict": {}, 167 | } 168 | ) 169 | q.put( 170 | { 171 | "fn": my_funct, 172 | "args": [fs1], 173 | "kwargs": {"b": 2}, 174 | "future": fs2, 175 | "resource_dict": {}, 176 | } 177 | ) 178 | cache_dir = os.path.abspath("cache") 179 | os.makedirs(cache_dir, exist_ok=True) 180 | process = Thread( 181 | target=execute_tasks_h5, 182 | kwargs={ 183 | "future_queue": q, 184 | "cache_directory": cache_dir, 185 | "execute_function": execute_in_subprocess, 186 | "resource_dict": {"cores": 1}, 187 | "terminate_function": terminate_subprocess, 188 | }, 189 | ) 190 | process.start() 191 | self.assertFalse(fs2.done()) 192 | self.assertEqual(fs2.result(), 5) 193 | self.assertTrue(fs2.done()) 194 | q.put({"shutdown": True, "wait": True}) 195 | process.join() 196 | 197 | def test_execute_in_subprocess_errors(self): 198 | with self.assertRaises(ValueError): 199 | execute_in_subprocess( 200 | file_name=__file__, command=[], config_directory="test" 201 | ) 202 | with self.assertRaises(ValueError): 203 | execute_in_subprocess(file_name=__file__, command=[], backend="flux") 204 | 205 | def tearDown(self): 206 | if os.path.exists("cache"): 207 | shutil.rmtree("cache") 208 | -------------------------------------------------------------------------------- /tests/test_fluxclusterexecutor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import importlib.util 3 | import unittest 4 | import shutil 5 | 6 | from executorlib import FluxClusterExecutor 7 | from executorlib.standalone.serialize import cloudpickle_register 8 | 9 | try: 10 | import flux.job 11 | 12 | skip_flux_test = "FLUX_URI" not in os.environ 13 | pmi = os.environ.get("EXECUTORLIB_PMIX", None) 14 | except ImportError: 15 | skip_flux_test = True 16 | 17 | 18 | skip_mpi4py_test = importlib.util.find_spec("mpi4py") is None 19 | 20 | 21 | def mpi_funct(i): 22 |
from mpi4py import MPI 23 | 24 | size = MPI.COMM_WORLD.Get_size() 25 | rank = MPI.COMM_WORLD.Get_rank() 26 | return i, size, rank 27 | 28 | 29 | @unittest.skipIf( 30 | skip_flux_test or skip_mpi4py_test, 31 | "mpi4py or flux are not installed, so the mpi4py and flux tests are skipped.", 32 | ) 33 | class TestCacheExecutorPysqa(unittest.TestCase): 34 | def test_executor(self): 35 | with FluxClusterExecutor( 36 | resource_dict={"cores": 2, "cwd": "cache"}, 37 | block_allocation=False, 38 | cache_directory="cache", 39 | ) as exe: 40 | cloudpickle_register(ind=1) 41 | fs1 = exe.submit(mpi_funct, 1) 42 | self.assertFalse(fs1.done()) 43 | self.assertEqual(fs1.result(), [(1, 2, 0), (1, 2, 1)]) 44 | self.assertTrue(fs1.done()) 45 | 46 | def tearDown(self): 47 | if os.path.exists("cache"): 48 | shutil.rmtree("cache") 49 | -------------------------------------------------------------------------------- /tests/test_fluxjobexecutor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | import numpy as np 5 | 6 | from executorlib import FluxJobExecutor 7 | 8 | 9 | try: 10 | import flux.job 11 | from executorlib.task_scheduler.interactive.fluxspawner import FluxPythonSpawner 12 | 13 | skip_flux_test = "FLUX_URI" not in os.environ 14 | pmi = os.environ.get("EXECUTORLIB_PMIX", None) 15 | except ImportError: 16 | skip_flux_test = True 17 | 18 | 19 | def calc(i): 20 | return i 21 | 22 | 23 | def mpi_funct(i): 24 | from mpi4py import MPI 25 | 26 | size = MPI.COMM_WORLD.Get_size() 27 | rank = MPI.COMM_WORLD.Get_rank() 28 | return i, size, rank 29 | 30 | 31 | def get_global(memory=None): 32 | return memory 33 | 34 | 35 | def set_global(): 36 | return {"memory": np.array([5])} 37 | 38 | 39 | @unittest.skipIf( 40 | skip_flux_test, "Flux is not installed, so the flux tests are skipped."
41 | ) 42 | class TestFluxBackend(unittest.TestCase): 43 | def setUp(self): 44 | self.executor = flux.job.FluxExecutor() 45 | 46 | def test_flux_executor_serial(self): 47 | with FluxJobExecutor( 48 | max_cores=2, 49 | flux_executor=self.executor, 50 | block_allocation=True, 51 | ) as exe: 52 | fs_1 = exe.submit(calc, 1) 53 | fs_2 = exe.submit(calc, 2) 54 | self.assertEqual(fs_1.result(), 1) 55 | self.assertEqual(fs_2.result(), 2) 56 | self.assertTrue(fs_1.done()) 57 | self.assertTrue(fs_2.done()) 58 | 59 | def test_flux_executor_serial_no_dependencies(self): 60 | with FluxJobExecutor( 61 | max_cores=2, 62 | flux_executor=self.executor, 63 | block_allocation=True, 64 | disable_dependencies=True, 65 | ) as exe: 66 | fs_1 = exe.submit(calc, 1) 67 | fs_2 = exe.submit(calc, 2) 68 | self.assertEqual(fs_1.result(), 1) 69 | self.assertEqual(fs_2.result(), 2) 70 | self.assertTrue(fs_1.done()) 71 | self.assertTrue(fs_2.done()) 72 | 73 | def test_flux_executor_threads(self): 74 | with FluxJobExecutor( 75 | max_cores=1, 76 | resource_dict={"threads_per_core": 2}, 77 | flux_executor=self.executor, 78 | block_allocation=True, 79 | ) as exe: 80 | fs_1 = exe.submit(calc, 1) 81 | fs_2 = exe.submit(calc, 2) 82 | self.assertEqual(fs_1.result(), 1) 83 | self.assertEqual(fs_2.result(), 2) 84 | self.assertTrue(fs_1.done()) 85 | self.assertTrue(fs_2.done()) 86 | 87 | def test_flux_executor_parallel(self): 88 | with FluxJobExecutor( 89 | max_cores=2, 90 | resource_dict={"cores": 2}, 91 | flux_executor=self.executor, 92 | block_allocation=True, 93 | flux_executor_pmi_mode=pmi, 94 | ) as exe: 95 | fs_1 = exe.submit(mpi_funct, 1) 96 | self.assertEqual(fs_1.result(), [(1, 2, 0), (1, 2, 1)]) 97 | self.assertTrue(fs_1.done()) 98 | 99 | def test_single_task(self): 100 | with FluxJobExecutor( 101 | max_cores=2, 102 | resource_dict={"cores": 2}, 103 | flux_executor=self.executor, 104 | block_allocation=True, 105 | flux_executor_pmi_mode=pmi, 106 | ) as p: 107 | output = p.map(mpi_funct, [1, 2, 3]) 108 | self.assertEqual( 109 | list(output), 110 | [[(1, 2, 0), (1, 2, 1)], [(2, 2, 0), (2, 2, 1)], [(3, 2, 0), (3, 2, 1)]], 111 | ) 112 | 113 | def test_output_files_cwd(self): 114 | dirname = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) 115 | os.makedirs(dirname, exist_ok=True) 116 | file_stdout = os.path.join(dirname, "flux.out") 117 | file_stderr = os.path.join(dirname, "flux.err") 118 | with FluxJobExecutor( 119 | max_cores=1, 120 | resource_dict={"cores": 1, "cwd": dirname}, 121 | flux_executor=self.executor, 122 | block_allocation=True, 123 | flux_log_files=True, 124 | ) as p: 125 | output = p.map(calc, [1, 2, 3]) 126 | self.assertEqual( 127 | list(output), 128 | [1, 2, 3], 129 | ) 130 | self.assertTrue(os.path.exists(file_stdout)) 131 | self.assertTrue(os.path.exists(file_stderr)) 132 | os.remove(file_stdout) 133 | os.remove(file_stderr) 134 | 135 | def test_output_files_abs(self): 136 | file_stdout = os.path.abspath("flux.out") 137 | file_stderr = os.path.abspath("flux.err") 138 | with FluxJobExecutor( 139 | max_cores=1, 140 | resource_dict={"cores": 1}, 141 | flux_executor=self.executor, 142 | block_allocation=True, 143 | flux_log_files=True, 144 | ) as p: 145 | output = p.map(calc, [1, 2, 3]) 146 | self.assertEqual( 147 | list(output), 148 | [1, 2, 3], 149 | ) 150 | self.assertTrue(os.path.exists(file_stdout)) 151 | self.assertTrue(os.path.exists(file_stderr)) 152 | os.remove(file_stdout) 153 | os.remove(file_stderr) 154 | 155 | def test_internal_memory(self): 156 | with FluxJobExecutor( 157 |
max_cores=1, 158 | resource_dict={"cores": 1}, 159 | init_function=set_global, 160 | flux_executor=self.executor, 161 | block_allocation=True, 162 | ) as p: 163 | f = p.submit(get_global) 164 | self.assertFalse(f.done()) 165 | self.assertEqual(f.result(), np.array([5])) 166 | self.assertTrue(f.done()) 167 | 168 | def test_validate_max_workers(self): 169 | with self.assertRaises(ValueError): 170 | FluxJobExecutor( 171 | max_workers=10, 172 | resource_dict={"cores": 10, "threads_per_core": 10}, 173 | flux_executor=self.executor, 174 | block_allocation=True, 175 | ) 176 | -------------------------------------------------------------------------------- /tests/test_fluxjobexecutor_plot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from time import sleep 4 | 5 | from executorlib import FluxJobExecutor, FluxClusterExecutor 6 | from executorlib.standalone.plot import generate_nodes_and_edges 7 | from executorlib.standalone.serialize import cloudpickle_register 8 | 9 | 10 | try: 11 | import pygraphviz 12 | import flux.job 13 | from executorlib.task_scheduler.interactive.fluxspawner import FluxPythonSpawner 14 | 15 | skip_graphviz_flux_test = "FLUX_URI" not in os.environ 16 | except ImportError: 17 | skip_graphviz_flux_test = True 18 | 19 | 20 | def add_function(parameter_1, parameter_2): 21 | sleep(0.2) 22 | return parameter_1 + parameter_2 23 | 24 | 25 | def generate_tasks(length): 26 | sleep(0.2) 27 | return range(length) 28 | 29 | 30 | def calc_from_lst(lst, ind, parameter): 31 | sleep(0.2) 32 | return lst[ind] + parameter 33 | 34 | 35 | def merge(lst): 36 | sleep(0.2) 37 | return sum(lst) 38 | 39 | 40 | @unittest.skipIf( 41 | skip_graphviz_flux_test, 42 | "Either graphviz or flux are not installed, so the plot_dependency_graph tests are skipped.", 43 | ) 44 | class TestFluxAllocationExecutorWithDependencies(unittest.TestCase): 45 | def test_executor_dependency_plot(self): 46 | with FluxJobExecutor( 47 | max_cores=1, 48 | plot_dependency_graph=True, 49 | block_allocation=False, 50 | ) as exe: 51 | cloudpickle_register(ind=1) 52 | future_1 = exe.submit(add_function, 1, parameter_2=2) 53 | future_2 = exe.submit(add_function, 1, parameter_2=future_1) 54 | self.assertTrue(future_1.done()) 55 | self.assertTrue(future_2.done()) 56 | self.assertEqual(len(exe._task_scheduler._future_hash_dict), 2) 57 | self.assertEqual(len(exe._task_scheduler._task_hash_dict), 2) 58 | nodes, edges = generate_nodes_and_edges( 59 | task_hash_dict=exe._task_scheduler._task_hash_dict, 60 | future_hash_inverse_dict={ 61 | v: k for k, v in exe._task_scheduler._future_hash_dict.items() 62 | }, 63 | ) 64 | self.assertEqual(len(nodes), 5) 65 | self.assertEqual(len(edges), 4) 66 | 67 | def test_many_to_one_plot(self): 68 | length = 5 69 | parameter = 1 70 | with FluxJobExecutor( 71 | max_cores=2, 72 | plot_dependency_graph=True, 73 | block_allocation=True, 74 | ) as exe: 75 | cloudpickle_register(ind=1) 76 | future_lst = exe.submit( 77 | generate_tasks, 78 | length=length, 79 | resource_dict={"cores": 1}, 80 | ) 81 | lst = [] 82 | for i in range(length): 83 | lst.append( 84 | exe.submit( 85 | calc_from_lst, 86 | lst=future_lst, 87 | ind=i, 88 | parameter=parameter, 89 | resource_dict={"cores": 1}, 90 | ) 91 | ) 92 | future_sum = exe.submit( 93 | merge, 94 | lst=lst, 95 | resource_dict={"cores": 1}, 96 | ) 97 | self.assertTrue(future_lst.done()) 98 | for l in lst: 99 | self.assertTrue(l.done()) 100 | self.assertTrue(future_sum.done()) 101 | 
self.assertEqual(len(exe._task_scheduler._future_hash_dict), 7) 102 | self.assertEqual(len(exe._task_scheduler._task_hash_dict), 7) 103 | nodes, edges = generate_nodes_and_edges( 104 | task_hash_dict=exe._task_scheduler._task_hash_dict, 105 | future_hash_inverse_dict={ 106 | v: k for k, v in exe._task_scheduler._future_hash_dict.items() 107 | }, 108 | ) 109 | self.assertEqual(len(nodes), 19) 110 | self.assertEqual(len(edges), 22) 111 | 112 | 113 | @unittest.skipIf( 114 | skip_graphviz_flux_test, 115 | "Either graphviz or flux are not installed, so the plot_dependency_graph tests are skipped.", 116 | ) 117 | class TestFluxSubmissionExecutorWithDependencies(unittest.TestCase): 118 | def test_executor_dependency_plot(self): 119 | with FluxClusterExecutor( 120 | plot_dependency_graph=True, 121 | ) as exe: 122 | cloudpickle_register(ind=1) 123 | future_1 = exe.submit(add_function, 1, parameter_2=2) 124 | future_2 = exe.submit(add_function, 1, parameter_2=future_1) 125 | self.assertTrue(future_1.done()) 126 | self.assertTrue(future_2.done()) 127 | self.assertEqual(len(exe._task_scheduler._future_hash_dict), 2) 128 | self.assertEqual(len(exe._task_scheduler._task_hash_dict), 2) 129 | nodes, edges = generate_nodes_and_edges( 130 | task_hash_dict=exe._task_scheduler._task_hash_dict, 131 | future_hash_inverse_dict={ 132 | v: k for k, v in exe._task_scheduler._future_hash_dict.items() 133 | }, 134 | ) 135 | self.assertEqual(len(nodes), 5) 136 | self.assertEqual(len(edges), 4) 137 | 138 | def test_many_to_one_plot(self): 139 | length = 5 140 | parameter = 1 141 | with FluxClusterExecutor( 142 | plot_dependency_graph=True, 143 | ) as exe: 144 | cloudpickle_register(ind=1) 145 | future_lst = exe.submit( 146 | generate_tasks, 147 | length=length, 148 | resource_dict={"cores": 1}, 149 | ) 150 | lst = [] 151 | for i in range(length): 152 | lst.append( 153 | exe.submit( 154 | calc_from_lst, 155 | lst=future_lst, 156 | ind=i, 157 | parameter=parameter, 158 | resource_dict={"cores": 1}, 159 | ) 160 | ) 161 | future_sum = exe.submit( 162 | merge, 163 | lst=lst, 164 | resource_dict={"cores": 1}, 165 | ) 166 | self.assertTrue(future_lst.done()) 167 | for l in lst: 168 | self.assertTrue(l.done()) 169 | self.assertTrue(future_sum.done()) 170 | self.assertEqual(len(exe._task_scheduler._future_hash_dict), 7) 171 | self.assertEqual(len(exe._task_scheduler._task_hash_dict), 7) 172 | nodes, edges = generate_nodes_and_edges( 173 | task_hash_dict=exe._task_scheduler._task_hash_dict, 174 | future_hash_inverse_dict={ 175 | v: k for k, v in exe._task_scheduler._future_hash_dict.items() 176 | }, 177 | ) 178 | self.assertEqual(len(nodes), 19) 179 | self.assertEqual(len(edges), 22) 180 | -------------------------------------------------------------------------------- /tests/test_fluxpythonspawner.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import os 3 | from queue import Queue 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from executorlib.task_scheduler.interactive.shared import execute_tasks 9 | from executorlib.task_scheduler.interactive.blockallocation import BlockAllocationTaskScheduler 10 | from executorlib.standalone.serialize import cloudpickle_register 11 | 12 | 13 | try: 14 | import flux.job 15 | from executorlib.task_scheduler.interactive.fluxspawner import FluxPythonSpawner 16 | 17 | skip_flux_test = "FLUX_URI" not in os.environ 18 | pmi = os.environ.get("EXECUTORLIB_PMIX", None) 19 | except ImportError: 20 | 
skip_flux_test = True 21 | 22 | 23 | def calc(i): 24 | return i 25 | 26 | 27 | def mpi_funct(i): 28 | from mpi4py import MPI 29 | 30 | size = MPI.COMM_WORLD.Get_size() 31 | rank = MPI.COMM_WORLD.Get_rank() 32 | return i, size, rank 33 | 34 | 35 | def get_global(memory=None): 36 | return memory 37 | 38 | 39 | def set_global(): 40 | return {"memory": np.array([5])} 41 | 42 | 43 | @unittest.skipIf( 44 | skip_flux_test, "Flux is not installed, so the flux tests are skipped." 45 | ) 46 | class TestFlux(unittest.TestCase): 47 | def setUp(self): 48 | self.flux_executor = flux.job.FluxExecutor() 49 | 50 | def test_flux_executor_serial(self): 51 | with BlockAllocationTaskScheduler( 52 | max_workers=2, 53 | executor_kwargs={"flux_executor": self.flux_executor, "priority": 20}, 54 | spawner=FluxPythonSpawner, 55 | ) as exe: 56 | fs_1 = exe.submit(calc, 1) 57 | fs_2 = exe.submit(calc, 2) 58 | self.assertEqual(fs_1.result(), 1) 59 | self.assertEqual(fs_2.result(), 2) 60 | self.assertTrue(fs_1.done()) 61 | self.assertTrue(fs_2.done()) 62 | 63 | def test_flux_executor_threads(self): 64 | with BlockAllocationTaskScheduler( 65 | max_workers=1, 66 | executor_kwargs={ 67 | "flux_executor": self.flux_executor, 68 | "threads_per_core": 2, 69 | }, 70 | spawner=FluxPythonSpawner, 71 | ) as exe: 72 | fs_1 = exe.submit(calc, 1) 73 | fs_2 = exe.submit(calc, 2) 74 | self.assertEqual(fs_1.result(), 1) 75 | self.assertEqual(fs_2.result(), 2) 76 | self.assertTrue(fs_1.done()) 77 | self.assertTrue(fs_2.done()) 78 | 79 | def test_flux_executor_parallel(self): 80 | with BlockAllocationTaskScheduler( 81 | max_workers=1, 82 | executor_kwargs={ 83 | "flux_executor": self.flux_executor, 84 | "cores": 2, 85 | "flux_executor_pmi_mode": pmi, 86 | }, 87 | spawner=FluxPythonSpawner, 88 | ) as exe: 89 | fs_1 = exe.submit(mpi_funct, 1) 90 | self.assertEqual(fs_1.result(), [(1, 2, 0), (1, 2, 1)]) 91 | self.assertTrue(fs_1.done()) 92 | 93 | def test_single_task(self): 94 | with BlockAllocationTaskScheduler( 95 | max_workers=1, 96 | executor_kwargs={ 97 | "flux_executor": self.flux_executor, 98 | "cores": 2, 99 | "flux_executor_pmi_mode": pmi, 100 | }, 101 | spawner=FluxPythonSpawner, 102 | ) as p: 103 | output = p.map(mpi_funct, [1, 2, 3]) 104 | self.assertEqual( 105 | list(output), 106 | [[(1, 2, 0), (1, 2, 1)], [(2, 2, 0), (2, 2, 1)], [(3, 2, 0), (3, 2, 1)]], 107 | ) 108 | 109 | def test_execute_task(self): 110 | f = Future() 111 | q = Queue() 112 | q.put({"fn": calc, "args": (), "kwargs": {"i": 2}, "future": f}) 113 | q.put({"shutdown": True, "wait": True}) 114 | cloudpickle_register(ind=1) 115 | execute_tasks( 116 | future_queue=q, 117 | cores=1, 118 | flux_executor=self.flux_executor, 119 | spawner=FluxPythonSpawner, 120 | ) 121 | self.assertEqual(f.result(), 2) 122 | q.join() 123 | 124 | def test_execute_task_threads(self): 125 | f = Future() 126 | q = Queue() 127 | q.put({"fn": calc, "args": (), "kwargs": {"i": 2}, "future": f}) 128 | q.put({"shutdown": True, "wait": True}) 129 | cloudpickle_register(ind=1) 130 | execute_tasks( 131 | future_queue=q, 132 | cores=1, 133 | threads_per_core=1, 134 | flux_executor=self.flux_executor, 135 | spawner=FluxPythonSpawner, 136 | ) 137 | self.assertEqual(f.result(), 2) 138 | q.join() 139 | 140 | def test_internal_memory(self): 141 | with BlockAllocationTaskScheduler( 142 | max_workers=1, 143 | executor_kwargs={ 144 | "flux_executor": self.flux_executor, 145 | "cores": 1, 146 | "init_function": set_global, 147 | }, 148 | spawner=FluxPythonSpawner, 149 | ) as p: 150 | f = p.submit(get_global) 
151 | self.assertFalse(f.done()) 152 | self.assertEqual(f.result(), np.array([5])) 153 | self.assertTrue(f.done()) 154 | 155 | def test_interface_exception(self): 156 | with self.assertRaises(ValueError): 157 | flux_interface = FluxPythonSpawner( 158 | flux_executor=self.flux_executor, openmpi_oversubscribe=True 159 | ) 160 | flux_interface.bootup(command_lst=[]) 161 | -------------------------------------------------------------------------------- /tests/test_interactive_dependencies.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import importlib.util 3 | from time import sleep 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from executorlib.task_scheduler.interactive.blockallocation import BlockAllocationTaskScheduler 9 | from executorlib.standalone.interactive.spawner import MpiExecSpawner 10 | 11 | 12 | skip_mpi4py_test = importlib.util.find_spec("mpi4py") is None 13 | 14 | 15 | def calc(i): 16 | return np.array(i**2) 17 | 18 | 19 | class TestFuture(unittest.TestCase): 20 | def test_pool_serial(self): 21 | with BlockAllocationTaskScheduler( 22 | max_workers=1, 23 | executor_kwargs={"cores": 1}, 24 | spawner=MpiExecSpawner, 25 | ) as p: 26 | output = p.submit(calc, i=2) 27 | self.assertTrue(isinstance(output, Future)) 28 | self.assertFalse(output.done()) 29 | sleep(1) 30 | self.assertTrue(output.done()) 31 | self.assertEqual(output.result(), np.array(4)) 32 | 33 | @unittest.skipIf( 34 | skip_mpi4py_test, "mpi4py is not installed, so the mpi4py tests are skipped." 35 | ) 36 | def test_pool_serial_multi_core(self): 37 | with BlockAllocationTaskScheduler( 38 | max_workers=1, 39 | executor_kwargs={"cores": 2}, 40 | spawner=MpiExecSpawner, 41 | ) as p: 42 | output = p.submit(calc, i=2) 43 | self.assertTrue(isinstance(output, Future)) 44 | self.assertFalse(output.done()) 45 | sleep(1) 46 | self.assertTrue(output.done()) 47 | self.assertEqual(output.result(), [np.array(4), np.array(4)]) 48 | 49 | def test_independence_from_executor(self): 50 | """ 51 | Ensure that futures are able to live on after the executor gets garbage 52 | collected. 
53 | """ 54 | 55 | with self.subTest("From the main process"): 56 | mutable = [] 57 | 58 | def slow_callable(): 59 | from time import sleep 60 | 61 | sleep(1) 62 | return True 63 | 64 | def callback(future): 65 | mutable.append("Called back") 66 | 67 | def submit(): 68 | # Executor only exists in this scope and can get garbage collected after 69 | # this function exits 70 | future = BlockAllocationTaskScheduler( 71 | max_workers=1, 72 | executor_kwargs={}, 73 | spawner=MpiExecSpawner, 74 | ).submit(slow_callable) 75 | future.add_done_callback(callback) 76 | return future 77 | 78 | self.assertListEqual( 79 | [], 80 | mutable, 81 | msg="Sanity check that test is starting in the expected condition", 82 | ) 83 | future = submit() 84 | 85 | self.assertFalse( 86 | future.done(), 87 | msg="The submit function is slow, it should be running still", 88 | ) 89 | self.assertListEqual( 90 | [], 91 | mutable, 92 | msg="While running, the mutable should not have been impacted by the " 93 | "callback", 94 | ) 95 | future.result() # Wait for the calculation to finish 96 | self.assertListEqual( 97 | ["Called back"], 98 | mutable, 99 | msg="After completion, the callback should modify the mutable data", 100 | ) 101 | 102 | with self.subTest("From inside a class"): 103 | 104 | class Foo: 105 | def __init__(self): 106 | self.running = False 107 | 108 | def run(self): 109 | self.running = True 110 | 111 | future = BlockAllocationTaskScheduler( 112 | max_workers=1, 113 | executor_kwargs={}, 114 | spawner=MpiExecSpawner, 115 | ).submit(self.return_42) 116 | future.add_done_callback(self.finished) 117 | 118 | return future 119 | 120 | def return_42(self): 121 | from time import sleep 122 | 123 | sleep(1) 124 | return 42 125 | 126 | def finished(self, future): 127 | self.running = False 128 | 129 | foo = Foo() 130 | self.assertFalse( 131 | foo.running, 132 | msg="Sanity check that the test starts in the expected condition", 133 | ) 134 | fs = foo.run() 135 | self.assertTrue( 136 | foo.running, 137 | msg="We should be able to exit the run method before the task completes", 138 | ) 139 | fs.result() # Wait for completion 140 | self.assertFalse( 141 | foo.running, 142 | msg="After task completion, we expect the callback to modify the class", 143 | ) 144 | -------------------------------------------------------------------------------- /tests/test_interactive_slurmspawner.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from executorlib.task_scheduler.interactive.slurmspawner import generate_slurm_command 3 | 4 | try: 5 | from executorlib.task_scheduler.file.queue_spawner import _pysqa_execute_command 6 | 7 | skip_pysqa_test = False 8 | except ImportError: 9 | skip_pysqa_test = True 10 | 11 | 12 | @unittest.skipIf( 13 | skip_pysqa_test, "pysqa is not installed, so the pysqa tests are skipped."
14 | ) 15 | class TestPysqaExecuteCommand(unittest.TestCase): 16 | def test_pysqa_execute_command_list(self): 17 | out = _pysqa_execute_command( 18 | commands=["echo", "test"], 19 | working_directory=None, 20 | split_output=True, 21 | shell=True, 22 | error_filename="pysqa.err", 23 | ) 24 | self.assertEqual(len(out), 2) 25 | self.assertEqual("test", out[0]) 26 | 27 | def test_pysqa_execute_command_string(self): 28 | out = _pysqa_execute_command( 29 | commands="echo test", 30 | working_directory=None, 31 | split_output=False, 32 | shell=False, 33 | error_filename="pysqa.err", 34 | ) 35 | self.assertEqual(len(out), 5) 36 | self.assertEqual("test\n", out) 37 | 38 | def test_pysqa_execute_command_fail(self): 39 | with self.assertRaises(FileNotFoundError): 40 | _pysqa_execute_command( 41 | commands=["no/executable/available"], 42 | working_directory=None, 43 | split_output=True, 44 | shell=False, 45 | error_filename="pysqa.err", 46 | ) 47 | 48 | def test_generate_slurm_command(self): 49 | command_lst = generate_slurm_command( 50 | cores=1, 51 | cwd="/tmp/test", 52 | threads_per_core=2, 53 | gpus_per_core=1, 54 | num_nodes=1, 55 | exclusive=True, 56 | openmpi_oversubscribe=True, 57 | slurm_cmd_args=["--help"], 58 | ) 59 | self.assertEqual(len(command_lst), 12) 60 | reply_lst = ['srun', '-n', '1', '-D', '/tmp/test', '-N', '1', '--cpus-per-task=2', '--gpus-per-task=1', '--exact', '--oversubscribe', '--help'] 61 | self.assertEqual(command_lst, reply_lst) 62 | -------------------------------------------------------------------------------- /tests/test_singlenodeexecutor_cache.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import unittest 4 | 5 | from executorlib import SingleNodeExecutor, get_cache_data 6 | from executorlib.standalone.serialize import cloudpickle_register 7 | 8 | try: 9 | import h5py 10 | 11 | skip_h5py_test = False 12 | except ImportError: 13 | skip_h5py_test = True 14 | 15 | 16 | def get_error(a): 17 | raise ValueError(a) 18 | 19 | 20 | @unittest.skipIf( 21 | skip_h5py_test, "h5py is not installed, so the h5io tests are skipped." 
22 | ) 23 | class TestCacheFunctions(unittest.TestCase): 24 | def test_cache_data(self): 25 | cache_directory = "./cache" 26 | with SingleNodeExecutor(cache_directory=cache_directory) as exe: 27 | self.assertTrue(exe) 28 | future_lst = [exe.submit(sum, [i, i]) for i in range(1, 4)] 29 | result_lst = [f.result() for f in future_lst] 30 | 31 | cache_lst = get_cache_data(cache_directory=cache_directory) 32 | self.assertEqual(sum([c["output"] for c in cache_lst]), sum(result_lst)) 33 | self.assertEqual( 34 | sum([sum(c["input_args"][0]) for c in cache_lst]), sum(result_lst) 35 | ) 36 | 37 | def test_cache_error(self): 38 | cache_directory = "./cache_error" 39 | with SingleNodeExecutor(cache_directory=cache_directory) as exe: 40 | self.assertTrue(exe) 41 | cloudpickle_register(ind=1) 42 | f = exe.submit(get_error, a=1) 43 | with self.assertRaises(ValueError): 44 | print(f.result()) 45 | 46 | def tearDown(self): 47 | if os.path.exists("cache"): 48 | shutil.rmtree("cache") 49 | if os.path.exists("cache_error"): 50 | shutil.rmtree("cache_error") 51 | -------------------------------------------------------------------------------- /tests/test_singlenodeexecutor_mpi.py: -------------------------------------------------------------------------------- 1 | import os 2 | import importlib.util 3 | import shutil 4 | import time 5 | import unittest 6 | 7 | from executorlib import SingleNodeExecutor, SlurmJobExecutor 8 | from executorlib.standalone.serialize import cloudpickle_register 9 | 10 | 11 | skip_mpi4py_test = importlib.util.find_spec("mpi4py") is None 12 | 13 | 14 | def calc(i): 15 | return i 16 | 17 | 18 | def mpi_funct(i): 19 | from mpi4py import MPI 20 | 21 | size = MPI.COMM_WORLD.Get_size() 22 | rank = MPI.COMM_WORLD.Get_rank() 23 | return i, size, rank 24 | 25 | 26 | def mpi_funct_sleep(i): 27 | from mpi4py import MPI 28 | 29 | size = MPI.COMM_WORLD.Get_size() 30 | rank = MPI.COMM_WORLD.Get_rank() 31 | time.sleep(i) 32 | return i, size, rank 33 | 34 | 35 | class TestExecutorBackend(unittest.TestCase): 36 | def test_meta_executor_serial(self): 37 | with SingleNodeExecutor(max_cores=2, block_allocation=True) as exe: 38 | cloudpickle_register(ind=1) 39 | fs_1 = exe.submit(calc, 1) 40 | fs_2 = exe.submit(calc, 2) 41 | self.assertEqual(fs_1.result(), 1) 42 | self.assertEqual(fs_2.result(), 2) 43 | self.assertTrue(fs_1.done()) 44 | self.assertTrue(fs_2.done()) 45 | 46 | def test_meta_executor_single(self): 47 | with SingleNodeExecutor(max_cores=1, block_allocation=True) as exe: 48 | cloudpickle_register(ind=1) 49 | fs_1 = exe.submit(calc, 1) 50 | fs_2 = exe.submit(calc, 2) 51 | self.assertEqual(fs_1.result(), 1) 52 | self.assertEqual(fs_2.result(), 2) 53 | self.assertTrue(fs_1.done()) 54 | self.assertTrue(fs_2.done()) 55 | 56 | def test_oversubscribe(self): 57 | with self.assertRaises(ValueError): 58 | with SingleNodeExecutor(max_cores=1, block_allocation=True) as exe: 59 | cloudpickle_register(ind=1) 60 | fs_1 = exe.submit(calc, 1, resource_dict={"cores": 2}) 61 | 62 | @unittest.skipIf( 63 | skip_mpi4py_test, "mpi4py is not installed, so the mpi4py tests are skipped." 
64 | ) 65 | def test_meta_executor_parallel(self): 66 | with SingleNodeExecutor( 67 | max_workers=2, 68 | resource_dict={"cores": 2}, 69 | block_allocation=True, 70 | ) as exe: 71 | cloudpickle_register(ind=1) 72 | fs_1 = exe.submit(mpi_funct, 1) 73 | self.assertEqual(fs_1.result(), [(1, 2, 0), (1, 2, 1)]) 74 | self.assertTrue(fs_1.done()) 75 | 76 | def test_errors(self): 77 | with self.assertRaises(TypeError): 78 | SingleNodeExecutor( 79 | max_cores=1, 80 | resource_dict={"cores": 1, "gpus_per_core": 1}, 81 | ) 82 | 83 | 84 | class TestExecutorBackendCache(unittest.TestCase): 85 | def tearDown(self): 86 | shutil.rmtree("./cache") 87 | 88 | @unittest.skipIf( 89 | skip_mpi4py_test, "mpi4py is not installed, so the mpi4py tests are skipped." 90 | ) 91 | def test_meta_executor_parallel_cache(self): 92 | with SingleNodeExecutor( 93 | max_workers=2, 94 | resource_dict={"cores": 2}, 95 | block_allocation=True, 96 | cache_directory="./cache", 97 | ) as exe: 98 | cloudpickle_register(ind=1) 99 | time_1 = time.time() 100 | fs_1 = exe.submit(mpi_funct_sleep, 1) 101 | self.assertEqual(fs_1.result(), [(1, 2, 0), (1, 2, 1)]) 102 | self.assertTrue(fs_1.done()) 103 | time_2 = time.time() 104 | self.assertTrue(time_2 - time_1 > 1) 105 | time_3 = time.time() 106 | fs_2 = exe.submit(mpi_funct_sleep, 1) 107 | self.assertEqual(fs_2.result(), [(1, 2, 0), (1, 2, 1)]) 108 | self.assertTrue(fs_2.done()) 109 | time_4 = time.time() 110 | self.assertTrue(time_4 - time_3 < 1) 111 | 112 | 113 | class TestWorkingDirectory(unittest.TestCase): 114 | def test_output_files_cwd(self): 115 | dirname = os.path.abspath(os.path.dirname(__file__)) 116 | os.makedirs(dirname, exist_ok=True) 117 | with SingleNodeExecutor( 118 | max_cores=1, 119 | resource_dict={"cores": 1, "cwd": dirname}, 120 | block_allocation=True, 121 | ) as p: 122 | output = p.map(calc, [1, 2, 3]) 123 | self.assertEqual( 124 | list(output), 125 | [1, 2, 3], 126 | ) 127 | 128 | 129 | class TestSLURMExecutor(unittest.TestCase): 130 | def test_validate_max_workers(self): 131 | os.environ["SLURM_NTASKS"] = "6" 132 | os.environ["SLURM_CPUS_PER_TASK"] = "4" 133 | with self.assertRaises(ValueError): 134 | SlurmJobExecutor( 135 | max_workers=10, 136 | resource_dict={"cores": 10, "threads_per_core": 10}, 137 | block_allocation=True, 138 | ) 139 | -------------------------------------------------------------------------------- /tests/test_singlenodeexecutor_noblock.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from executorlib import SingleNodeExecutor 4 | from executorlib.standalone.serialize import cloudpickle_register 5 | 6 | 7 | def calc(i): 8 | return i 9 | 10 | 11 | def resource_dict(resource_dict): 12 | return resource_dict 13 | 14 | 15 | class TestExecutorBackend(unittest.TestCase): 16 | def test_meta_executor_serial_with_dependencies(self): 17 | with SingleNodeExecutor( 18 | max_cores=2, 19 | block_allocation=False, 20 | disable_dependencies=True, 21 | ) as exe: 22 | cloudpickle_register(ind=1) 23 | fs_1 = exe.submit(calc, 1) 24 | fs_2 = exe.submit(calc, 2) 25 | self.assertEqual(fs_1.result(), 1) 26 | self.assertEqual(fs_2.result(), 2) 27 | self.assertTrue(fs_1.done()) 28 | self.assertTrue(fs_2.done()) 29 | 30 | def test_meta_executor_serial_without_dependencies(self): 31 | with SingleNodeExecutor( 32 | max_cores=2, 33 | block_allocation=False, 34 | disable_dependencies=False, 35 | ) as exe: 36 | cloudpickle_register(ind=1) 37 | fs_1 = exe.submit(calc, 1) 38 | fs_2 = exe.submit(calc, 2) 39 |
self.assertEqual(fs_1.result(), 1) 40 | self.assertEqual(fs_2.result(), 2) 41 | self.assertTrue(fs_1.done()) 42 | self.assertTrue(fs_2.done()) 43 | 44 | def test_meta_executor_single(self): 45 | with SingleNodeExecutor( 46 | max_cores=1, 47 | block_allocation=False, 48 | ) as exe: 49 | cloudpickle_register(ind=1) 50 | fs_1 = exe.submit(calc, 1) 51 | fs_2 = exe.submit(calc, 2) 52 | self.assertEqual(fs_1.result(), 1) 53 | self.assertEqual(fs_2.result(), 2) 54 | self.assertTrue(fs_1.done()) 55 | self.assertTrue(fs_2.done()) 56 | 57 | def test_errors(self): 58 | with self.assertRaises(TypeError): 59 | SingleNodeExecutor( 60 | max_cores=1, 61 | resource_dict={ 62 | "cores": 1, 63 | "gpus_per_core": 1, 64 | }, 65 | ) 66 | with self.assertRaises(ValueError): 67 | with SingleNodeExecutor( 68 | max_cores=1, 69 | block_allocation=False, 70 | ) as exe: 71 | exe.submit(resource_dict, resource_dict={}) 72 | with self.assertRaises(ValueError): 73 | with SingleNodeExecutor( 74 | max_cores=1, 75 | block_allocation=True, 76 | ) as exe: 77 | exe.submit(resource_dict, resource_dict={}) 78 | -------------------------------------------------------------------------------- /tests/test_singlenodeexecutor_resize.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from executorlib import SingleNodeExecutor 3 | from executorlib.standalone.serialize import cloudpickle_register 4 | 5 | 6 | def sleep_funct(sec): 7 | from time import sleep 8 | sleep(sec) 9 | return sec 10 | 11 | 12 | class TestResizing(unittest.TestCase): 13 | def test_without_dependencies_decrease(self): 14 | cloudpickle_register(ind=1) 15 | with SingleNodeExecutor(max_workers=2, block_allocation=True, disable_dependencies=True) as exe: 16 | future_lst = [exe.submit(sleep_funct, 1) for _ in range(4)] 17 | self.assertEqual([f.done() for f in future_lst], [False, False, False, False]) 18 | self.assertEqual(len(exe), 4) 19 | sleep_funct(sec=0.5) 20 | exe.max_workers = 1 21 | self.assertTrue(len(exe) >= 1) 22 | self.assertEqual(len(exe._task_scheduler._process), 1) 23 | self.assertTrue(1 <= sum([f.done() for f in future_lst]) < 3) 24 | self.assertEqual([f.result() for f in future_lst], [1, 1, 1, 1]) 25 | self.assertEqual([f.done() for f in future_lst], [True, True, True, True]) 26 | 27 | def test_without_dependencies_increase(self): 28 | cloudpickle_register(ind=1) 29 | with SingleNodeExecutor(max_workers=1, block_allocation=True, disable_dependencies=True) as exe: 30 | future_lst = [exe.submit(sleep_funct, 0.1) for _ in range(4)] 31 | self.assertEqual([f.done() for f in future_lst], [False, False, False, False]) 32 | self.assertEqual(len(exe), 4) 33 | self.assertEqual(exe.max_workers, 1) 34 | future_lst[0].result() 35 | exe.max_workers = 2 36 | self.assertEqual(exe.max_workers, 2) 37 | self.assertTrue(len(exe) >= 1) 38 | self.assertEqual(len(exe._task_scheduler._process), 2) 39 | self.assertEqual([f.done() for f in future_lst], [True, False, False, False]) 40 | self.assertEqual([f.result() for f in future_lst], [0.1, 0.1, 0.1, 0.1]) 41 | self.assertEqual([f.done() for f in future_lst], [True, True, True, True]) 42 | 43 | def test_with_dependencies_decrease(self): 44 | cloudpickle_register(ind=1) 45 | with SingleNodeExecutor(max_workers=2, block_allocation=True, disable_dependencies=False) as exe: 46 | future_lst = [exe.submit(sleep_funct, 1) for _ in range(4)] 47 | self.assertEqual([f.done() for f in future_lst], [False, False, False, False]) 48 | self.assertEqual(len(exe), 4) 49 | 
sleep_funct(sec=0.5) 50 | exe.max_workers = 1 51 | self.assertTrue(1 <= sum([f.done() for f in future_lst]) < 3) 52 | self.assertEqual([f.result() for f in future_lst], [1, 1, 1, 1]) 53 | self.assertEqual([f.done() for f in future_lst], [True, True, True, True]) 54 | 55 | def test_with_dependencies_increase(self): 56 | cloudpickle_register(ind=1) 57 | with SingleNodeExecutor(max_workers=1, block_allocation=True, disable_dependencies=False) as exe: 58 | future_lst = [exe.submit(sleep_funct, 0.1) for _ in range(4)] 59 | self.assertEqual([f.done() for f in future_lst], [False, False, False, False]) 60 | self.assertEqual(len(exe), 4) 61 | self.assertEqual(exe.max_workers, 1) 62 | future_lst[0].result() 63 | exe.max_workers = 2 64 | self.assertEqual(exe.max_workers, 2) 65 | self.assertEqual([f.done() for f in future_lst], [True, False, False, False]) 66 | self.assertEqual([f.result() for f in future_lst], [0.1, 0.1, 0.1, 0.1]) 67 | self.assertEqual([f.done() for f in future_lst], [True, True, True, True]) 68 | 69 | def test_no_block_allocation(self): 70 | with self.assertRaises(NotImplementedError): 71 | with SingleNodeExecutor(block_allocation=False, disable_dependencies=False) as exe: 72 | exe.max_workers = 2 73 | with self.assertRaises(NotImplementedError): 74 | with SingleNodeExecutor(block_allocation=False, disable_dependencies=True) as exe: 75 | exe.max_workers = 2 76 | 77 | def test_max_workers_stopped_executor(self): 78 | exe = SingleNodeExecutor(block_allocation=True) 79 | exe.shutdown(wait=True) 80 | self.assertIsNone(exe.max_workers) 81 | -------------------------------------------------------------------------------- /tests/test_singlenodeexecutor_shell_executor.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import subprocess 3 | import queue 4 | import unittest 5 | 6 | from executorlib import SingleNodeExecutor 7 | from executorlib.standalone.serialize import cloudpickle_register 8 | from executorlib.task_scheduler.interactive.shared import execute_tasks 9 | from executorlib.standalone.interactive.spawner import MpiExecSpawner 10 | 11 | 12 | def submit_shell_command( 13 | command: list, universal_newlines: bool = True, shell: bool = False 14 | ): 15 | return subprocess.check_output( 16 | command, universal_newlines=universal_newlines, shell=shell 17 | ) 18 | 19 | 20 | class SubprocessExecutorTest(unittest.TestCase): 21 | def test_execute_single_task(self): 22 | test_queue = queue.Queue() 23 | f = Future() 24 | test_queue.put( 25 | { 26 | "fn": submit_shell_command, 27 | "args": [["echo", "test"]], 28 | "kwargs": {"universal_newlines": True}, 29 | "future": f, 30 | } 31 | ) 32 | test_queue.put({"shutdown": True, "wait": True}) 33 | cloudpickle_register(ind=1) 34 | self.assertFalse(f.done()) 35 | execute_tasks( 36 | future_queue=test_queue, 37 | cores=1, 38 | openmpi_oversubscribe=False, 39 | spawner=MpiExecSpawner, 40 | ) 41 | self.assertTrue(f.done()) 42 | self.assertEqual("test\n", f.result()) 43 | test_queue.join() 44 | 45 | def test_wrong_error(self): 46 | test_queue = queue.Queue() 47 | f = Future() 48 | test_queue.put( 49 | { 50 | "fn": submit_shell_command, 51 | "args": [["echo", "test"]], 52 | "kwargs": {"wrong_key": True}, 53 | "future": f, 54 | } 55 | ) 56 | test_queue.put( 57 | {"shutdown": True, "wait": True} 58 | ) 59 | cloudpickle_register(ind=1) 60 | with self.assertRaises(TypeError): 61 | execute_tasks( 62 | future_queue=test_queue, 63 | cores=1, 64 | openmpi_oversubscribe=False, 65 | 
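# MpiExecSpawner (imported above) launches the worker process via mpiexec;
# with cores=1 this runs a single serial worker.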
spawner=MpiExecSpawner, 66 | ) 67 | f.result() 68 | 69 | def test_broken_executable(self): 70 | test_queue = queue.Queue() 71 | f = Future() 72 | test_queue.put( 73 | { 74 | "fn": submit_shell_command, 75 | "args": [["/executable/does/not/exist"]], 76 | "kwargs": {"universal_newlines": True}, 77 | "future": f, 78 | } 79 | ) 80 | test_queue.put( 81 | { 82 | "shutdown": True, 83 | "wait": True, 84 | } 85 | ) 86 | cloudpickle_register(ind=1) 87 | with self.assertRaises(FileNotFoundError): 88 | execute_tasks( 89 | future_queue=test_queue, 90 | cores=1, 91 | openmpi_oversubscribe=False, 92 | spawner=MpiExecSpawner, 93 | ) 94 | f.result() 95 | 96 | def test_shell_static_executor_args(self): 97 | with SingleNodeExecutor(max_workers=1) as exe: 98 | cloudpickle_register(ind=1) 99 | future = exe.submit( 100 | submit_shell_command, 101 | ["echo", "test"], 102 | universal_newlines=True, 103 | shell=False, 104 | ) 105 | self.assertFalse(future.done()) 106 | self.assertEqual("test\n", future.result()) 107 | self.assertTrue(future.done()) 108 | 109 | def test_shell_static_executor_binary(self): 110 | with SingleNodeExecutor(max_workers=1) as exe: 111 | cloudpickle_register(ind=1) 112 | future = exe.submit( 113 | submit_shell_command, 114 | ["echo", "test"], 115 | universal_newlines=False, 116 | shell=False, 117 | ) 118 | self.assertFalse(future.done()) 119 | self.assertEqual(b"test\n", future.result()) 120 | self.assertTrue(future.done()) 121 | 122 | def test_shell_static_executor_shell(self): 123 | with SingleNodeExecutor(max_workers=1) as exe: 124 | cloudpickle_register(ind=1) 125 | future = exe.submit( 126 | submit_shell_command, "echo test", universal_newlines=True, shell=True 127 | ) 128 | self.assertFalse(future.done()) 129 | self.assertEqual("test\n", future.result()) 130 | self.assertTrue(future.done()) 131 | 132 | def test_shell_executor(self): 133 | with SingleNodeExecutor(max_workers=2) as exe: 134 | cloudpickle_register(ind=1) 135 | f_1 = exe.submit( 136 | submit_shell_command, ["echo", "test_1"], universal_newlines=True 137 | ) 138 | f_2 = exe.submit( 139 | submit_shell_command, ["echo", "test_2"], universal_newlines=True 140 | ) 141 | f_3 = exe.submit( 142 | submit_shell_command, ["echo", "test_3"], universal_newlines=True 143 | ) 144 | f_4 = exe.submit( 145 | submit_shell_command, ["echo", "test_4"], universal_newlines=True 146 | ) 147 | self.assertFalse(f_1.done()) 148 | self.assertFalse(f_2.done()) 149 | self.assertFalse(f_3.done()) 150 | self.assertFalse(f_4.done()) 151 | self.assertEqual("test_1\n", f_1.result()) 152 | self.assertEqual("test_2\n", f_2.result()) 153 | self.assertTrue(f_1.done()) 154 | self.assertTrue(f_2.done()) 155 | self.assertEqual("test_3\n", f_3.result()) 156 | self.assertEqual("test_4\n", f_4.result()) 157 | self.assertTrue(f_1.done()) 158 | self.assertTrue(f_2.done()) 159 | self.assertTrue(f_3.done()) 160 | self.assertTrue(f_4.done()) 161 | -------------------------------------------------------------------------------- /tests/test_singlenodeexecutor_shell_interactive.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import os 3 | import subprocess 4 | import queue 5 | import unittest 6 | 7 | from executorlib import SingleNodeExecutor 8 | from executorlib.standalone.serialize import cloudpickle_register 9 | from executorlib.task_scheduler.interactive.shared import execute_tasks 10 | from executorlib.standalone.interactive.spawner import MpiExecSpawner 11 | 12 | 13 | executable_path = 
os.path.join(os.path.dirname(__file__), "executables", "count.py") 14 | 15 | 16 | def init_process(): 17 | return { 18 | "process": subprocess.Popen( 19 | ["python", executable_path], 20 | stdin=subprocess.PIPE, 21 | stdout=subprocess.PIPE, 22 | universal_newlines=True, 23 | shell=False, 24 | ) 25 | } 26 | 27 | 28 | def interact(shell_input, process, lines_to_read=None, stop_read_pattern=None): 29 | process.stdin.write(shell_input) 30 | process.stdin.flush() 31 | lines_count = 0 32 | output = "" 33 | while True: 34 | output_current = process.stdout.readline() 35 | output += output_current 36 | lines_count += 1 37 | if stop_read_pattern is not None and stop_read_pattern in output_current: 38 | break 39 | elif lines_to_read is not None and lines_to_read == lines_count: 40 | break 41 | return output 42 | 43 | 44 | def shutdown(process): 45 | process.stdin.write("shutdown\n") 46 | process.stdin.flush() 47 | 48 | 49 | class ShellInteractiveExecutorTest(unittest.TestCase): 50 | def test_execute_single_task(self): 51 | test_queue = queue.Queue() 52 | future_lines = Future() 53 | future_pattern = Future() 54 | future_shutdown = Future() 55 | test_queue.put( 56 | { 57 | "fn": interact, 58 | "future": future_lines, 59 | "args": (), 60 | "kwargs": { 61 | "shell_input": "4\n", 62 | "lines_to_read": 5, 63 | "stop_read_pattern": None, 64 | }, 65 | } 66 | ) 67 | test_queue.put( 68 | { 69 | "fn": interact, 70 | "future": future_pattern, 71 | "args": (), 72 | "kwargs": { 73 | "shell_input": "4\n", 74 | "lines_to_read": None, 75 | "stop_read_pattern": "done", 76 | }, 77 | } 78 | ) 79 | test_queue.put( 80 | { 81 | "fn": shutdown, 82 | "future": future_shutdown, 83 | "args": (), 84 | "kwargs": {}, 85 | } 86 | ) 87 | test_queue.put({"shutdown": True, "wait": True}) 88 | cloudpickle_register(ind=1) 89 | self.assertFalse(future_lines.done()) 90 | self.assertFalse(future_pattern.done()) 91 | execute_tasks( 92 | future_queue=test_queue, 93 | cores=1, 94 | openmpi_oversubscribe=False, 95 | spawner=MpiExecSpawner, 96 | init_function=init_process, 97 | ) 98 | self.assertTrue(future_lines.done()) 99 | self.assertTrue(future_pattern.done()) 100 | self.assertTrue(future_shutdown.done()) 101 | self.assertEqual("0\n1\n2\n3\ndone\n", future_lines.result()) 102 | self.assertEqual("0\n1\n2\n3\ndone\n", future_pattern.result()) 103 | test_queue.join() 104 | 105 | def test_shell_interactive_executor(self): 106 | cloudpickle_register(ind=1) 107 | with SingleNodeExecutor( 108 | max_workers=1, 109 | init_function=init_process, 110 | block_allocation=True, 111 | ) as exe: 112 | future_lines = exe.submit( 113 | interact, shell_input="4\n", lines_to_read=5, stop_read_pattern=None 114 | ) 115 | future_pattern = exe.submit( 116 | interact, 117 | shell_input="4\n", 118 | lines_to_read=None, 119 | stop_read_pattern="done", 120 | ) 121 | self.assertFalse(future_lines.done()) 122 | self.assertFalse(future_pattern.done()) 123 | self.assertEqual("0\n1\n2\n3\ndone\n", future_lines.result()) 124 | self.assertEqual("0\n1\n2\n3\ndone\n", future_pattern.result()) 125 | self.assertTrue(future_lines.done()) 126 | self.assertTrue(future_pattern.done()) 127 | future_shutdown = exe.submit(shutdown) 128 | self.assertIsNone(future_shutdown.result()) 129 | self.assertTrue(future_shutdown.done()) 130 | -------------------------------------------------------------------------------- /tests/test_standalone_hdf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import unittest 4 | 5 | 6 | 
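# h5py is an optional dependency, so the hdf helpers are imported inside a
# try-block; when the import fails, skip_h5py_test makes the skipIf decorator
# below skip the whole test class.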
try: 7 | from executorlib.task_scheduler.file.hdf import ( 8 | dump, 9 | load, 10 | get_output, 11 | get_runtime, 12 | get_queue_id, 13 | ) 14 | 15 | skip_h5py_test = False 16 | except ImportError: 17 | skip_h5py_test = True 18 | 19 | 20 | def my_funct(a, b): 21 | return a + b 22 | 23 | 24 | @unittest.skipIf( 25 | skip_h5py_test, "h5py is not installed, so the h5io tests are skipped." 26 | ) 27 | class TestSharedFunctions(unittest.TestCase): 28 | def test_hdf_mixed(self): 29 | cache_directory = os.path.abspath("cache") 30 | os.makedirs(cache_directory, exist_ok=True) 31 | file_name = os.path.join(cache_directory, "test_mixed.h5") 32 | a = 1 33 | b = 2 34 | dump( 35 | file_name=file_name, 36 | data_dict={"fn": my_funct, "args": [a], "kwargs": {"b": b}}, 37 | ) 38 | data_dict = load(file_name=file_name) 39 | self.assertTrue("fn" in data_dict.keys()) 40 | self.assertEqual(data_dict["args"], [a]) 41 | self.assertEqual(data_dict["kwargs"], {"b": b}) 42 | flag, no_error, output = get_output(file_name=file_name) 43 | self.assertTrue(get_runtime(file_name=file_name) == 0.0) 44 | self.assertFalse(no_error) 45 | self.assertFalse(flag) 46 | self.assertIsNone(output) 47 | 48 | def test_hdf_args(self): 49 | cache_directory = os.path.abspath("cache") 50 | os.makedirs(cache_directory, exist_ok=True) 51 | file_name = os.path.join(cache_directory, "test_args.h5") 52 | a = 1 53 | b = 2 54 | dump(file_name=file_name, data_dict={"fn": my_funct, "args": [a, b]}) 55 | data_dict = load(file_name=file_name) 56 | self.assertTrue("fn" in data_dict.keys()) 57 | self.assertEqual(data_dict["args"], [a, b]) 58 | self.assertEqual(data_dict["kwargs"], {}) 59 | flag, no_error, output = get_output(file_name=file_name) 60 | self.assertTrue(get_runtime(file_name=file_name) == 0.0) 61 | self.assertFalse(flag) 62 | self.assertFalse(no_error) 63 | self.assertIsNone(output) 64 | 65 | def test_hdf_kwargs(self): 66 | cache_directory = os.path.abspath("cache") 67 | os.makedirs(cache_directory, exist_ok=True) 68 | file_name = os.path.join(cache_directory, "test_kwargs.h5") 69 | a = 1 70 | b = 2 71 | dump( 72 | file_name=file_name, 73 | data_dict={ 74 | "fn": my_funct, 75 | "args": (), 76 | "kwargs": {"a": a, "b": b}, 77 | "queue_id": 123, 78 | }, 79 | ) 80 | data_dict = load(file_name=file_name) 81 | self.assertTrue("fn" in data_dict.keys()) 82 | self.assertEqual(data_dict["args"], ()) 83 | self.assertEqual(data_dict["kwargs"], {"a": a, "b": b}) 84 | self.assertEqual(get_queue_id(file_name=file_name), 123) 85 | flag, no_error, output = get_output(file_name=file_name) 86 | self.assertTrue(get_runtime(file_name=file_name) == 0.0) 87 | self.assertFalse(flag) 88 | self.assertFalse(no_error) 89 | self.assertIsNone(output) 90 | 91 | def test_hdf_queue_id(self): 92 | cache_directory = os.path.abspath("cache") 93 | os.makedirs(cache_directory, exist_ok=True) 94 | file_name = os.path.join(cache_directory, "test_queue.h5") 95 | queue_id = 123 96 | dump( 97 | file_name=file_name, 98 | data_dict={"queue_id": queue_id}, 99 | ) 100 | self.assertEqual(get_queue_id(file_name=file_name), 123) 101 | flag, no_error, output = get_output(file_name=file_name) 102 | self.assertTrue(get_runtime(file_name=file_name) == 0.0) 103 | self.assertFalse(flag) 104 | self.assertFalse(no_error) 105 | self.assertIsNone(output) 106 | 107 | def test_hdf_error(self): 108 | cache_directory = os.path.abspath("cache") 109 | os.makedirs(cache_directory, exist_ok=True) 110 | file_name = os.path.join(cache_directory, "test_error.h5") 111 | error = ValueError() 112 | dump( 113 
| file_name=file_name, 114 | data_dict={"error": error}, 115 | ) 116 | flag, no_error, output = get_output(file_name=file_name) 117 | self.assertTrue(get_runtime(file_name=file_name) == 0.0) 118 | self.assertTrue(flag) 119 | self.assertFalse(no_error) 120 | self.assertTrue(isinstance(output, error.__class__)) 121 | 122 | def tearDown(self): 123 | if os.path.exists("cache"): 124 | shutil.rmtree("cache") 125 | -------------------------------------------------------------------------------- /tests/test_standalone_inputcheck.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from executorlib.standalone.inputcheck import ( 4 | check_command_line_argument_lst, 5 | check_gpus_per_worker, 6 | check_oversubscribe, 7 | check_executor, 8 | check_init_function, 9 | check_nested_flux_executor, 10 | check_flux_log_files, 11 | check_pmi, 12 | check_plot_dependency_graph, 13 | check_refresh_rate, 14 | check_resource_dict, 15 | check_resource_dict_is_empty, 16 | check_flux_executor_pmi_mode, 17 | check_max_workers_and_cores, 18 | check_hostname_localhost, 19 | check_pysqa_config_directory, 20 | check_file_exists, 21 | validate_number_of_cores, 22 | ) 23 | 24 | 25 | class TestInputCheck(unittest.TestCase): 26 | def test_check_command_line_argument_lst(self): 27 | with self.assertRaises(ValueError): 28 | check_command_line_argument_lst(command_line_argument_lst=["a"]) 29 | 30 | def test_check_gpus_per_worker(self): 31 | with self.assertRaises(TypeError): 32 | check_gpus_per_worker(gpus_per_worker=1) 33 | 34 | def test_check_oversubscribe(self): 35 | with self.assertRaises(ValueError): 36 | check_oversubscribe(oversubscribe=True) 37 | 38 | def test_check_executor(self): 39 | with self.assertRaises(ValueError): 40 | check_executor(executor=1) 41 | 42 | def test_check_init_function(self): 43 | with self.assertRaises(ValueError): 44 | check_init_function(init_function=1, block_allocation=False) 45 | 46 | def test_check_refresh_rate(self): 47 | with self.assertRaises(ValueError): 48 | check_refresh_rate(refresh_rate=1) 49 | 50 | def test_check_resource_dict(self): 51 | def simple_function(resource_dict): 52 | return resource_dict 53 | 54 | with self.assertRaises(ValueError): 55 | check_resource_dict(function=simple_function) 56 | 57 | def test_check_resource_dict_is_empty(self): 58 | with self.assertRaises(ValueError): 59 | check_resource_dict_is_empty(resource_dict={"a": 1}) 60 | 61 | def test_check_pmi(self): 62 | with self.assertRaises(ValueError): 63 | check_pmi(backend="test", pmi="test") 64 | with self.assertRaises(ValueError): 65 | check_pmi(backend="flux_allocation", pmi="test") 66 | 67 | def test_check_nested_flux_executor(self): 68 | with self.assertRaises(ValueError): 69 | check_nested_flux_executor(nested_flux_executor=True) 70 | 71 | def test_check_flux_log_files(self): 72 | with self.assertRaises(ValueError): 73 | check_flux_log_files(flux_log_files=True) 74 | 75 | def test_check_plot_dependency_graph(self): 76 | with self.assertRaises(ValueError): 77 | check_plot_dependency_graph(plot_dependency_graph=True) 78 | 79 | def test_check_flux_executor_pmi_mode(self): 80 | with self.assertRaises(ValueError): 81 | check_flux_executor_pmi_mode(flux_executor_pmi_mode="test") 82 | 83 | def test_check_max_workers_and_cores(self): 84 | with self.assertRaises(ValueError): 85 | check_max_workers_and_cores(max_workers=2, max_cores=None) 86 | with self.assertRaises(ValueError): 87 | check_max_workers_and_cores(max_workers=None, max_cores=2) 88 | with 
self.assertRaises(ValueError): 89 | check_max_workers_and_cores(max_workers=2, max_cores=2) 90 | 91 | def test_check_hostname_localhost(self): 92 | with self.assertRaises(ValueError): 93 | check_hostname_localhost(hostname_localhost=True) 94 | with self.assertRaises(ValueError): 95 | check_hostname_localhost(hostname_localhost=False) 96 | 97 | def test_check_pysqa_config_directory(self): 98 | with self.assertRaises(ValueError): 99 | check_pysqa_config_directory(pysqa_config_directory="path/to/config") 100 | 101 | def test_check_file_exists(self): 102 | with self.assertRaises(ValueError): 103 | check_file_exists(file_name=None) 104 | with self.assertRaises(ValueError): 105 | check_file_exists(file_name="/path/does/not/exist") 106 | 107 | def test_validate_number_of_cores(self): 108 | with self.assertRaises(ValueError): 109 | validate_number_of_cores( 110 | max_cores=None, max_workers=None, cores_per_worker=None 111 | ) 112 | self.assertIsInstance( 113 | validate_number_of_cores(max_cores=1, max_workers=None, cores_per_worker=1), 114 | int, 115 | ) 116 | self.assertIsInstance( 117 | validate_number_of_cores( 118 | max_cores=None, max_workers=1, cores_per_worker=None 119 | ), 120 | int, 121 | ) 122 | -------------------------------------------------------------------------------- /tests/test_standalone_interactive_arguments.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import unittest 3 | 4 | from executorlib.standalone.interactive.arguments import ( 5 | check_exception_was_raised, 6 | get_exception_lst, 7 | get_future_objects_from_input, 8 | update_futures_in_input, 9 | ) 10 | 11 | 12 | class TestSerial(unittest.TestCase): 13 | def test_get_future_objects_from_input_with_future(self): 14 | input_args = (1, 2, Future(), [Future()], {3: Future()}) 15 | input_kwargs = {"a": 1, "b": [Future()], "c": {"d": Future()}, "e": Future()} 16 | future_lst, boolean_flag = get_future_objects_from_input(args=input_args, kwargs=input_kwargs) 17 | self.assertEqual(len(future_lst), 6) 18 | self.assertFalse(boolean_flag) 19 | 20 | def test_get_future_objects_from_input_without_future(self): 21 | input_args = (1, 2) 22 | input_kwargs = {"a": 1} 23 | future_lst, boolean_flag = get_future_objects_from_input(args=input_args, kwargs=input_kwargs) 24 | self.assertEqual(len(future_lst), 0) 25 | self.assertTrue(boolean_flag) 26 | 27 | def test_update_futures_in_input_with_future(self): 28 | f1 = Future() 29 | f1.set_result(1) 30 | f2 = Future() 31 | f2.set_result(2) 32 | f3 = Future() 33 | f3.set_result(3) 34 | f4 = Future() 35 | f4.set_result(4) 36 | f5 = Future() 37 | f5.set_result(5) 38 | f6 = Future() 39 | f6.set_result(6) 40 | input_args = (1, 2, f1, [f2], {3: f3}) 41 | input_kwargs = {"a": 1, "b": [f4], "c": {"d": f5}, "e": f6} 42 | output_args, output_kwargs = update_futures_in_input(args=input_args, kwargs=input_kwargs) 43 | self.assertEqual(output_args, (1, 2, 1, [2], {3: 3})) 44 | self.assertEqual(output_kwargs, {"a": 1, "b": [4], "c": {"d": 5}, "e": 6}) 45 | 46 | def test_update_futures_in_input_without_future(self): 47 | input_args = (1, 2) 48 | input_kwargs = {"a": 1} 49 | output_args, output_kwargs = update_futures_in_input(args=input_args, kwargs=input_kwargs) 50 | self.assertEqual(input_args, output_args) 51 | self.assertEqual(input_kwargs, output_kwargs) 52 | 53 | def test_check_exception_was_raised(self): 54 | f_with_exception = Future() 55 | f_with_exception.set_exception(ValueError()) 56 | f_without_exception = 
Future() 57 | self.assertTrue(check_exception_was_raised(future_obj=f_with_exception)) 58 | self.assertFalse(check_exception_was_raised(future_obj=f_without_exception)) 59 | 60 | def test_get_exception_lst(self): 61 | f_with_exception = Future() 62 | f_with_exception.set_exception(ValueError()) 63 | f_without_exception = Future() 64 | future_with_exception_lst = [f_with_exception, f_with_exception, f_without_exception, f_without_exception, f_with_exception] 65 | future_without_exception_lst = [f_without_exception, f_without_exception, f_without_exception, f_without_exception] 66 | exception_lst = get_exception_lst(future_lst=future_with_exception_lst) 67 | self.assertEqual(len(exception_lst), 3) 68 | exception_lst = get_exception_lst(future_lst=future_without_exception_lst) 69 | self.assertEqual(len(exception_lst), 0) 70 | -------------------------------------------------------------------------------- /tests/test_standalone_interactive_backend.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import unittest 4 | 5 | from executorlib.standalone.interactive.backend import parse_arguments 6 | from executorlib.standalone.interactive.spawner import MpiExecSpawner 7 | from executorlib.task_scheduler.interactive.slurmspawner import SrunSpawner 8 | 9 | 10 | class TestParser(unittest.TestCase): 11 | def test_command_local(self): 12 | result_dict = { 13 | "host": "localhost", 14 | "zmqport": "22", 15 | } 16 | command_lst = [ 17 | "mpiexec", 18 | "-n", 19 | "2", 20 | "--oversubscribe", 21 | sys.executable, 22 | "/", 23 | "--zmqport", 24 | result_dict["zmqport"], 25 | ] 26 | interface = MpiExecSpawner(cwd=None, cores=2, openmpi_oversubscribe=True) 27 | self.assertEqual( 28 | command_lst, 29 | interface.generate_command( 30 | command_lst=[sys.executable, "/", "--zmqport", result_dict["zmqport"]] 31 | ), 32 | ) 33 | self.assertEqual(result_dict, parse_arguments(command_lst)) 34 | 35 | def test_command_slurm(self): 36 | result_dict = { 37 | "host": "127.0.0.1", 38 | "zmqport": "22", 39 | } 40 | command_lst = [ 41 | "srun", 42 | "-n", 43 | "2", 44 | "-D", 45 | os.path.abspath("."), 46 | "--gpus-per-task=1", 47 | "--oversubscribe", 48 | sys.executable, 49 | "/", 50 | "--host", 51 | result_dict["host"], 52 | "--zmqport", 53 | result_dict["zmqport"], 54 | ] 55 | interface = SrunSpawner( 56 | cwd=os.path.abspath("."), 57 | cores=2, 58 | gpus_per_core=1, 59 | openmpi_oversubscribe=True, 60 | ) 61 | self.assertEqual( 62 | command_lst, 63 | interface.generate_command( 64 | command_lst=[ 65 | sys.executable, 66 | "/", 67 | "--host", 68 | result_dict["host"], 69 | "--zmqport", 70 | result_dict["zmqport"], 71 | ] 72 | ), 73 | ) 74 | self.assertEqual(result_dict, parse_arguments(command_lst)) 75 | 76 | def test_command_slurm_user_command(self): 77 | result_dict = { 78 | "host": "127.0.0.1", 79 | "zmqport": "22", 80 | } 81 | command_lst = [ 82 | "srun", 83 | "-n", 84 | "2", 85 | "-D", 86 | os.path.abspath("."), 87 | "--gpus-per-task=1", 88 | "--oversubscribe", 89 | "--account=test", 90 | "--job-name=executorlib", 91 | sys.executable, 92 | "/", 93 | "--host", 94 | result_dict["host"], 95 | "--zmqport", 96 | result_dict["zmqport"], 97 | ] 98 | interface = SrunSpawner( 99 | cwd=os.path.abspath("."), 100 | cores=2, 101 | gpus_per_core=1, 102 | openmpi_oversubscribe=True, 103 | slurm_cmd_args=["--account=test", "--job-name=executorlib"], 104 | ) 105 | self.assertEqual( 106 | command_lst, 107 | interface.generate_command( 108 | command_lst=[ 109 | 
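# user-facing part of the command only; generate_command prepends the srun
# invocation, including the slurm_cmd_args supplied above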
sys.executable, 110 | "/", 111 | "--host", 112 | result_dict["host"], 113 | "--zmqport", 114 | result_dict["zmqport"], 115 | ] 116 | ), 117 | ) 118 | self.assertEqual(result_dict, parse_arguments(command_lst)) 119 | -------------------------------------------------------------------------------- /tests/test_standalone_interactive_communication.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | import os 3 | import sys 4 | import unittest 5 | 6 | import numpy as np 7 | import zmq 8 | 9 | from executorlib.standalone.interactive.communication import ( 10 | interface_connect, 11 | interface_shutdown, 12 | interface_send, 13 | interface_receive, 14 | SocketInterface, 15 | ) 16 | from executorlib.standalone.serialize import cloudpickle_register 17 | from executorlib.standalone.interactive.spawner import MpiExecSpawner 18 | 19 | 20 | skip_mpi4py_test = importlib.util.find_spec("mpi4py") is None 21 | 22 | 23 | def calc(i): 24 | return np.array(i**2) 25 | 26 | 27 | class TestInterface(unittest.TestCase): 28 | @unittest.skipIf( 29 | skip_mpi4py_test, "mpi4py is not installed, so the mpi4py tests are skipped." 30 | ) 31 | def test_interface_mpi(self): 32 | cloudpickle_register(ind=1) 33 | task_dict = {"fn": calc, "args": (), "kwargs": {"i": 2}} 34 | interface = SocketInterface( 35 | spawner=MpiExecSpawner(cwd=None, cores=1, openmpi_oversubscribe=False) 36 | ) 37 | interface.bootup( 38 | command_lst=[ 39 | sys.executable, 40 | os.path.abspath( 41 | os.path.join( 42 | __file__, 43 | "..", 44 | "..", 45 | "executorlib", 46 | "backend", 47 | "interactive_parallel.py", 48 | ) 49 | ), 50 | "--zmqport", 51 | str(interface.bind_to_random_port()), 52 | ] 53 | ) 54 | self.assertEqual( 55 | interface.send_and_receive_dict(input_dict=task_dict), np.array(4) 56 | ) 57 | interface.shutdown(wait=True) 58 | 59 | def test_interface_serial(self): 60 | cloudpickle_register(ind=1) 61 | task_dict = {"fn": calc, "args": (), "kwargs": {"i": 2}} 62 | interface = SocketInterface( 63 | spawner=MpiExecSpawner(cwd=None, cores=1, openmpi_oversubscribe=False) 64 | ) 65 | interface.bootup( 66 | command_lst=[ 67 | sys.executable, 68 | os.path.abspath( 69 | os.path.join( 70 | __file__, 71 | "..", 72 | "..", 73 | "executorlib", 74 | "backend", 75 | "interactive_serial.py", 76 | ) 77 | ), 78 | "--zmqport", 79 | str(interface.bind_to_random_port()), 80 | ] 81 | ) 82 | self.assertEqual( 83 | interface.send_and_receive_dict(input_dict=task_dict), np.array(4) 84 | ) 85 | interface.shutdown(wait=True) 86 | 87 | 88 | class TestZMQ(unittest.TestCase): 89 | def test_interface_receive(self): 90 | self.assertEqual(len(interface_receive(socket=None)), 0) 91 | 92 | def test_initialize_zmq(self): 93 | message = "test" 94 | host = "localhost" 95 | 96 | context_server = zmq.Context() 97 | socket_server = context_server.socket(zmq.PAIR) 98 | port = str(socket_server.bind_to_random_port("tcp://*")) 99 | context_client, socket_client = interface_connect(host=host, port=port) 100 | interface_send(socket=socket_server, result_dict={"message": message}) 101 | self.assertEqual(interface_receive(socket=socket_client), {"message": message}) 102 | interface_shutdown(socket=socket_client, context=context_client) 103 | interface_shutdown(socket=socket_server, context=context_server) 104 | --------------------------------------------------------------------------------