├── .ci_support ├── check.py ├── environment-docs.yml ├── environment-integration.yml ├── environment-mini.yml ├── environment-mpich.yml ├── environment-notebooks.yml ├── environment-old.yml ├── environment-openmpi.yml ├── environment-uml.yml ├── environment-win.yml └── release.py ├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ ├── dependabot.yml │ ├── deploy.yml │ └── pipeline.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── README.md ├── binder ├── environment.yml ├── kernel.json └── postBuild ├── codemeta.json ├── docs ├── _config.yml ├── _templates │ ├── custom-class-template.rst │ └── custom-module-template.rst ├── _toc.yml ├── api.rst ├── application.md ├── images │ └── pyiron-logo.png ├── installation.md ├── paper │ ├── paper.bib │ ├── paper.md │ └── process.png └── trouble_shooting.md ├── executorlib ├── __init__.py ├── _version.py ├── api.py ├── backend │ ├── __init__.py │ ├── cache_parallel.py │ ├── cache_serial.py │ ├── interactive_parallel.py │ └── interactive_serial.py ├── executor │ ├── __init__.py │ ├── base.py │ ├── flux.py │ ├── single.py │ └── slurm.py ├── standalone │ ├── __init__.py │ ├── cache.py │ ├── command.py │ ├── inputcheck.py │ ├── interactive │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── backend.py │ │ ├── communication.py │ │ └── spawner.py │ ├── plot.py │ ├── queue.py │ └── serialize.py └── task_scheduler │ ├── __init__.py │ ├── base.py │ ├── file │ ├── __init__.py │ ├── backend.py │ ├── hdf.py │ ├── queue_spawner.py │ ├── shared.py │ ├── subprocess_spawner.py │ └── task_scheduler.py │ └── interactive │ ├── __init__.py │ ├── blockallocation.py │ ├── dependency.py │ ├── fluxspawner.py │ ├── onetoone.py │ ├── shared.py │ └── slurmspawner.py ├── notebooks ├── 1-single-node.ipynb ├── 2-hpc-cluster.ipynb ├── 3-hpc-job.ipynb ├── 4-developer.ipynb ├── 5-1-gpaw.ipynb ├── 5-2-quantum-espresso.ipynb └── images │ ├── uml_executor.png │ └── uml_spawner.png ├── pyproject.toml └── tests ├── __init__.py ├── benchmark ├── llh.py └── test_results.py ├── executables └── count.py ├── integration └── Al.pbe-n-kjpaw_psl.1.0.0.UPF ├── test_backend_interactive_serial.py ├── test_base_executor_queue.py ├── test_cache_backend_execute.py ├── test_cache_fileexecutor_mpi.py ├── test_cache_fileexecutor_serial.py ├── test_fluxclusterexecutor.py ├── test_fluxjobexecutor.py ├── test_fluxjobexecutor_plot.py ├── test_fluxpythonspawner.py ├── test_integration_pyiron_workflow.py ├── test_interactive_dependencies.py ├── test_interactive_slurmspawner.py ├── test_mpiexecspawner.py ├── test_singlenodeexecutor_cache.py ├── test_singlenodeexecutor_dependencies.py ├── test_singlenodeexecutor_mpi.py ├── test_singlenodeexecutor_noblock.py ├── test_singlenodeexecutor_plot_dependency.py ├── test_singlenodeexecutor_resize.py ├── test_singlenodeexecutor_shell_executor.py ├── test_singlenodeexecutor_shell_interactive.py ├── test_standalone_hdf.py ├── test_standalone_inputcheck.py ├── test_standalone_interactive_arguments.py ├── test_standalone_interactive_backend.py └── test_standalone_interactive_communication.py /.ci_support/check.py: -------------------------------------------------------------------------------- 1 | import tomlkit 2 | 3 | 4 | if __name__ == "__main__": 5 | with open("pyproject.toml", "r") as f: 6 | data = tomlkit.load(f) 7 | 8 | lst = list(data['build-system']['requires']) 9 | for sub_lst in data["project"]["optional-dependencies"].values(): 10 | for el in sub_lst: 11 | lst.append(el) 12 | 13 | 
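    # extend the runtime dependencies with the deduplicated union of the
    # build-system requirements and all optional dependencies collected above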
data["project"]["dependencies"] += list(set(lst)) 14 | 15 | with open("pyproject.toml", "w") as f: 16 | f.writelines(tomlkit.dumps(data)) -------------------------------------------------------------------------------- /.ci_support/environment-docs.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - hatchling 5 | - hatch-vcs 6 | - nbsphinx 7 | - sphinx 8 | - sphinx_rtd_theme 9 | - myst-parser 10 | - numpy 11 | - openmpi 12 | - cloudpickle =3.1.1 13 | - mpi4py =4.0.1 14 | - pyzmq =26.4.0 15 | - flux-core 16 | - jupyter-book =1.0.0 17 | - h5py =3.13.0 18 | - python =3.12 19 | -------------------------------------------------------------------------------- /.ci_support/environment-integration.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - jupyter 5 | - papermill 6 | - numpy 7 | - openmpi 8 | - cloudpickle =3.1.1 9 | - mpi4py 10 | - pyzmq =26.3.0 11 | - flux-core =0.59.0 12 | - hatchling =1.27.0 13 | - hatch-vcs =0.5.0 14 | - h5py 15 | - matplotlib =3.10.0 16 | - networkx =3.4.2 17 | - pygraphviz =1.14 18 | - pysqa =0.2.4 19 | - ipython =9.0.2 20 | - atomistics =0.2.4 21 | - qe =7.2 22 | - gpaw =24.6.0 23 | -------------------------------------------------------------------------------- /.ci_support/environment-mini.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - numpy 6 | - cloudpickle =3.1.0 7 | - pyzmq =26.2.0 8 | - hatchling =1.27.0 9 | - hatch-vcs =0.5.0 10 | -------------------------------------------------------------------------------- /.ci_support/environment-mpich.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - numpy 6 | - mpich 7 | - cloudpickle =3.1.1 8 | - mpi4py =4.0.1 9 | - pyzmq =26.4.0 10 | - h5py =3.13.0 11 | - networkx =3.4.2 12 | - pygraphviz =1.14 13 | - ipython =9.0.2 14 | - pysqa =0.2.6 15 | - hatchling =1.27.0 16 | - hatch-vcs =0.5.0 17 | -------------------------------------------------------------------------------- /.ci_support/environment-notebooks.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - jupyter 5 | - papermill -------------------------------------------------------------------------------- /.ci_support/environment-old.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - numpy =1.23.5 6 | - openmpi =4.1.4 7 | - cloudpickle =2.0.0 8 | - mpi4py =3.1.4 9 | - pyzmq =25.0.0 10 | - h5py =3.6.0 11 | - networkx =2.8.8 12 | - ipython =7.33.0 13 | - pygraphviz =1.10 14 | - hatchling =1.27.0 15 | - hatch-vcs =0.4.0 16 | -------------------------------------------------------------------------------- /.ci_support/environment-openmpi.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - numpy 6 | - openmpi 7 | - cloudpickle =3.1.1 8 | - mpi4py =4.0.1 9 | - pyzmq =26.4.0 10 | - h5py =3.13.0 11 | - networkx =3.4.2 12 | - pygraphviz =1.14 13 | - pysqa =0.2.6 14 | - ipython =9.0.2 15 | - hatchling =1.27.0 16 | - hatch-vcs =0.5.0 17 | 
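# Note: the version pins in this file are kept in sync automatically by
# .github/workflows/dependabot.yml, which rewrites the pinned versions in the
# .ci_support environment files whenever Dependabot bumps a dependency.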
-------------------------------------------------------------------------------- /.ci_support/environment-uml.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - pylint 6 | - graphviz -------------------------------------------------------------------------------- /.ci_support/environment-win.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - numpy 6 | - msmpi 7 | - cloudpickle =3.1.1 8 | - mpi4py =4.0.1 9 | - pyzmq =26.4.0 10 | - h5py =3.13.0 11 | - networkx =3.4.2 12 | - pygraphviz =1.14 13 | - ipython =9.0.2 14 | - hatchling =1.27.0 15 | - hatch-vcs =0.5.0 16 | -------------------------------------------------------------------------------- /.ci_support/release.py: -------------------------------------------------------------------------------- 1 | def get_setup_version_and_pattern(setup_content): 2 | depend_lst, version_lst = [], [] 3 | for l in setup_content: 4 | if "==" in l: 5 | lst = ( 6 | l.split("= [")[-1] 7 | .split("]\n")[0] 8 | .replace(" ", "") 9 | .replace('"', "") 10 | .replace("'", "") 11 | .split(",") 12 | ) 13 | for dep in lst: 14 | if dep != "\n": 15 | version_lst.append(dep.split("==")[1]) 16 | depend_lst.append(dep.split("==")[0]) 17 | 18 | version_high_dict = {d: v for d, v in zip(depend_lst, version_lst)} 19 | return version_high_dict 20 | 21 | 22 | def get_env_version(env_content): 23 | read_flag = False 24 | depend_lst, version_lst = [], [] 25 | for l in env_content: 26 | if "dependencies:" in l: 27 | read_flag = True 28 | elif read_flag: 29 | lst = l.replace("-", "").replace(" ", "").replace("\n", "").split("=") 30 | if len(lst) == 2: 31 | depend_lst.append(lst[0]) 32 | version_lst.append(lst[1]) 33 | return {d: v for d, v in zip(depend_lst, version_lst)} 34 | 35 | 36 | def update_dependencies(setup_content, version_low_dict, version_high_dict): 37 | version_combo_dict = {} 38 | for dep, ver in version_high_dict.items(): 39 | if dep in version_low_dict.keys() and version_low_dict[dep] != ver: 40 | version_combo_dict[dep] = dep + ">=" + version_low_dict[dep] + ",<=" + ver 41 | else: 42 | version_combo_dict[dep] = dep + "==" + ver 43 | 44 | setup_content_new = "" 45 | pattern_dict = {d: d + "==" + v for d, v in version_high_dict.items()} 46 | for l in setup_content: 47 | for k, v in pattern_dict.items(): 48 | if v in l: 49 | l = l.replace(v, version_combo_dict[k]) 50 | setup_content_new += l 51 | return setup_content_new 52 | 53 | 54 | if __name__ == "__main__": 55 | with open("pyproject.toml", "r") as f: 56 | setup_content = f.readlines() 57 | 58 | with open("environment.yml", "r") as f: 59 | env_content = f.readlines() 60 | 61 | setup_content_new = update_dependencies( 62 | setup_content=setup_content[2:], 63 | version_low_dict=get_env_version(env_content=env_content), 64 | version_high_dict=get_setup_version_and_pattern( 65 | setup_content=setup_content[2:] 66 | ), 67 | ) 68 | 69 | with open("pyproject.toml", "w") as f: 70 | f.writelines("".join(setup_content[:2]) + setup_content_new) 71 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @jan-janssen 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 -------------------------------------------------------------------------------- /.github/workflows/dependabot.yml: -------------------------------------------------------------------------------- 1 | name: Update Dependabot 2 | 3 | on: 4 | pull_request_target: 5 | branches: [ main ] 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | if: (github.actor == 'dependabot[bot]') 11 | steps: 12 | - uses: actions/checkout@v4 13 | with: 14 | ref: ${{ github.event.pull_request.head.ref }} # Check out the head of the actual branch, not the PR 15 | fetch-depth: 0 # otherwise, you will fail to push refs to dest repo 16 | token: ${{ secrets.DEPENDABOT_WORKFLOW_TOKEN }} 17 | - name: UpdateEnvironmentFile 18 | env: 19 | PR_TITLE: ${{ github.event.pull_request.title }} 20 | shell: bash -l {0} 21 | run: | 22 | package=$(echo "$PR_TITLE" | awk '{print $2}') 23 | from=$(echo "$PR_TITLE" | awk '{print $4}') 24 | to=$(echo "$PR_TITLE" | awk '{print $6}') 25 | sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-mpich.yml 26 | sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-openmpi.yml 27 | sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-win.yml 28 | sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-docs.yml 29 | sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-mini.yml 30 | sed -i "/${package}/s/${from}/${to}/g" .ci_support/environment-integration.yml 31 | sed -i "/${package}/s/${from}/${to}/g" binder/environment.yml 32 | - name: UpdateDependabotPR commit 33 | run: | 34 | git config --local user.email "pyiron@mpie.de" 35 | git config --local user.name "pyironrunner" 36 | git commit -m "[dependabot skip] Update environment" -a 37 | - name: UpdateDependabotPR push 38 | uses: ad-m/github-push-action@master 39 | with: 40 | github_token: ${{ secrets.DEPENDABOT_WORKFLOW_TOKEN }} 41 | branch: ${{ github.event.pull_request.head.ref }} 42 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | # This workflow is used to upload and deploy a new release to PyPi 2 | # Based on https://github.com/pypa/gh-action-pypi-publish 3 | 4 | name: PyPi Release 5 | 6 | on: 7 | push: 8 | pull_request: 9 | workflow_dispatch: 10 | 11 | jobs: 12 | build: 13 | if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' 14 | runs-on: ubuntu-latest 15 | environment: 16 | name: pypi 17 | url: https://pypi.org/p/${{ github.event.repository.name }} 18 | permissions: 19 | id-token: write 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Conda config 23 | run: echo -e "channels:\n - conda-forge\n" > .condarc 24 | - uses: conda-incubator/setup-miniconda@v3 25 | with: 26 | python-version: "3.13" 27 | miniforge-version: latest 28 | condarc-file: .condarc 29 | environment-file: .ci_support/environment-openmpi.yml 30 | - name: Build 31 | shell: bash -l {0} 32 | run: | 33 | cp .ci_support/environment-old.yml environment.yml 34 | python .ci_support/release.py; cat pyproject.toml 35 | git update-index --assume-unchanged pyproject.toml executorlib/_version.py 36 | hatchling build -t sdist -t wheel 37 | - name: Publish distribution 📦 to PyPI 38 | uses: pypa/gh-action-pypi-publish@release/v1 39 | 
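      # Note: no PyPI API token is passed to this step; the id-token: write
      # permission above enables PyPI trusted publishing, which
      # pypa/gh-action-pypi-publish picks up automatically when no password is set.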
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | .coverage 4 | .idea/ 5 | .vscode/ 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.11.12 4 | hooks: 5 | - id: ruff 6 | name: ruff lint 7 | args: ["--fix"] 8 | files: ^executorlib/ 9 | - id: ruff-format 10 | name: ruff format 11 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | build: 9 | os: "ubuntu-24.04" 10 | tools: 11 | python: "mambaforge-23.11" 12 | jobs: 13 | pre_build: 14 | # Generate the Sphinx configuration for this Jupyter Book so it builds. 15 | - pip install . --no-deps --no-build-isolation 16 | - "cp README.md docs" 17 | - "cp notebooks/*.ipynb docs" 18 | - "cp -r notebooks/images docs" 19 | - "jupyter-book config sphinx docs/" 20 | 21 | # Build documentation in the docs/ directory with Sphinx 22 | sphinx: 23 | builder: html 24 | configuration: docs/conf.py 25 | 26 | # Optionally build your docs in additional formats such as PDF and ePub 27 | formats: [] 28 | 29 | # Install executorlib from conda 30 | conda: 31 | environment: .ci_support/environment-docs.yml 32 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: "1.2.0" 2 | authors: 3 | - family-names: Janssen 4 | given-names: Jan 5 | orcid: "https://orcid.org/0000-0001-9948-7119" 6 | - family-names: Taylor 7 | given-names: Michael Gilbert 8 | orcid: "https://orcid.org/0000-0003-4327-2746" 9 | - family-names: Yang 10 | given-names: Ping 11 | orcid: "https://orcid.org/0000-0003-4726-2860" 12 | - family-names: Neugebauer 13 | given-names: Joerg 14 | orcid: "https://orcid.org/0000-0002-7903-2472" 15 | - family-names: Perez 16 | given-names: Danny 17 | orcid: "https://orcid.org/0000-0003-3028-5249" 18 | doi: 10.5281/zenodo.15121422 19 | message: If you use this software, please cite our article in the 20 | Journal of Open Source Software. 
21 | preferred-citation: 22 | authors: 23 | - family-names: Janssen 24 | given-names: Jan 25 | orcid: "https://orcid.org/0000-0001-9948-7119" 26 | - family-names: Taylor 27 | given-names: Michael Gilbert 28 | orcid: "https://orcid.org/0000-0003-4327-2746" 29 | - family-names: Yang 30 | given-names: Ping 31 | orcid: "https://orcid.org/0000-0003-4726-2860" 32 | - family-names: Neugebauer 33 | given-names: Joerg 34 | orcid: "https://orcid.org/0000-0002-7903-2472" 35 | - family-names: Perez 36 | given-names: Danny 37 | orcid: "https://orcid.org/0000-0003-3028-5249" 38 | date-published: 2025-04-01 39 | doi: 10.21105/joss.07782 40 | issn: 2475-9066 41 | issue: 108 42 | journal: Journal of Open Source Software 43 | publisher: 44 | name: Open Journals 45 | start: 7782 46 | title: Executorlib -- Up-scaling Python workflows for hierarchical 47 | heterogenous high-performance computing 48 | type: article 49 | url: "https://joss.theoj.org/papers/10.21105/joss.07782" 50 | volume: 10 51 | title: Executorlib -- Up-scaling Python workflows for hierarchical 52 | heterogenous high-performance computing 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Jan Janssen 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE -------------------------------------------------------------------------------- /binder/environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python 5 | - numpy 6 | - openmpi 7 | - cloudpickle =3.1.1 8 | - mpi4py =4.0.1 9 | - pyzmq =26.4.0 10 | - flux-core =0.59.0 11 | - flux-pmix =0.5.0 12 | - hatchling =1.27.0 13 | - hatch-vcs =0.5.0 14 | - h5py =3.12.1 15 | - matplotlib =3.10.0 16 | - networkx =3.4.2 17 | - pygraphviz =1.14 18 | - pysqa =0.2.6 19 | - ipython =9.0.2 20 | -------------------------------------------------------------------------------- /binder/kernel.json: -------------------------------------------------------------------------------- 1 | { 2 | "argv": [ 3 | "flux", 4 | "start", 5 | "/srv/conda/envs/notebook/bin/python", 6 | "-m", 7 | "ipykernel_launcher", 8 | "-f", 9 | "{connection_file}" 10 | ], 11 | "display_name": "Flux", 12 | "language": "python", 13 | "metadata": { 14 | "debugger": true 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /binder/postBuild: -------------------------------------------------------------------------------- 1 | # jupyter kernel 2 | mkdir -p /home/jovyan/.local/share/jupyter/kernels/flux 3 | cp binder/kernel.json /home/jovyan/.local/share/jupyter/kernels/flux 4 | 5 | # install executorlib 6 | pip install . --no-deps --no-build-isolation 7 | 8 | # copy notebooks 9 | mv notebooks/*.ipynb . 10 | mv notebooks/images . 11 | 12 | # clean up 13 | rm -rf .ci_support .github binder docs notebooks executorlib executorlib.egg-info tests .coveralls.yml .gitignore .readthedocs.yml LICENSE MANIFEST.in README.md pyproject.toml setup.py build 14 | -------------------------------------------------------------------------------- /codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld", 3 | "@type": "Code", 4 | "author": [ 5 | { 6 | "@id": "https://orcid.org/0000-0001-9948-7119", 7 | "@type": "Person", 8 | "email": "j.janssen@mpi-susmat.de", 9 | "name": "Jan Janssen", 10 | "affiliation": "Max Planck Institute for Sustainable Materials, Düsseldorf, Germany" 11 | }, 12 | { 13 | "@id": "https://orcid.org/0000-0003-4327-2746", 14 | "@type": "Person", 15 | "email": "mgt16@lanl.gov", 16 | "name": "Michael Gilbert Taylor", 17 | "affiliation": "Los Alamos National Laboratory, Los Alamos, NM, United States of America" 18 | }, 19 | { 20 | "@id": "https://orcid.org/0000-0003-4726-2860", 21 | "@type": "Person", 22 | "email": "pyang@lanl.gov", 23 | "name": "Ping Yang", 24 | "affiliation": "Los Alamos National Laboratory, Los Alamos, NM, United States of America" 25 | }, 26 | { 27 | "@id": "https://orcid.org/0000-0002-7903-2472", 28 | "@type": "Person", 29 | "email": "j.neugebauer@mpi-susmat.de", 30 | "name": "Joerg Neugebauer", 31 | "affiliation": "Max Planck Institute for Sustainable Materials, Düsseldorf, Germany" 32 | }, 33 | { 34 | "@id": "https://orcid.org/0000-0003-3028-5249", 35 | "@type": "Person", 36 | "email": "danny_perez@lanl.gov", 37 | "name": "Danny Perez", 38 | "affiliation": "Los Alamos National Laboratory, Los Alamos, NM, United States of America" 39 | } 40 | ], 41 | 
"identifier": "", 42 | "codeRepository": "https://github.com/pyiron/executorlib", 43 | "datePublished": "2025-02-14", 44 | "dateModified": "2025-02-14", 45 | "dateCreated": "2025-02-14", 46 | "description": "Up-scale python functions for high performance computing (HPC) with executorlib.", 47 | "keywords": "Python, High Performance Computing, Task Scheduling", 48 | "license": "BSD", 49 | "title": "executorlib", 50 | "version": "0.3.0" 51 | } 52 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | title: executorlib 2 | author: Jan Janssen 3 | 4 | execute: 5 | execute_notebooks : off 6 | 7 | repository: 8 | url : https://github.com/pyiron/executorlib 9 | html: 10 | use_repository_button: true 11 | 12 | launch_buttons: 13 | notebook_interface : jupyterlab 14 | binderhub_url : https://mybinder.org 15 | 16 | sphinx: 17 | extra_extensions: 18 | - 'sphinx.ext.autodoc' 19 | - 'sphinx.ext.napoleon' 20 | - 'sphinx.ext.viewcode' 21 | - 'sphinx.ext.autodoc' 22 | - 'sphinx.ext.autosummary' 23 | config: 24 | autosummary_generate: True 25 | templates_path: ['_templates'] 26 | -------------------------------------------------------------------------------- /docs/_templates/custom-class-template.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. autoclass:: {{ objname }} 6 | :members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | {% block methods %} 11 | .. automethod:: __init__ 12 | 13 | {% if methods %} 14 | .. rubric:: {{ _('Methods') }} 15 | 16 | .. autosummary:: 17 | {% for item in methods %} 18 | ~{{ name }}.{{ item }} 19 | {%- endfor %} 20 | {% endif %} 21 | {% endblock %} 22 | 23 | {% block attributes %} 24 | {% if attributes %} 25 | .. rubric:: {{ _('Attributes') }} 26 | 27 | .. autosummary:: 28 | {% for item in attributes %} 29 | ~{{ name }}.{{ item }} 30 | {%- endfor %} 31 | {% endif %} 32 | {% endblock %} -------------------------------------------------------------------------------- /docs/_templates/custom-module-template.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. automodule:: {{ fullname }} 4 | 5 | {% block attributes %} 6 | {% if attributes %} 7 | .. rubric:: Module Attributes 8 | 9 | .. autosummary:: 10 | :toctree: 11 | {% for item in attributes %} 12 | {{ item }} 13 | {%- endfor %} 14 | {% endif %} 15 | {% endblock %} 16 | 17 | {% block functions %} 18 | {% if functions %} 19 | .. rubric:: {{ _('Functions') }} 20 | 21 | .. autosummary:: 22 | :toctree: 23 | {% for item in functions %} 24 | {{ item }} 25 | {%- endfor %} 26 | {% endif %} 27 | {% endblock %} 28 | 29 | {% block classes %} 30 | {% if classes %} 31 | .. rubric:: {{ _('Classes') }} 32 | 33 | .. autosummary:: 34 | :toctree: 35 | :template: custom-class-template.rst 36 | {% for item in classes %} 37 | {{ item }} 38 | {%- endfor %} 39 | {% endif %} 40 | {% endblock %} 41 | 42 | {% block exceptions %} 43 | {% if exceptions %} 44 | .. rubric:: {{ _('Exceptions') }} 45 | 46 | .. autosummary:: 47 | :toctree: 48 | {% for item in exceptions %} 49 | {{ item }} 50 | {%- endfor %} 51 | {% endif %} 52 | {% endblock %} 53 | 54 | {% block modules %} 55 | {% if modules %} 56 | .. rubric:: Modules 57 | 58 | .. 
autosummary:: 59 | :toctree: 60 | :template: custom-module-template.rst 61 | :recursive: 62 | {% for item in modules %} 63 | {{ item }} 64 | {%- endfor %} 65 | {% endif %} 66 | {% endblock %} -------------------------------------------------------------------------------- /docs/_toc.yml: -------------------------------------------------------------------------------- 1 | format: jb-book 2 | root: README 3 | chapters: 4 | - file: installation.md 5 | - file: 1-single-node.ipynb 6 | - file: 2-hpc-cluster.ipynb 7 | - file: 3-hpc-job.ipynb 8 | - file: application.md 9 | sections: 10 | - file: 5-1-gpaw.ipynb 11 | - file: 5-2-quantum-espresso.ipynb 12 | - file: trouble_shooting.md 13 | - file: 4-developer.ipynb 14 | - file: api.rst 15 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | Interface 2 | ========= 3 | 4 | Documentation of the classes and functions defined in the :code:`executorlib` package. 5 | 6 | .. autosummary:: 7 | :toctree: _autosummary 8 | :template: custom-module-template.rst 9 | :recursive: 10 | 11 | executorlib -------------------------------------------------------------------------------- /docs/application.md: -------------------------------------------------------------------------------- 1 | # Application 2 | While `executorlib` is designed to up-scale any Python function for high performance computing (HPC), it was initially 3 | developed to accelerate atomistic computational materials science simulation. To demonstrate the usage of `executorlib` 4 | in the context of atomistic simulation, it is combined with [atomistics](https://atomistics.readthedocs.io/) and the 5 | [atomic simulation environment (ASE)](https://wiki.fysik.dtu.dk/ase/) to calculate the bulk modulus with two density 6 | functional theory simulation codes [gpaw](https://gpaw.readthedocs.io/index.html) and [quantum espresso](https://www.quantum-espresso.org). 7 | The bulk modulus is calculated by uniformly deforming a supercell of atoms and measuring the change in total energy 8 | during compression and elongation. The first derivative of this curve is the pressure and the second derivative is 9 | proportional to the bulk modulus. Other material properties like the heat capacity, thermal expansion or thermal conductivity 10 | can be calculated in similar ways following the [atomistics](https://atomistics.readthedocs.io/) documentation. 11 | -------------------------------------------------------------------------------- /docs/images/pyiron-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/docs/images/pyiron-logo.png -------------------------------------------------------------------------------- /docs/paper/process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/docs/paper/process.png -------------------------------------------------------------------------------- /docs/trouble_shooting.md: -------------------------------------------------------------------------------- 1 | # Trouble Shooting 2 | Some of the most frequent issues are covered below, for everything else do not be shy and [open an issue on Github](https://github.com/pyiron/executorlib/issues). 
3 | 4 | ## Filesystem Usage 5 | The cache of executorlib is not removed after the Python process has completed, so it is the responsibility of the user to 6 | clean up the cache directories they created. This is easily forgotten, so it is important to check for remaining cache 7 | directories from time to time and remove them. 8 | 9 | ## Firewall Issues 10 | macOS comes with a rather strict firewall, which does not allow connecting to a macOS computer using its hostname, even 11 | if it is the hostname of the current computer. macOS only supports connections based on the hostname `localhost`. To use 12 | `localhost` rather than the hostname to connect to the Python processes executorlib uses for the execution of a Python 13 | function, executorlib provides the option to set `hostname_localhost=True`. For macOS this option is enabled by default; 14 | if other operating systems implement similarly strict firewall rules, the option can also be set manually to enable 15 | local mode on those computers as well. 16 | 17 | ## Message Passing Interface 18 | To use the message passing interface (MPI), executorlib requires [mpi4py](https://mpi4py.readthedocs.io/) as an optional 19 | dependency. The installation of this and other optional dependencies is covered in the [installation section](https://executorlib.readthedocs.io/en/latest/installation.html#mpi-support). 20 | 21 | ## Missing Dependencies 22 | The default installation of executorlib only comes with a limited number of dependencies, most notably the [zero message queue](https://zeromq.org) 23 | and [cloudpickle](https://github.com/cloudpipe/cloudpickle). Additional features like [caching](https://executorlib.readthedocs.io/en/latest/installation.html#caching), the [HPC Cluster Executors](https://executorlib.readthedocs.io/en/latest/installation.html#hpc-cluster-executor) 24 | and the [HPC Job Executors](https://executorlib.readthedocs.io/en/latest/installation.html#hpc-job-executor) require 25 | additional dependencies. The dependencies are explained in more detail in the 26 | [installation section](https://executorlib.readthedocs.io/en/latest/installation.html). 27 | 28 | ## Python Version 29 | Executorlib supports all current Python versions, ranging from 3.9 to 3.13. Still, some of the dependencies, especially 30 | the [flux](http://flux-framework.org) job scheduler, are currently limited to Python 3.12 and below. Consequently, for high 31 | performance computing installations Python 3.12 is the recommended Python version.
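To make the `hostname_localhost` option from the Firewall Issues section above concrete, here is a minimal sketch (the constructor argument is an assumption based on the description above; `SingleNodeExecutor` is the executor class exported by this package):

```python
from executorlib import SingleNodeExecutor

# connect to the worker processes via localhost instead of the machine
# hostname (enabled by default on macOS, optional on other systems)
with SingleNodeExecutor(hostname_localhost=True) as exe:
    future = exe.submit(sum, [1, 2, 3])
    print(future.result())  # prints 6
```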
32 | 33 | ## Resource Dictionary 34 | The resource dictionary parameter `resource_dict` can contain one or more of the following options: 35 | * `cores` (int): number of MPI cores to be used for each function call 36 | * `threads_per_core` (int): number of OpenMP threads to be used for each function call 37 | * `gpus_per_core` (int): number of GPUs per worker - defaults to 0 38 | * `cwd` (str/None): current working directory where the parallel python task is executed 39 | * `openmpi_oversubscribe` (bool): adds the `--oversubscribe` command line flag (OpenMPI and SLURM only) - default False 40 | * `slurm_cmd_args` (list): additional command line arguments for the srun call (SLURM only) 41 | 42 | For the special case of the [HPC Job Executor](https://executorlib.readthedocs.io/en/latest/3-hpc-job.html), 43 | the resource dictionary parameter `resource_dict` can also include additional parameters defined in the submission script 44 | of the [Python simple queuing system adapter (pysqa)](https://pysqa.readthedocs.io); these include but are not limited to: 45 | * `run_time_max` (int): the maximum time the execution of the submitted Python function is allowed to take in seconds. 46 | * `memory_max` (int): the maximum amount of memory the Python function is allowed to use in Gigabytes. 47 | * `partition` (str): the partition of the queuing system the Python function is submitted to. 48 | * `queue` (str): the name of the queue the Python function is submitted to. 49 | 50 | All parameters in the resource dictionary `resource_dict` are optional; a short usage sketch is given at the end of this page. 51 | 52 | ## SSH Connection 53 | While the [Python simple queuing system adapter (pysqa)](https://pysqa.readthedocs.io) provides the option to connect to 54 | high performance computing (HPC) clusters via SSH, this functionality is not supported in executorlib. The reason 55 | is the use of [cloudpickle](https://github.com/cloudpipe/cloudpickle) for serialization inside executorlib, which requires 56 | the same Python version and dependencies on both computers connected via SSH. As tracking those parameters is rather 57 | complicated, the SSH connection functionality of [pysqa](https://pysqa.readthedocs.io) is not officially supported in 58 | executorlib. 59 | 
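As referenced in the Resource Dictionary section above, here is a minimal sketch of passing `resource_dict` to an individual function call (the keys are taken from the list above; the `resource_dict` keyword on `submit()` matches the `BaseExecutor.submit` signature elsewhere in this repository):

```python
from executorlib import SingleNodeExecutor

with SingleNodeExecutor() as exe:
    # request one MPI core and one OpenMP thread for this specific call
    future = exe.submit(
        sum, [1, 2, 3],
        resource_dict={"cores": 1, "threads_per_core": 1},
    )
    print(future.result())  # prints 6
```

All keys that are omitted fall back to their defaults, as all `resource_dict` entries are optional.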
--------------------------------------------------------------------------------------------------- /executorlib/__init__.py: --------------------------------------------------------------------------------------------------- 1 | from executorlib.executor.base import BaseExecutor 2 | from executorlib.executor.flux import ( 3 | FluxClusterExecutor, 4 | FluxJobExecutor, 5 | ) 6 | from executorlib.executor.single import SingleNodeExecutor 7 | from executorlib.executor.slurm import ( 8 | SlurmClusterExecutor, 9 | SlurmJobExecutor, 10 | ) 11 | from executorlib.standalone.cache import get_cache_data 12 | 13 | from . import _version 14 | 15 | __all__: list[str] = [ 16 | "get_cache_data", 17 | "BaseExecutor", 18 | "FluxJobExecutor", 19 | "FluxClusterExecutor", 20 | "SingleNodeExecutor", 21 | "SlurmJobExecutor", 22 | "SlurmClusterExecutor", 23 | ] 24 | 25 | __version__ = _version.__version__ 26 | --------------------------------------------------------------------------------------------------- /executorlib/_version.py: --------------------------------------------------------------------------------------------------- 1 | # file generated by setuptools-scm 2 | # don't change, don't track in version control 3 | 4 | __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"] 5 | 6 | TYPE_CHECKING = False 7 | if TYPE_CHECKING: 8 | from typing import Tuple 9 | from typing import Union 10 | 11 | VERSION_TUPLE = Tuple[Union[int, str], ...] 12 | else: 13 | VERSION_TUPLE = object 14 | 15 | version: str 16 | __version__: str 17 | __version_tuple__: VERSION_TUPLE 18 | version_tuple: VERSION_TUPLE 19 | 20 | __version__ = version = "0.0.1" 21 | __version_tuple__ = version_tuple = (0, 0, 1) 22 | --------------------------------------------------------------------------------------------------- /executorlib/api.py: --------------------------------------------------------------------------------------------------- 1 | """ 2 | External application programming interface (API). Following semantic versioning, this interface is promised to remain 3 | stable during minor releases, and any change in the interface leads to a major version bump. External libraries should 4 | only use the functionality in this API in combination with the user interface defined in the root __init__.py; all other 5 | functionality is considered internal and might change during minor releases. 6 | """ 7 | 8 | from executorlib.standalone.command import get_command_path 9 | from executorlib.standalone.interactive.communication import ( 10 | SocketInterface, 11 | interface_bootup, 12 | interface_connect, 13 | interface_receive, 14 | interface_send, 15 | interface_shutdown, 16 | ) 17 | from executorlib.standalone.interactive.spawner import MpiExecSpawner, SubprocessSpawner 18 | from executorlib.standalone.queue import cancel_items_in_queue 19 | from executorlib.standalone.serialize import cloudpickle_register 20 | 21 | __all__: list[str] = [ 22 | "cancel_items_in_queue", 23 | "cloudpickle_register", 24 | "get_command_path", 25 | "interface_bootup", 26 | "interface_connect", 27 | "interface_receive", 28 | "interface_send", 29 | "interface_shutdown", 30 | "MpiExecSpawner", 31 | "SocketInterface", 32 | "SubprocessSpawner", 33 | ] 34 | --------------------------------------------------------------------------------------------------- /executorlib/backend/__init__.py: --------------------------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/executorlib/backend/__init__.py --------------------------------------------------------------------------------------------------- /executorlib/backend/cache_parallel.py: --------------------------------------------------------------------------------------------------- 1 | import pickle 2 | import sys 3 | import time 4 | 5 | import cloudpickle 6 | 7 | from executorlib.task_scheduler.file.backend import ( 8 | backend_load_file, 9 | backend_write_file, 10 | ) 11 | 12 | 13 | def main() -> None: 14 | """ 15 | Main function for executing the cache_parallel script. 16 | 17 | This function uses MPI (Message Passing Interface) to distribute the execution of a function 18 | across multiple processes.
It loads a file, broadcasts the data to all processes, executes 19 | the function, gathers the results (if there are multiple processes), and writes the output 20 | to a file. 21 | 22 | Args: 23 | None 24 | 25 | Returns: 26 | None 27 | """ 28 | from mpi4py import MPI 29 | 30 | MPI.pickle.__init__( # type: ignore 31 | cloudpickle.dumps, 32 | cloudpickle.loads, 33 | pickle.HIGHEST_PROTOCOL, 34 | ) 35 | mpi_rank_zero = MPI.COMM_WORLD.Get_rank() == 0 36 | mpi_size_larger_one = MPI.COMM_WORLD.Get_size() > 1 37 | file_name = sys.argv[1] 38 | 39 | time_start = time.time() 40 | apply_dict = {} 41 | if mpi_rank_zero: 42 | apply_dict = backend_load_file(file_name=file_name) 43 | apply_dict = MPI.COMM_WORLD.bcast(apply_dict, root=0) 44 | output = apply_dict["fn"].__call__(*apply_dict["args"], **apply_dict["kwargs"]) 45 | try: 46 | result = ( 47 | MPI.COMM_WORLD.gather(output, root=0) if mpi_size_larger_one else output 48 | ) 49 | except Exception as error: 50 | if mpi_rank_zero: 51 | backend_write_file( 52 | file_name=file_name, 53 | output={"error": error}, 54 | runtime=time.time() - time_start, 55 | ) 56 | else: 57 | if mpi_rank_zero: 58 | backend_write_file( 59 | file_name=file_name, 60 | output={"result": result}, 61 | runtime=time.time() - time_start, 62 | ) 63 | MPI.COMM_WORLD.Barrier() 64 | 65 | 66 | if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /executorlib/backend/cache_serial.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from executorlib.task_scheduler.file.backend import backend_execute_task_in_file 4 | 5 | if __name__ == "__main__": 6 | backend_execute_task_in_file(file_name=sys.argv[1]) 7 | -------------------------------------------------------------------------------- /executorlib/backend/interactive_parallel.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import sys 3 | from os.path import abspath 4 | from typing import Optional 5 | 6 | import cloudpickle 7 | import zmq 8 | 9 | from executorlib.standalone.interactive.backend import call_funct, parse_arguments 10 | from executorlib.standalone.interactive.communication import ( 11 | interface_connect, 12 | interface_receive, 13 | interface_send, 14 | interface_shutdown, 15 | ) 16 | 17 | 18 | def main() -> None: 19 | """ 20 | Entry point of the program. 21 | 22 | This function initializes MPI, sets up the necessary communication, and executes the requested functions. 
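    Only MPI rank 0 exchanges messages with the scheduling process over the ZMQ socket; received task
    dictionaries are broadcast to the remaining ranks before execution (see the mpi_rank_zero guards below).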
23 | 24 | Returns: 25 | None 26 | """ 27 | from mpi4py import MPI 28 | 29 | MPI.pickle.__init__( # type: ignore 30 | cloudpickle.dumps, 31 | cloudpickle.loads, 32 | pickle.HIGHEST_PROTOCOL, 33 | ) 34 | mpi_rank_zero = MPI.COMM_WORLD.Get_rank() == 0 35 | mpi_size_larger_one = MPI.COMM_WORLD.Get_size() > 1 36 | 37 | argument_dict = parse_arguments(argument_lst=sys.argv) 38 | context: Optional[zmq.Context] = None 39 | socket: Optional[zmq.Socket] = None 40 | if mpi_rank_zero: 41 | context, socket = interface_connect( 42 | host=argument_dict["host"], port=argument_dict["zmqport"] 43 | ) 44 | 45 | memory = None 46 | 47 | # required for flux interface - otherwise the current path is not included in the python path 48 | cwd = abspath(".") 49 | if cwd not in sys.path: 50 | sys.path.insert(1, cwd) 51 | 52 | while True: 53 | # Read from socket 54 | input_dict: dict = {} 55 | if mpi_rank_zero: 56 | input_dict = interface_receive(socket=socket) 57 | input_dict = MPI.COMM_WORLD.bcast(input_dict, root=0) 58 | 59 | # Parse input 60 | if "shutdown" in input_dict and input_dict["shutdown"]: 61 | if mpi_rank_zero: 62 | interface_send(socket=socket, result_dict={"result": True}) 63 | interface_shutdown(socket=socket, context=context) 64 | MPI.COMM_WORLD.Barrier() 65 | break 66 | elif ( 67 | "fn" in input_dict 68 | and "init" not in input_dict 69 | and "args" in input_dict 70 | and "kwargs" in input_dict 71 | ): 72 | # Execute function 73 | try: 74 | output = call_funct(input_dict=input_dict, funct=None, memory=memory) 75 | if mpi_size_larger_one: 76 | output_reply = MPI.COMM_WORLD.gather(output, root=0) 77 | else: 78 | output_reply = output 79 | except Exception as error: 80 | if mpi_rank_zero: 81 | interface_send( 82 | socket=socket, 83 | result_dict={"error": error}, 84 | ) 85 | else: 86 | # Send output 87 | if mpi_rank_zero: 88 | interface_send(socket=socket, result_dict={"result": output_reply}) 89 | elif ( 90 | "init" in input_dict 91 | and input_dict["init"] 92 | and "args" in input_dict 93 | and "kwargs" in input_dict 94 | ): 95 | memory = call_funct(input_dict=input_dict, funct=None) 96 | 97 | 98 | if __name__ == "__main__": 99 | main() 100 | -------------------------------------------------------------------------------- /executorlib/backend/interactive_serial.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from os.path import abspath 3 | from typing import Optional 4 | 5 | from executorlib.standalone.interactive.backend import call_funct, parse_arguments 6 | from executorlib.standalone.interactive.communication import ( 7 | interface_connect, 8 | interface_receive, 9 | interface_send, 10 | interface_shutdown, 11 | ) 12 | 13 | 14 | def main(argument_lst: Optional[list[str]] = None): 15 | """ 16 | The main function of the program. 17 | 18 | Args: 19 | argument_lst (Optional[List[str]]): List of command line arguments. If None, sys.argv will be used. 
20 | 21 | Returns: 22 | None 23 | """ 24 | if argument_lst is None: 25 | argument_lst = sys.argv 26 | argument_dict = parse_arguments(argument_lst=argument_lst) 27 | context, socket = interface_connect( 28 | host=argument_dict["host"], port=argument_dict["zmqport"] 29 | ) 30 | 31 | memory = None 32 | 33 | # required for flux interface - otherwise the current path is not included in the python path 34 | cwd = abspath(".") 35 | if cwd not in sys.path: 36 | sys.path.insert(1, cwd) 37 | 38 | while True: 39 | # Read from socket 40 | input_dict = interface_receive(socket=socket) 41 | 42 | # Parse input 43 | if "shutdown" in input_dict and input_dict["shutdown"]: 44 | interface_send(socket=socket, result_dict={"result": True}) 45 | interface_shutdown(socket=socket, context=context) 46 | break 47 | elif ( 48 | "fn" in input_dict 49 | and "init" not in input_dict 50 | and "args" in input_dict 51 | and "kwargs" in input_dict 52 | ): 53 | # Execute function 54 | try: 55 | output = call_funct(input_dict=input_dict, funct=None, memory=memory) 56 | except Exception as error: 57 | interface_send( 58 | socket=socket, 59 | result_dict={"error": error}, 60 | ) 61 | else: 62 | # Send output 63 | interface_send(socket=socket, result_dict={"result": output}) 64 | elif ( 65 | "init" in input_dict 66 | and input_dict["init"] 67 | and "args" in input_dict 68 | and "kwargs" in input_dict 69 | ): 70 | memory = call_funct(input_dict=input_dict, funct=None) 71 | 72 | 73 | if __name__ == "__main__": 74 | main(argument_lst=sys.argv) 75 | -------------------------------------------------------------------------------- /executorlib/executor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/executorlib/executor/__init__.py -------------------------------------------------------------------------------- /executorlib/executor/base.py: -------------------------------------------------------------------------------- 1 | import queue 2 | from abc import ABC 3 | from concurrent.futures import ( 4 | Executor as FutureExecutor, 5 | ) 6 | from concurrent.futures import ( 7 | Future, 8 | ) 9 | from typing import Callable, Optional 10 | 11 | from executorlib.task_scheduler.base import TaskSchedulerBase 12 | 13 | 14 | class BaseExecutor(FutureExecutor, ABC): 15 | """ 16 | Interface class for the executor. 17 | 18 | Args: 19 | executor (TaskSchedulerBase): internal executor 20 | """ 21 | 22 | def __init__(self, executor: TaskSchedulerBase): 23 | self._task_scheduler = executor 24 | 25 | @property 26 | def max_workers(self) -> Optional[int]: 27 | return self._task_scheduler.max_workers 28 | 29 | @max_workers.setter 30 | def max_workers(self, max_workers: int): 31 | self._task_scheduler.max_workers = max_workers 32 | 33 | @property 34 | def info(self) -> Optional[dict]: 35 | """ 36 | Get the information about the executor. 37 | 38 | Returns: 39 | Optional[dict]: Information about the executor. 40 | """ 41 | return self._task_scheduler.info 42 | 43 | @property 44 | def future_queue(self) -> Optional[queue.Queue]: 45 | """ 46 | Get the future queue. 47 | 48 | Returns: 49 | queue.Queue: The future queue. 50 | """ 51 | return self._task_scheduler.future_queue 52 | 53 | def submit( # type: ignore 54 | self, 55 | fn: Callable, 56 | /, 57 | *args, 58 | resource_dict: Optional[dict] = None, 59 | **kwargs, 60 | ) -> Future: 61 | """ 62 | Submits a callable to be executed with the given arguments. 
63 | 64 | Schedules the callable to be executed as fn(*args, **kwargs) and returns 65 | a Future instance representing the execution of the callable. 66 | 67 | Args: 68 | fn (callable): function to submit for execution 69 | args: arguments for the submitted function 70 | kwargs: keyword arguments for the submitted function 71 | resource_dict (dict): resource dictionary, which defines the resources used for the execution of the 72 | function. Example resource dictionary: { 73 | cores: 1, 74 | threads_per_core: 1, 75 | gpus_per_worker: 0, 76 | oversubscribe: False, 77 | cwd: None, 78 | executor: None, 79 | hostname_localhost: False, 80 | } 81 | 82 | Returns: 83 | Future: A Future representing the given call. 84 | """ 85 | return self._task_scheduler.submit( 86 | *([fn] + list(args)), resource_dict=resource_dict, **kwargs 87 | ) 88 | 89 | def shutdown(self, wait: bool = True, *, cancel_futures: bool = False): 90 | """ 91 | Clean-up the resources associated with the Executor. 92 | 93 | It is safe to call this method several times. Otherwise, no other 94 | methods can be called after this one. 95 | 96 | Args: 97 | wait (bool): If True then shutdown will not return until all running 98 | futures have finished executing and the resources used by the 99 | parallel_executors have been reclaimed. 100 | cancel_futures (bool): If True then shutdown will cancel all pending 101 | futures. Futures that are completed or running will not be 102 | cancelled. 103 | """ 104 | self._task_scheduler.shutdown(wait=wait, cancel_futures=cancel_futures) 105 | 106 | def __len__(self) -> int: 107 | """ 108 | Get the length of the executor. 109 | 110 | Returns: 111 | int: The length of the executor. 112 | """ 113 | return len(self._task_scheduler) 114 | 115 | def __bool__(self): 116 | """ 117 | Overwrite the boolean evaluation to always return True 118 | 119 | Returns: 120 | bool: Always return True 121 | """ 122 | return True 123 | 124 | def __exit__(self, *args, **kwargs) -> None: 125 | """ 126 | Exit method called when exiting the context manager. 127 | """ 128 | self._task_scheduler.__exit__(*args, **kwargs) 129 | --------------------------------------------------------------------------------------------------- /executorlib/standalone/__init__.py: --------------------------------------------------------------------------------------------------- 1 | """ 2 | Submodules in the executorlib.standalone module do not depend on other modules of the executorlib package. This strict 3 | separation simplifies the development, testing and debugging. The functionality in executorlib.standalone is designed 4 | to be used independently in other libraries. 5 | """ 6 | --------------------------------------------------------------------------------------------------- /executorlib/standalone/cache.py: --------------------------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cloudpickle 4 | 5 | group_dict = { 6 | "fn": "function", 7 | "args": "input_args", 8 | "kwargs": "input_kwargs", 9 | "output": "output", 10 | "error": "error", 11 | "runtime": "runtime", 12 | "queue_id": "queue_id", 13 | } 14 | 15 | 16 | def get_cache_data(cache_directory: str) -> list[dict]: 17 | """ 18 | Collect all HDF5 files in the cache directory 19 | 20 | Args: 21 | cache_directory (str): The directory to store cache files. 22 | 23 | Returns: 24 | list[dict]: List of dictionaries, each representing one of the HDF5 files in the cache directory.
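        Example (a sketch; assumes results were previously written below ./cache
        by one of the cache-enabled executors):

            >>> entries = get_cache_data(cache_directory="./cache")
            >>> [entry["filename"] for entry in entries]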
25 | """ 26 | import h5py 27 | import numpy as np 28 | 29 | file_lst = [] 30 | for task_key in os.listdir(cache_directory): 31 | file_name = os.path.join(cache_directory, task_key, "cache.h5out") 32 | os.makedirs(os.path.join(cache_directory, task_key), exist_ok=True) 33 | if os.path.exists(file_name): 34 | with h5py.File(file_name, "r") as hdf: 35 | file_content_dict = { 36 | key: cloudpickle.loads(np.void(hdf["/" + key])) 37 | for key in group_dict.values() 38 | if key in hdf 39 | } 40 | file_content_dict["filename"] = file_name 41 | file_lst.append(file_content_dict) 42 | return file_lst 43 | --------------------------------------------------------------------------------------------------- /executorlib/standalone/command.py: --------------------------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def get_command_path(executable: str) -> str: 5 | """ 6 | Get path of the backend executable script 7 | 8 | Args: 9 | executable (str): Name of the backend executable script, e.g. interactive_serial.py or cache_parallel.py from the executorlib/backend directory 10 | 11 | Returns: 12 | str: absolute path to the executable script 13 | """ 14 | return os.path.abspath(os.path.join(__file__, "..", "..", "backend", executable)) 15 | --------------------------------------------------------------------------------------------------- /executorlib/standalone/inputcheck.py: --------------------------------------------------------------------------------------------------- 1 | import inspect 2 | import multiprocessing 3 | import os.path 4 | from concurrent.futures import Executor 5 | from typing import Callable, Optional 6 | 7 | 8 | def check_oversubscribe(oversubscribe: bool) -> None: 9 | """ 10 | Check if oversubscribe is True and raise a ValueError if it is. 11 | """ 12 | if oversubscribe: 13 | raise ValueError( 14 | "Oversubscribing is not supported for the executorlib.flux.PyFLuxExecutor backend. " 15 | "Please use oversubscribe=False instead of oversubscribe=True." 16 | ) 17 | 18 | 19 | def check_command_line_argument_lst(command_line_argument_lst: list[str]) -> None: 20 | """ 21 | Raise a ValueError if command_line_argument_lst is not empty. 22 | """ 23 | if len(command_line_argument_lst) > 0: 24 | raise ValueError( 25 | "The command_line_argument_lst parameter is not supported for the SLURM backend." 26 | ) 27 | 28 | 29 | def check_gpus_per_worker(gpus_per_worker: int) -> None: 30 | """ 31 | Raise a TypeError if gpus_per_worker is not 0. 32 | """ 33 | if gpus_per_worker != 0: 34 | raise TypeError( 35 | "GPU assignment is not supported for the executorlib.mpi.PyMPIExecutor backend. " 36 | "Please use gpus_per_worker=0 instead of gpus_per_worker=" 37 | + str(gpus_per_worker) 38 | + "." 39 | ) 40 | 41 | 42 | def check_executor(executor: Executor) -> None: 43 | """ 44 | Check if executor is not None and raise a ValueError if it is. 45 | """ 46 | if executor is not None: 47 | raise ValueError( 48 | "The executor parameter is only supported for the flux framework backend." 49 | ) 50 | 51 | 52 | def check_nested_flux_executor(nested_flux_executor: bool) -> None: 53 | """ 54 | Check if nested_flux_executor is True and raise a ValueError if it is. 55 | """ 56 | if nested_flux_executor: 57 | raise ValueError( 58 | "The nested_flux_executor parameter is only supported for the flux framework backend." 59 | ) 60 | 61 | 62 | def check_resource_dict(function: Callable) -> None: 63 | """ 64 | Check if the function has a parameter named 'resource_dict' and raise a ValueError if it does.
65 | """ 66 | if "resource_dict" in inspect.signature(function).parameters: 67 | raise ValueError( 68 | "The parameter resource_dict is used internally in executorlib, " 69 | "so it cannot be used as a parameter in the submitted functions." 70 | ) 71 | 72 | 73 | def check_resource_dict_is_empty(resource_dict: dict) -> None: 74 | """ 75 | Raise a ValueError if resource_dict is not empty. 76 | """ 77 | if len(resource_dict) > 0: 78 | raise ValueError( 79 | "When block_allocation is enabled, the resource requirements have to be defined on the executor level." 80 | ) 81 | 82 | 83 | def check_refresh_rate(refresh_rate: float) -> None: 84 | """ 85 | Raise a ValueError if refresh_rate deviates from the default value of 0.01. 86 | """ 87 | if refresh_rate != 0.01: 88 | raise ValueError( 89 | "The sleep_interval parameter is only used when disable_dependencies=False." 90 | ) 91 | 92 | 93 | def check_plot_dependency_graph(plot_dependency_graph: bool) -> None: 94 | """ 95 | Check if plot_dependency_graph is True and raise a ValueError if it is. 96 | """ 97 | if plot_dependency_graph: 98 | raise ValueError( 99 | "The plot_dependency_graph parameter is only used when disable_dependencies=False." 100 | ) 101 | 102 | 103 | def check_pmi(backend: Optional[str], pmi: Optional[str]) -> None: 104 | """ 105 | Check if pmi is valid for the selected backend and raise a ValueError if it is not. 106 | """ 107 | if backend is not None: 108 | if backend != "flux_allocation" and pmi is not None: 109 | raise ValueError( 110 | "The pmi parameter is currently only implemented for flux." 111 | ) 112 | elif backend == "flux_allocation" and pmi not in ["pmix", "pmi1", "pmi2", None]: 113 | raise ValueError( 114 | "The pmi parameter supports [pmix, pmi1, pmi2], but not: " + str(pmi) 115 | ) 116 | 117 | 118 | def check_init_function( 119 | block_allocation: bool, init_function: Optional[Callable] 120 | ) -> None: 121 | """ 122 | Raise a ValueError if init_function is set while block_allocation is disabled. 123 | """ 124 | if not block_allocation and init_function is not None: 125 | raise ValueError("The init_function parameter is only supported when block_allocation is enabled.") 126 | 127 | 128 | def check_max_workers_and_cores( 129 | max_workers: Optional[int], max_cores: Optional[int] 130 | ) -> None: 131 | if max_workers is not None: 132 | raise ValueError( 133 | "The number of workers cannot be controlled with the pysqa based backend." 134 | ) 135 | if max_cores is not None: 136 | raise ValueError( 137 | "The number of cores cannot be controlled with the pysqa based backend." 138 | ) 139 | 140 | 141 | def check_hostname_localhost(hostname_localhost: Optional[bool]) -> None: 142 | if hostname_localhost is not None: 143 | raise ValueError( 144 | "The option to connect to hosts based on their hostname is not available with the pysqa based backend." 145 | ) 146 | 147 | 148 | def check_flux_executor_pmi_mode(flux_executor_pmi_mode: Optional[str]) -> None: 149 | if flux_executor_pmi_mode is not None: 150 | raise ValueError( 151 | "The option to specify the flux pmi mode is not available with the pysqa based backend." 152 | ) 153 | 154 | 155 | def check_flux_log_files(flux_log_files: Optional[bool]) -> None: 156 | """ 157 | Check if flux_log_files is True and raise a ValueError if it is. 158 | """ 159 | if flux_log_files: 160 | raise ValueError( 161 | "The flux_log_files parameter is only supported for the flux framework backend."
162 | ) 163 | 164 | 165 | def check_pysqa_config_directory(pysqa_config_directory: Optional[str]) -> None: 166 | """ 167 | Check if pysqa_config_directory is None and raise a ValueError if it is not. 168 | """ 169 | if pysqa_config_directory is not None: 170 | raise ValueError( 171 | "The pysqa_config_directory parameter is only supported for the pysqa based backend." 172 | ) 173 | 174 | 175 | def validate_number_of_cores( 176 | max_cores: Optional[int] = None, 177 | max_workers: Optional[int] = None, 178 | cores_per_worker: Optional[int] = 1, 179 | set_local_cores: bool = False, 180 | ) -> int: 181 | """ 182 | Validate the number of cores and return the appropriate value. 183 | """ 184 | if max_cores is not None and max_workers is None and cores_per_worker is not None: 185 | return int(max_cores / cores_per_worker) 186 | elif max_workers is not None: 187 | return int(max_workers) 188 | elif max_cores is None and max_workers is None and not set_local_cores: 189 | raise ValueError( 190 | "Block allocation requires a fixed set of computational resources. Neither max_cores nor max_workers is defined." 191 | ) 192 | else: 193 | return multiprocessing.cpu_count() 194 | 195 | 196 | def check_file_exists(file_name: Optional[str]): 197 | if file_name is None: 198 | raise ValueError("file_name is not set.") 199 | if not os.path.exists(file_name): 200 | raise ValueError("file_name does not exist on the file system.") 201 | -------------------------------------------------------------------------------- /executorlib/standalone/interactive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/executorlib/standalone/interactive/__init__.py -------------------------------------------------------------------------------- /executorlib/standalone/interactive/arguments.py: -------------------------------------------------------------------------------- 1 | from asyncio.exceptions import CancelledError 2 | from concurrent.futures import Future, TimeoutError 3 | from typing import Any, Union 4 | 5 | 6 | def get_future_objects_from_input(args: tuple, kwargs: dict): 7 | """ 8 | Check the input parameters for future objects and determine whether all of these future objects have completed 9 | 10 | Args: 11 | args (tuple): function arguments 12 | kwargs (dict): function keyword arguments 13 | 14 | Returns: 15 | list, boolean: list of future objects and a boolean flag indicating whether all future objects are already done 16 | """ 17 | future_lst = [] 18 | 19 | def find_future_in_list(lst): 20 | for el in lst: 21 | if isinstance(el, Future): 22 | future_lst.append(el) 23 | elif isinstance(el, list): 24 | find_future_in_list(lst=el) 25 | elif isinstance(el, dict): 26 | find_future_in_list(lst=el.values()) 27 | 28 | find_future_in_list(lst=args) 29 | find_future_in_list(lst=kwargs.values()) 30 | boolean_flag = len([future for future in future_lst if future.done()]) == len( 31 | future_lst 32 | ) 33 | return future_lst, boolean_flag 34 | 35 | 36 | def get_exception_lst(future_lst: list[Future]) -> list: 37 | """ 38 | Get the list of exceptions raised by the future objects in the given list 39 | 40 | Args: 41 | future_lst (list): list of future objects 42 | 43 | Returns: 44 | list: list of exceptions raised by the future objects. Returns an empty list if no 45 | exception was raised.
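
        Example (an illustrative sketch built from plain Future objects):

        >>> from concurrent.futures import Future
        >>> f_done, f_failed = Future(), Future()
        >>> f_done.set_result(1)
        >>> f_failed.set_exception(RuntimeError("failed"))
        >>> get_exception_lst(future_lst=[f_done, f_failed])
        [RuntimeError('failed')]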
46 | """ 47 | return [ 48 | f.exception() for f in future_lst if check_exception_was_raised(future_obj=f) 49 | ] 50 | 51 | 52 | def check_exception_was_raised(future_obj: Future) -> bool: 53 | """ 54 | Check if exception was raised by future object 55 | 56 | Args: 57 | future_obj (Future): future object 58 | 59 | Returns: 60 | bool: True if exception was raised, False otherwise 61 | """ 62 | try: 63 | excp = future_obj.exception(timeout=10**-10) 64 | return excp is not None and not isinstance(excp, CancelledError) 65 | except TimeoutError: 66 | return False 67 | 68 | 69 | def update_futures_in_input(args: tuple, kwargs: dict) -> tuple[tuple, dict]: 70 | """ 71 | Evaluate future objects in the arguments and keyword arguments by calling future.result() 72 | 73 | Args: 74 | args (tuple): function arguments 75 | kwargs (dict): function keyword arguments 76 | 77 | Returns: 78 | tuple, dict: arguments and keyword arguments with each future object in them being evaluated 79 | """ 80 | 81 | def get_result(arg: Union[list[Future], Future]) -> Any: 82 | if isinstance(arg, Future): 83 | return arg.result() 84 | elif isinstance(arg, list): 85 | return [get_result(arg=el) for el in arg] 86 | elif isinstance(arg, dict): 87 | return {k: get_result(arg=v) for k, v in arg.items()} 88 | else: 89 | return arg 90 | 91 | args = tuple([get_result(arg=arg) for arg in args]) 92 | kwargs = {key: get_result(arg=value) for key, value in kwargs.items()} 93 | return args, kwargs 94 | -------------------------------------------------------------------------------- /executorlib/standalone/interactive/backend.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from typing import Any, Callable, Optional 3 | 4 | 5 | def call_funct( 6 | input_dict: dict, funct: Optional[Callable] = None, memory: Optional[dict] = None 7 | ) -> Any: 8 | """ 9 | Call function from dictionary 10 | 11 | Args: 12 | input_dict (dict): dictionary containing the function 'fn', its arguments 'args' and keyword arguments 'kwargs' 13 | funct (Callable, optional): function to be evaluated if it is not included in the input dictionary 14 | memory (dict, optional): variables stored in memory which can be used as keyword arguments 15 | 16 | Returns: 17 | Any: Result of the function 18 | """ 19 | if funct is None: 20 | 21 | def funct(*args, **kwargs): 22 | return args[0].__call__(*args[1:], **kwargs) 23 | 24 | funct_args = inspect.getfullargspec(input_dict["fn"]).args 25 | if memory is not None: 26 | input_dict["kwargs"].update( 27 | _update_dict_delta( 28 | dict_input=memory, 29 | dict_output=input_dict["kwargs"], 30 | keys_possible_lst=funct_args, 31 | ) 32 | ) 33 | return funct(input_dict["fn"], *input_dict["args"], **input_dict["kwargs"]) 34 | 35 | 36 | def parse_arguments(argument_lst: list[str]) -> dict: 37 | """ 38 | Simple function to parse command line arguments 39 | 40 | Args: 41 | argument_lst (list): list of arguments as strings 42 | 43 | Returns: 44 | dict: dictionary with the parsed arguments and their corresponding values 45 | """ 46 | return update_default_dict_from_arguments( 47 | argument_lst=argument_lst, 48 | argument_dict={ 49 | "zmqport": "--zmqport", 50 | "host": "--host", 51 | }, 52 | default_dict={"host": "localhost"}, 53 | ) 54 | 55 | 56 | def update_default_dict_from_arguments( 57 | argument_lst: list[str], argument_dict: dict, default_dict: dict 58 | ) -> dict: 59 | """ 60 | Update default dictionary with values from command line arguments 61 | 62 | Args: 63 | argument_lst 
(list[str]): List of arguments as strings 64 | argument_dict (dict): Dictionary mapping argument names to their corresponding command line flags 65 | default_dict (dict): Default dictionary to be updated 66 | 67 | Returns: 68 | dict: Updated default dictionary 69 | """ 70 | default_dict.update( 71 | { 72 | k: argument_lst[argument_lst.index(v) + 1] 73 | for k, v in argument_dict.items() 74 | if v in argument_lst 75 | } 76 | ) 77 | return default_dict 78 | 79 | 80 | def _update_dict_delta( 81 | dict_input: dict, dict_output: dict, keys_possible_lst: list[str] 82 | ) -> dict: 83 | """ 84 | Update dictionary with values from another dictionary, only if the keys are present in a given list and not yet contained in the output dictionary 85 | 86 | Args: 87 | dict_input (dict): Input dictionary 88 | dict_output (dict): Output dictionary to be updated 89 | keys_possible_lst (list[str]): List of possible keys to be updated 90 | 91 | Returns: 92 | dict: Updated dictionary 93 | """ 94 | return { 95 | k: v 96 | for k, v in dict_input.items() 97 | if k in keys_possible_lst and k not in dict_output 98 | } 99 | -------------------------------------------------------------------------------- /executorlib/standalone/interactive/spawner.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from abc import ABC, abstractmethod 3 | from typing import Optional 4 | 5 | MPI_COMMAND = "mpiexec" 6 | 7 | 8 | class BaseSpawner(ABC): 9 | def __init__( 10 | self, 11 | cwd: Optional[str] = None, 12 | cores: int = 1, 13 | openmpi_oversubscribe: bool = False, 14 | ): 15 | """ 16 | Base class for interface implementations. 17 | 18 | Args: 19 | cwd (str, optional): The current working directory. Defaults to None. 20 | cores (int, optional): The number of cores to use. Defaults to 1. 21 | openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False. 22 | """ 23 | self._cwd = cwd 24 | self._cores = cores 25 | self._openmpi_oversubscribe = openmpi_oversubscribe 26 | 27 | @abstractmethod 28 | def bootup( 29 | self, 30 | command_lst: list[str], 31 | ): 32 | """ 33 | Method to start the interface. 34 | 35 | Args: 36 | command_lst (list[str]): The command list to execute. 37 | """ 38 | raise NotImplementedError 39 | 40 | @abstractmethod 41 | def shutdown(self, wait: bool = True): 42 | """ 43 | Method to shutdown the interface. 44 | 45 | Args: 46 | wait (bool, optional): Whether to wait for the interface to shutdown. Defaults to True. 47 | """ 48 | raise NotImplementedError 49 | 50 | @abstractmethod 51 | def poll(self): 52 | """ 53 | Method to check if the interface is running. 54 | 55 | Returns: 56 | bool: True if the interface is running, False otherwise. 57 | """ 58 | raise NotImplementedError 59 | 60 | 61 | class SubprocessSpawner(BaseSpawner): 62 | def __init__( 63 | self, 64 | cwd: Optional[str] = None, 65 | cores: int = 1, 66 | openmpi_oversubscribe: bool = False, 67 | threads_per_core: int = 1, 68 | ): 69 | """ 70 | Subprocess interface implementation. 71 | 72 | Args: 73 | cwd (str, optional): The current working directory. Defaults to None. 74 | cores (int, optional): The number of cores to use. Defaults to 1. 75 | threads_per_core (int, optional): The number of threads per core. Defaults to 1. 76 | openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False.
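
        Example (a minimal sketch; the echo command is illustrative only):

        >>> spawner = SubprocessSpawner(cwd=None, cores=1)
        >>> spawner.bootup(command_lst=["echo", "hello"])  # doctest: +SKIP
        >>> spawner.shutdown(wait=True)  # doctest: +SKIP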
77 | """ 78 | super().__init__( 79 | cwd=cwd, 80 | cores=cores, 81 | openmpi_oversubscribe=openmpi_oversubscribe, 82 | ) 83 | self._process: Optional[subprocess.Popen] = None 84 | self._threads_per_core = threads_per_core 85 | 86 | def bootup( 87 | self, 88 | command_lst: list[str], 89 | ): 90 | """ 91 | Method to start the subprocess interface. 92 | 93 | Args: 94 | command_lst (list[str]): The command list to execute. 95 | """ 96 | self._process = subprocess.Popen( 97 | args=self.generate_command(command_lst=command_lst), 98 | cwd=self._cwd, 99 | stdin=subprocess.DEVNULL, 100 | ) 101 | 102 | def generate_command(self, command_lst: list[str]) -> list[str]: 103 | """ 104 | Method to generate the command list. 105 | 106 | Args: 107 | command_lst (list[str]): The command list. 108 | 109 | Returns: 110 | list[str]: The generated command list. 111 | """ 112 | return command_lst 113 | 114 | def shutdown(self, wait: bool = True): 115 | """ 116 | Method to shutdown the subprocess interface. 117 | 118 | Args: 119 | wait (bool, optional): Whether to wait for the interface to shutdown. Defaults to True. 120 | """ 121 | if self._process is not None: 122 | self._process.communicate() 123 | self._process.terminate() 124 | if wait: 125 | self._process.wait() 126 | self._process = None 127 | 128 | def poll(self) -> bool: 129 | """ 130 | Method to check if the subprocess interface is running. 131 | 132 | Returns: 133 | bool: True if the interface is running, False otherwise. 134 | """ 135 | return self._process is not None and self._process.poll() is None 136 | 137 | 138 | class MpiExecSpawner(SubprocessSpawner): 139 | def generate_command(self, command_lst: list[str]) -> list[str]: 140 | """ 141 | Generate the command list for the MPIExec interface. 142 | 143 | Args: 144 | command_lst (list[str]): The command list. 145 | 146 | Returns: 147 | list[str]: The generated command list. 148 | """ 149 | command_prepend_lst = generate_mpiexec_command( 150 | cores=self._cores, 151 | openmpi_oversubscribe=self._openmpi_oversubscribe, 152 | ) 153 | return super().generate_command( 154 | command_lst=command_prepend_lst + command_lst, 155 | ) 156 | 157 | 158 | def generate_mpiexec_command( 159 | cores: int, openmpi_oversubscribe: bool = False 160 | ) -> list[str]: 161 | """ 162 | Generate the command list for the MPIExec interface. 163 | 164 | Args: 165 | cores (int): The number of cores. 166 | openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False. 167 | 168 | Returns: 169 | list[str]: The generated command list. 170 | """ 171 | if cores == 1: 172 | return [] 173 | else: 174 | command_prepend_lst = [MPI_COMMAND, "-n", str(cores)] 175 | if openmpi_oversubscribe: 176 | command_prepend_lst += ["--oversubscribe"] 177 | return command_prepend_lst 178 | -------------------------------------------------------------------------------- /executorlib/standalone/plot.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from concurrent.futures import Future 3 | from typing import Optional 4 | 5 | import cloudpickle 6 | 7 | 8 | def generate_nodes_and_edges( 9 | task_hash_dict: dict, future_hash_inverse_dict: dict 10 | ) -> tuple[list, list]: 11 | """ 12 | Generate nodes and edges for visualization. 13 | 14 | Args: 15 | task_hash_dict (dict): Dictionary mapping task hash to task information. 16 | future_hash_inverse_dict (dict): Dictionary mapping future hash to future object. 
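
        Example (illustrative layout of the returned entries, assuming a task
        task_b which consumes the Future of a task task_a as keyword argument x):

        node_lst = [{"name": "task_a", "id": 0, "shape": "box"},
                    {"name": "task_b", "id": 1, "shape": "box"}]
        edge_lst = [{"start": 0, "end": 1, "label": "x"}]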
17 | 18 | Returns: 19 | Tuple[list, list]: Tuple containing the list of nodes and the list of edges. 20 | """ 21 | node_lst: list = [] 22 | edge_lst: list = [] 23 | hash_id_dict: dict = {} 24 | 25 | def add_element(arg, link_to, label=""): 26 | """ 27 | Add element to the node and edge lists. 28 | 29 | Args: 30 | arg: Argument to be added. 31 | link_to: ID of the node to link the element to. 32 | label (str, optional): Label for the edge. Defaults to "". 33 | """ 34 | if isinstance(arg, Future): 35 | edge_lst.append( 36 | { 37 | "start": hash_id_dict[future_hash_inverse_dict[arg]], 38 | "end": link_to, 39 | "label": label, 40 | } 41 | ) 42 | elif isinstance(arg, list) and any(isinstance(a, Future) for a in arg): 43 | lst_no_future = [a if not isinstance(a, Future) else "$" for a in arg] 44 | node_id = len(node_lst) 45 | node_lst.append( 46 | {"name": str(lst_no_future), "id": node_id, "shape": "circle"} 47 | ) 48 | edge_lst.append({"start": node_id, "end": link_to, "label": label}) 49 | for i, a in enumerate(arg): 50 | if isinstance(a, Future): 51 | add_element(arg=a, link_to=node_id, label="ind: " + str(i)) 52 | elif isinstance(arg, dict) and any(isinstance(a, Future) for a in arg.values()): 53 | dict_no_future = { 54 | kt: vt if not isinstance(vt, Future) else "$" for kt, vt in arg.items() 55 | } 56 | node_id = len(node_lst) 57 | node_lst.append( 58 | {"name": str(dict_no_future), "id": node_id, "shape": "circle"} 59 | ) 60 | edge_lst.append({"start": node_id, "end": link_to, "label": label}) 61 | for kt, vt in arg.items(): 62 | if isinstance(vt, Future): 63 | add_element(arg=vt, link_to=node_id, label="key: " + kt) 64 | else: 65 | node_id = len(node_lst) 66 | node_lst.append({"name": str(arg), "id": node_id, "shape": "circle"}) 67 | edge_lst.append({"start": node_id, "end": link_to, "label": label}) 68 | 69 | for k, v in task_hash_dict.items(): 70 | hash_id_dict[k] = len(node_lst) 71 | node_lst.append( 72 | {"name": v["fn"].__name__, "id": hash_id_dict[k], "shape": "box"} 73 | ) 74 | for k, task_dict in task_hash_dict.items(): 75 | for arg in task_dict["args"]: 76 | add_element(arg=arg, link_to=hash_id_dict[k], label="") 77 | 78 | for kw, v in task_dict["kwargs"].items(): 79 | add_element(arg=v, link_to=hash_id_dict[k], label=str(kw)) 80 | 81 | return node_lst, edge_lst 82 | 83 | 84 | def generate_task_hash(task_dict: dict, future_hash_inverse_dict: dict) -> bytes: 85 | """ 86 | Generate a hash for a task dictionary. 87 | 88 | Args: 89 | task_dict (dict): Dictionary containing task information. 90 | future_hash_inverse_dict (dict): Dictionary mapping future hash to future object. 91 | 92 | Returns: 93 | bytes: Hash generated for the task dictionary. 94 | """ 95 | 96 | def convert_arg(arg, future_hash_inverse_dict): 97 | """ 98 | Convert an argument to its hash representation. 99 | 100 | Args: 101 | arg: Argument to be converted. 102 | future_hash_inverse_dict (dict): Dictionary mapping future hash to future object. 103 | 104 | Returns: 105 | The hash representation of the argument. 
106 | """ 107 | if isinstance(arg, Future): 108 | return future_hash_inverse_dict[arg] 109 | elif isinstance(arg, list): 110 | return [ 111 | convert_arg(arg=a, future_hash_inverse_dict=future_hash_inverse_dict) 112 | for a in arg 113 | ] 114 | elif isinstance(arg, dict): 115 | return { 116 | k: convert_arg(arg=v, future_hash_inverse_dict=future_hash_inverse_dict) 117 | for k, v in arg.items() 118 | } 119 | else: 120 | return arg 121 | 122 | args_for_hash = [ 123 | convert_arg(arg=arg, future_hash_inverse_dict=future_hash_inverse_dict) 124 | for arg in task_dict["args"] 125 | ] 126 | kwargs_for_hash = { 127 | k: convert_arg(arg=v, future_hash_inverse_dict=future_hash_inverse_dict) 128 | for k, v in task_dict["kwargs"].items() 129 | } 130 | return cloudpickle.dumps( 131 | {"fn": task_dict["fn"], "args": args_for_hash, "kwargs": kwargs_for_hash} 132 | ) 133 | 134 | 135 | def draw(node_lst: list, edge_lst: list, filename: Optional[str] = None): 136 | """ 137 | Draw the graph visualization of nodes and edges. 138 | 139 | Args: 140 | node_lst (list): List of nodes. 141 | edge_lst (list): List of edges. 142 | filename (str): Name of the file to store the plotted graph in. 143 | """ 144 | import networkx as nx # noqa 145 | 146 | graph = nx.DiGraph() 147 | for node in node_lst: 148 | graph.add_node(node["id"], label=node["name"], shape=node["shape"]) 149 | for edge in edge_lst: 150 | graph.add_edge(edge["start"], edge["end"], label=edge["label"]) 151 | if filename is not None: 152 | file_format = os.path.splitext(filename)[-1][1:] 153 | with open(filename, "wb") as f: 154 | f.write(nx.nx_agraph.to_agraph(graph).draw(prog="dot", format=file_format)) 155 | else: 156 | from IPython.display import SVG, display # noqa 157 | 158 | display(SVG(nx.nx_agraph.to_agraph(graph).draw(prog="dot", format="svg"))) 159 | -------------------------------------------------------------------------------- /executorlib/standalone/queue.py: -------------------------------------------------------------------------------- 1 | import queue 2 | 3 | 4 | def cancel_items_in_queue(que: queue.Queue): 5 | """ 6 | Cancel items which are still waiting in the queue. If the executor is busy tasks remain in the queue, so the future 7 | objects have to be cancelled when the executor shuts down. 8 | 9 | Args: 10 | que (queue.Queue): Queue with task objects which should be executed 11 | """ 12 | while True: 13 | try: 14 | item = que.get_nowait() 15 | if isinstance(item, dict) and "future" in item: 16 | item["future"].cancel() 17 | que.task_done() 18 | except queue.Empty: 19 | break 20 | -------------------------------------------------------------------------------- /executorlib/standalone/serialize.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import inspect 3 | import re 4 | from typing import Callable, Optional 5 | 6 | import cloudpickle 7 | 8 | 9 | def cloudpickle_register(ind: int = 2): 10 | """ 11 | Cloudpickle can either pickle by value or pickle by reference. The functions which are communicated have to 12 | be pickled by value rather than by reference, so the module which calls the map function is pickled by value. 
13 | https://github.com/cloudpipe/cloudpickle#overriding-pickles-serialization-mechanism-for-importable-constructs 14 | inspect can help to find the module which is calling executorlib 15 | https://docs.python.org/3/library/inspect.html 16 | to learn more about inspect another good read is: 17 | http://pymotw.com/2/inspect/index.html#module-inspect 18 | 1 refers to 1 level higher than the map function 19 | 20 | Args: 21 | ind (int): index of the level at which pickle by value starts while for the rest pickle by reference is used 22 | """ 23 | try: # When executed in a jupyter notebook this can cause a ValueError - in this case we just ignore it. 24 | cloudpickle.register_pickle_by_value(inspect.getmodule(inspect.stack()[ind][0])) 25 | except IndexError: 26 | cloudpickle_register(ind=ind - 1) 27 | except ValueError: 28 | pass 29 | 30 | 31 | def serialize_funct_h5( 32 | fn: Callable, 33 | fn_args: Optional[list] = None, 34 | fn_kwargs: Optional[dict] = None, 35 | resource_dict: Optional[dict] = None, 36 | ) -> tuple[str, dict]: 37 | """ 38 | Serialize a function and its arguments and keyword arguments into an HDF5 file. 39 | 40 | Args: 41 | fn (Callable): The function to be serialized. 42 | fn_args (list): The arguments of the function. 43 | fn_kwargs (dict): The keyword arguments of the function. 44 | resource_dict (dict): resource dictionary, which defines the resources used for the execution of the function. 45 | Example resource dictionary: { 46 | cores: 1, 47 | threads_per_core: 1, 48 | gpus_per_worker: 0, 49 | oversubscribe: False, 50 | cwd: None, 51 | executor: None, 52 | hostname_localhost: False, 53 | } 54 | 55 | Returns: 56 | Tuple[str, dict]: A tuple containing the task key and the serialized data. 57 | 58 | """ 59 | if fn_args is None: 60 | fn_args = [] 61 | if fn_kwargs is None: 62 | fn_kwargs = {} 63 | if resource_dict is None: 64 | resource_dict = {} 65 | binary_all = cloudpickle.dumps( 66 | {"fn": fn, "args": fn_args, "kwargs": fn_kwargs, "resource_dict": resource_dict} 67 | ) 68 | task_key = fn.__name__ + _get_hash(binary=binary_all) 69 | data = { 70 | "fn": fn, 71 | "args": fn_args, 72 | "kwargs": fn_kwargs, 73 | "resource_dict": resource_dict, 74 | } 75 | return task_key, data 76 | 77 | 78 | def _get_hash(binary: bytes) -> str: 79 | """ 80 | Get the hash of a binary. 81 | 82 | Args: 83 | binary (bytes): The binary to be hashed. 84 | 85 | Returns: 86 | str: The hash of the binary. 
87 | 88 | """ 89 | # Remove specification of jupyter kernel from hash to be deterministic 90 | binary_no_ipykernel = re.sub(b"(?<=/ipykernel_)(.*)(?=/)", b"", binary) 91 | return str(hashlib.md5(binary_no_ipykernel).hexdigest()) 92 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/executorlib/task_scheduler/__init__.py -------------------------------------------------------------------------------- /executorlib/task_scheduler/base.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import queue 3 | from concurrent.futures import ( 4 | Executor as FutureExecutor, 5 | ) 6 | from concurrent.futures import ( 7 | Future, 8 | ) 9 | from threading import Thread 10 | from typing import Callable, Optional, Union 11 | 12 | from executorlib.standalone.inputcheck import check_resource_dict 13 | from executorlib.standalone.queue import cancel_items_in_queue 14 | from executorlib.standalone.serialize import cloudpickle_register 15 | 16 | 17 | class TaskSchedulerBase(FutureExecutor): 18 | """ 19 | Base class for the executor. 20 | 21 | Args: 22 | max_cores (int): defines the number of cores which can be used in parallel 23 | """ 24 | 25 | def __init__(self, max_cores: Optional[int] = None): 26 | """ 27 | Initialize the TaskSchedulerBase class. 28 | """ 29 | cloudpickle_register(ind=3) 30 | self._process_kwargs: dict = {} 31 | self._max_cores = max_cores 32 | self._future_queue: Optional[queue.Queue] = queue.Queue() 33 | self._process: Optional[Union[Thread, list[Thread]]] = None 34 | 35 | @property 36 | def max_workers(self) -> Optional[int]: 37 | return self._process_kwargs.get("max_workers") 38 | 39 | @max_workers.setter 40 | def max_workers(self, max_workers: int): 41 | raise NotImplementedError("The max_workers setter is not implemented.") 42 | 43 | @property 44 | def info(self) -> Optional[dict]: 45 | """ 46 | Get the information about the executor. 47 | 48 | Returns: 49 | Optional[dict]: Information about the executor. 50 | """ 51 | meta_data_dict = self._process_kwargs.copy() 52 | if "future_queue" in meta_data_dict: 53 | del meta_data_dict["future_queue"] 54 | if self._process is not None and isinstance(self._process, list): 55 | meta_data_dict["max_workers"] = len(self._process) 56 | return meta_data_dict 57 | elif self._process is not None: 58 | return meta_data_dict 59 | else: 60 | return None 61 | 62 | @property 63 | def future_queue(self) -> Optional[queue.Queue]: 64 | """ 65 | Get the future queue. 66 | 67 | Returns: 68 | queue.Queue: The future queue. 69 | """ 70 | return self._future_queue 71 | 72 | def submit( # type: ignore 73 | self, 74 | fn: Callable, 75 | /, 76 | *args, 77 | resource_dict: Optional[dict] = None, 78 | **kwargs, 79 | ) -> Future: 80 | """ 81 | Submits a callable to be executed with the given arguments. 82 | 83 | Schedules the callable to be executed as fn(*args, **kwargs) and returns 84 | a Future instance representing the execution of the callable. 85 | 86 | Args: 87 | fn (callable): function to submit for execution 88 | args: arguments for the submitted function 89 | kwargs: keyword arguments for the submitted function 90 | resource_dict (dict): resource dictionary, which defines the resources used for the execution of the 91 | function.
Example resource dictionary: { 92 | cores: 1, 93 | threads_per_core: 1, 94 | gpus_per_worker: 0, 95 | oversubscribe: False, 96 | cwd: None, 97 | executor: None, 98 | hostname_localhost: False, 99 | } 100 | 101 | Returns: 102 | Future: A Future representing the given call. 103 | """ 104 | if resource_dict is None: 105 | resource_dict = {} 106 | cores = resource_dict.get("cores") 107 | if ( 108 | cores is not None 109 | and self._max_cores is not None 110 | and cores > self._max_cores 111 | ): 112 | raise ValueError( 113 | "The specified number of cores is larger than the available number of cores." 114 | ) 115 | check_resource_dict(function=fn) 116 | f: Future = Future() 117 | if self._future_queue is not None: 118 | self._future_queue.put( 119 | { 120 | "fn": fn, 121 | "args": args, 122 | "kwargs": kwargs, 123 | "future": f, 124 | "resource_dict": resource_dict, 125 | } 126 | ) 127 | return f 128 | 129 | def shutdown(self, wait: bool = True, *, cancel_futures: bool = False): 130 | """ 131 | Clean-up the resources associated with the Executor. 132 | 133 | It is safe to call this method several times. Otherwise, no other 134 | methods can be called after this one. 135 | 136 | Args: 137 | wait (bool): If True then shutdown will not return until all running 138 | futures have finished executing and the resources used by the 139 | executor have been reclaimed. 140 | cancel_futures (bool): If True then shutdown will cancel all pending 141 | futures. Futures that are completed or running will not be 142 | cancelled. 143 | """ 144 | if cancel_futures and self._future_queue is not None: 145 | cancel_items_in_queue(que=self._future_queue) 146 | if self._process is not None and self._future_queue is not None: 147 | self._future_queue.put({"shutdown": True, "wait": wait}) 148 | if wait and isinstance(self._process, Thread): 149 | self._process.join() 150 | self._future_queue.join() 151 | self._process = None 152 | self._future_queue = None 153 | 154 | def _set_process(self, process: Thread): 155 | """ 156 | Set the process for the executor. 157 | 158 | Args: 159 | process (Thread): The process for the executor. 160 | """ 161 | self._process = process 162 | self._process.start() 163 | 164 | def __len__(self) -> int: 165 | """ 166 | Get the number of tasks currently waiting in the future queue. 167 | 168 | Returns: 169 | int: The number of waiting tasks. 170 | """ 171 | queue_size = 0 172 | if self._future_queue is not None: 173 | queue_size = self._future_queue.qsize() 174 | return queue_size 175 | 176 | def __del__(self): 177 | """ 178 | Clean-up the resources associated with the Executor.
179 | """ 180 | with contextlib.suppress(AttributeError, RuntimeError): 181 | self.shutdown(wait=False) 182 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/file/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/executorlib/task_scheduler/file/__init__.py -------------------------------------------------------------------------------- /executorlib/task_scheduler/file/backend.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from typing import Any 4 | 5 | from executorlib.task_scheduler.file.hdf import dump, load 6 | from executorlib.task_scheduler.file.shared import FutureItem 7 | 8 | 9 | def backend_load_file(file_name: str) -> dict: 10 | """ 11 | Load the data from an HDF5 file and convert FutureItem objects to their results. 12 | 13 | Args: 14 | file_name (str): The name of the HDF5 file. 15 | 16 | Returns: 17 | dict: The loaded data from the file. 18 | 19 | """ 20 | apply_dict = load(file_name=file_name) 21 | apply_dict["args"] = [ 22 | arg if not _isinstance(arg, FutureItem) else arg.result() 23 | for arg in apply_dict["args"] 24 | ] 25 | apply_dict["kwargs"] = { 26 | key: arg if not _isinstance(arg, FutureItem) else arg.result() 27 | for key, arg in apply_dict["kwargs"].items() 28 | } 29 | return apply_dict 30 | 31 | 32 | def backend_write_file(file_name: str, output: Any, runtime: float) -> None: 33 | """ 34 | Write the output to an HDF5 file. 35 | 36 | Args: 37 | file_name (str): The name of the HDF5 file. 38 | output (Any): The output to be written. 39 | runtime (float): Time for executing function. 40 | 41 | Returns: 42 | None 43 | 44 | """ 45 | file_name_out = os.path.splitext(file_name)[0] 46 | os.rename(file_name, file_name_out + ".h5ready") 47 | if "result" in output: 48 | dump( 49 | file_name=file_name_out + ".h5ready", 50 | data_dict={"output": output["result"], "runtime": runtime}, 51 | ) 52 | else: 53 | dump( 54 | file_name=file_name_out + ".h5ready", 55 | data_dict={"error": output["error"], "runtime": runtime}, 56 | ) 57 | os.rename(file_name_out + ".h5ready", file_name_out + ".h5out") 58 | 59 | 60 | def backend_execute_task_in_file(file_name: str) -> None: 61 | """ 62 | Execute the task stored in a given HDF5 file. 63 | 64 | Args: 65 | file_name (str): The file name of the HDF5 file as an absolute path. 
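
    Example (a minimal sketch; the absolute path is illustrative only and the
    file has to be written by the submitting process beforehand):

    >>> backend_execute_task_in_file(file_name="/absolute/path/task.h5")  # doctest: +SKIP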
66 | 67 | Returns: 68 | None 69 | """ 70 | apply_dict = backend_load_file(file_name=file_name) 71 | time_start = time.time() 72 | try: 73 | result = { 74 | "result": apply_dict["fn"].__call__( 75 | *apply_dict["args"], **apply_dict["kwargs"] 76 | ) 77 | } 78 | except Exception as error: 79 | result = {"error": error} 80 | 81 | backend_write_file( 82 | file_name=file_name, 83 | output=result, 84 | runtime=time.time() - time_start, 85 | ) 86 | 87 | 88 | def _isinstance(obj: Any, cls: type) -> bool: 89 | return str(obj.__class__) == str(cls) 90 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/file/hdf.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | import cloudpickle 4 | import h5py 5 | import numpy as np 6 | 7 | from executorlib.standalone.cache import group_dict 8 | 9 | 10 | def dump(file_name: Optional[str], data_dict: dict) -> None: 11 | """ 12 | Dump data dictionary into HDF5 file 13 | 14 | Args: 15 | file_name (str): file name of the HDF5 file as absolute path 16 | data_dict (dict): dictionary containing the python function to be executed {"fn": ..., "args": (), "kwargs": {}} 17 | """ 18 | if file_name is not None: 19 | with h5py.File(file_name, "a") as fname: 20 | for data_key, data_value in data_dict.items(): 21 | if data_key in group_dict: 22 | fname.create_dataset( 23 | name="/" + group_dict[data_key], 24 | data=np.void(cloudpickle.dumps(data_value)), 25 | ) 26 | 27 | 28 | def load(file_name: str) -> dict: 29 | """ 30 | Load data dictionary from HDF5 file 31 | 32 | Args: 33 | file_name (str): file name of the HDF5 file as absolute path 34 | 35 | Returns: 36 | dict: dictionary containing the python function to be executed {"fn": ..., "args": (), "kwargs": {}} 37 | """ 38 | with h5py.File(file_name, "r") as hdf: 39 | data_dict = {} 40 | if "function" in hdf: 41 | data_dict["fn"] = cloudpickle.loads(np.void(hdf["/function"])) 42 | else: 43 | raise TypeError("Function not found in HDF5 file.") 44 | if "input_args" in hdf: 45 | data_dict["args"] = cloudpickle.loads(np.void(hdf["/input_args"])) 46 | else: 47 | data_dict["args"] = () 48 | if "input_kwargs" in hdf: 49 | data_dict["kwargs"] = cloudpickle.loads(np.void(hdf["/input_kwargs"])) 50 | else: 51 | data_dict["kwargs"] = {} 52 | return data_dict 53 | 54 | 55 | def get_output(file_name: str) -> tuple[bool, bool, Any]: 56 | """ 57 | Check if output is available in the HDF5 file 58 | 59 | Args: 60 | file_name (str): file name of the HDF5 file as absolute path 61 | 62 | Returns: 63 | Tuple[bool, bool, object]: boolean flag indicating if output is available and the output object itself 64 | """ 65 | with h5py.File(file_name, "r") as hdf: 66 | if "output" in hdf: 67 | return True, True, cloudpickle.loads(np.void(hdf["/output"])) 68 | elif "error" in hdf: 69 | return True, False, cloudpickle.loads(np.void(hdf["/error"])) 70 | else: 71 | return False, False, None 72 | 73 | 74 | def get_runtime(file_name: str) -> float: 75 | """ 76 | Get run time from HDF5 file 77 | 78 | Args: 79 | file_name (str): file name of the HDF5 file as absolute path 80 | 81 | Returns: 82 | float: run time from the execution of the python function 83 | """ 84 | with h5py.File(file_name, "r") as hdf: 85 | if "runtime" in hdf: 86 | return cloudpickle.loads(np.void(hdf["/runtime"])) 87 | else: 88 | return 0.0 89 | 90 | 91 | def get_queue_id(file_name: Optional[str]) -> Optional[int]: 92 | """ 93 | Get queuing system id from HDF5 
file 94 | 95 | Args: 96 | file_name (str): file name of the HDF5 file as absolute path 97 | 98 | Returns: 99 | int: queuing system id from the execution of the python function 100 | """ 101 | if file_name is not None: 102 | with h5py.File(file_name, "r") as hdf: 103 | if "queue_id" in hdf: 104 | return cloudpickle.loads(np.void(hdf["/queue_id"])) 105 | return None 106 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/file/queue_spawner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | from typing import Optional, Union 4 | 5 | from pysqa import QueueAdapter 6 | 7 | from executorlib.standalone.inputcheck import check_file_exists 8 | from executorlib.task_scheduler.file.hdf import dump, get_queue_id 9 | 10 | 11 | def execute_with_pysqa( 12 | command: list, 13 | task_dependent_lst: Optional[list[int]] = None, 14 | file_name: Optional[str] = None, 15 | resource_dict: Optional[dict] = None, 16 | config_directory: Optional[str] = None, 17 | backend: Optional[str] = None, 18 | cache_directory: Optional[str] = None, 19 | ) -> Optional[int]: 20 | """ 21 | Execute a command by submitting it to the queuing system 22 | 23 | Args: 24 | command (list): The command to be executed. 25 | task_dependent_lst (list): A list of subprocesses that the current subprocess depends on. Defaults to []. 26 | file_name (str): Name of the HDF5 file which contains the Python function 27 | resource_dict (dict): resource dictionary, which defines the resources used for the execution of the function. 28 | Example resource dictionary: { 29 | cwd: None, 30 | } 31 | config_directory (str, optional): path to the config directory. 32 | backend (str, optional): name of the backend used to spawn tasks. 33 | cache_directory (str): The directory to store the HDF5 files. 
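
        Example (a minimal sketch; the command, file name and backend are
        illustrative only):

        >>> queue_id = execute_with_pysqa(
        ...     command=["python", "-m", "my_backend", "/absolute/path/task.h5"],
        ...     file_name="/absolute/path/task.h5",
        ...     backend="slurm",
        ...     cache_directory="/absolute/path",
        ... )  # doctest: +SKIP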
34 | 35 | Returns: 36 | int: queuing system ID 37 | """ 38 | if task_dependent_lst is None: 39 | task_dependent_lst = [] 40 | check_file_exists(file_name=file_name) 41 | queue_id = get_queue_id(file_name=file_name) 42 | qa = QueueAdapter( 43 | directory=config_directory, 44 | queue_type=backend, 45 | execute_command=_pysqa_execute_command, 46 | ) 47 | if queue_id is None or qa.get_status_of_job(process_id=queue_id) is None: 48 | if resource_dict is None: 49 | resource_dict = {} 50 | if "cwd" in resource_dict and resource_dict["cwd"] is not None: 51 | cwd = resource_dict["cwd"] 52 | else: 53 | cwd = cache_directory 54 | submit_kwargs = { 55 | "command": " ".join(command), 56 | "dependency_list": [str(qid) for qid in task_dependent_lst], 57 | "working_directory": os.path.abspath(cwd), 58 | } 59 | if "cwd" in resource_dict: 60 | del resource_dict["cwd"] 61 | if "threads_per_core" in resource_dict: 62 | resource_dict["cores"] *= resource_dict["threads_per_core"] 63 | del resource_dict["threads_per_core"] 64 | unsupported_keys = [ 65 | "gpus_per_core", 66 | "openmpi_oversubscribe", 67 | "slurm_cmd_args", 68 | ] 69 | for k in unsupported_keys: 70 | if k in resource_dict: 71 | del resource_dict[k] 72 | if "job_name" not in resource_dict: 73 | resource_dict["job_name"] = os.path.basename( 74 | os.path.dirname(os.path.abspath(cwd)) 75 | ) 76 | submit_kwargs.update(resource_dict) 77 | queue_id = qa.submit_job(**submit_kwargs) 78 | dump(file_name=file_name, data_dict={"queue_id": queue_id}) 79 | return queue_id 80 | 81 | 82 | def _pysqa_execute_command( 83 | commands: str, 84 | working_directory: Optional[str] = None, 85 | split_output: bool = True, 86 | shell: bool = False, 87 | error_filename: str = "pysqa.err", 88 | ) -> Union[str, list[str]]: 89 | """ 90 | A wrapper around the subprocess.check_output function. Modified from pysqa to raise an exception if the subprocess 91 | fails to submit the job to the queue. 92 | 93 | Args: 94 | commands (str): The command(s) to be executed on the command line 95 | working_directory (str, optional): The directory where the command is executed. Defaults to None. 96 | split_output (bool, optional): Boolean flag to split newlines in the output. Defaults to True. 97 | shell (bool, optional): Additional switch to convert commands to a single string. Defaults to False. 98 | error_filename (str, optional): In case the execution fails, the output is written to this file. Defaults to "pysqa.err". 
99 | 100 | Returns: 101 | Union[str, List[str]]: Output of the shell command either as a string or as a list of strings 102 | """ 103 | if shell and isinstance(commands, list): 104 | commands = " ".join(commands) 105 | out = subprocess.check_output( 106 | commands, 107 | cwd=working_directory, 108 | stderr=subprocess.STDOUT, 109 | universal_newlines=True, 110 | shell=not isinstance(commands, list), 111 | ) 112 | if out is not None and split_output: 113 | return out.split("\n") 114 | else: 115 | return out 116 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/file/subprocess_spawner.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import time 3 | from typing import Optional 4 | 5 | from executorlib.standalone.inputcheck import check_file_exists 6 | 7 | 8 | def execute_in_subprocess( 9 | command: list, 10 | task_dependent_lst: Optional[list] = None, 11 | file_name: Optional[str] = None, 12 | resource_dict: Optional[dict] = None, 13 | config_directory: Optional[str] = None, 14 | backend: Optional[str] = None, 15 | cache_directory: Optional[str] = None, 16 | ) -> subprocess.Popen: 17 | """ 18 | Execute a command in a subprocess. 19 | 20 | Args: 21 | command (list): The command to be executed. 22 | task_dependent_lst (list): A list of subprocesses that the current subprocess depends on. Defaults to []. 23 | file_name (str): Name of the HDF5 file which contains the Python function 24 | resource_dict (dict): resource dictionary, which defines the resources used for the execution of the function. 25 | Example resource dictionary: { 26 | cwd: None, 27 | } 28 | config_directory (str, optional): path to the config directory. 29 | backend (str, optional): name of the backend used to spawn tasks. 30 | cache_directory (str): The directory to store the HDF5 files. 31 | 32 | Returns: 33 | subprocess.Popen: The subprocess object. 34 | 35 | """ 36 | if task_dependent_lst is None: 37 | task_dependent_lst = [] 38 | check_file_exists(file_name=file_name) 39 | while len(task_dependent_lst) > 0: 40 | task_dependent_lst = [ 41 | task for task in task_dependent_lst if task.poll() is None 42 | ] 43 | if config_directory is not None: 44 | raise ValueError( 45 | "config_directory parameter is not supported for subprocess spawner." 46 | ) 47 | if backend is not None: 48 | raise ValueError("backend parameter is not supported for subprocess spawner.") 49 | if resource_dict is None: 50 | resource_dict = {} 51 | cwd = resource_dict.get("cwd", cache_directory) 52 | return subprocess.Popen(command, universal_newlines=True, cwd=cwd) 53 | 54 | 55 | def terminate_subprocess(task): 56 | """ 57 | Terminate a subprocess and wait for it to complete. 
58 | 59 | Args: 60 | task (subprocess.Popen): The subprocess.Popen instance to terminate 61 | """ 62 | task.terminate() 63 | while task.poll() is None: 64 | time.sleep(0.1) 65 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/file/task_scheduler.py: -------------------------------------------------------------------------------- 1 | import os 2 | from threading import Thread 3 | from typing import Callable, Optional 4 | 5 | from executorlib.standalone.inputcheck import ( 6 | check_executor, 7 | check_flux_executor_pmi_mode, 8 | check_flux_log_files, 9 | check_hostname_localhost, 10 | check_max_workers_and_cores, 11 | check_nested_flux_executor, 12 | ) 13 | from executorlib.task_scheduler.base import TaskSchedulerBase 14 | from executorlib.task_scheduler.file.shared import execute_tasks_h5 15 | from executorlib.task_scheduler.file.subprocess_spawner import ( 16 | execute_in_subprocess, 17 | terminate_subprocess, 18 | ) 19 | 20 | try: 21 | from executorlib.task_scheduler.file.queue_spawner import execute_with_pysqa 22 | except ImportError: 23 | # If pysqa is not available fall back to executing tasks in a subprocess 24 | execute_with_pysqa = execute_in_subprocess # type: ignore 25 | 26 | 27 | class FileTaskScheduler(TaskSchedulerBase): 28 | def __init__( 29 | self, 30 | cache_directory: str = "cache", 31 | resource_dict: Optional[dict] = None, 32 | execute_function: Callable = execute_with_pysqa, 33 | terminate_function: Optional[Callable] = None, 34 | pysqa_config_directory: Optional[str] = None, 35 | backend: Optional[str] = None, 36 | disable_dependencies: bool = False, 37 | ): 38 | """ 39 | Initialize the FileExecutor. 40 | 41 | Args: 42 | cache_directory (str, optional): The directory to store cache files. Defaults to "cache". 43 | resource_dict (dict): A dictionary of resources required by the task. With the following keys: 44 | - cores (int): number of MPI cores to be used for each function call 45 | - cwd (str/None): current working directory where the parallel python task is executed 46 | execute_function (Callable, optional): The function to execute tasks. Defaults to execute_in_subprocess. 47 | terminate_function (Callable, optional): The function to terminate the tasks. 48 | pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend). 49 | backend (str, optional): name of the backend used to spawn tasks. 50 | disable_dependencies (boolean): Disable resolving future objects during the submission. 
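
        Example (a minimal sketch; uses the subprocess spawner so no queuing
        system is required):

        >>> from executorlib.task_scheduler.file.subprocess_spawner import execute_in_subprocess
        >>> with FileTaskScheduler(execute_function=execute_in_subprocess) as exe:  # doctest: +SKIP
        ...     future = exe.submit(sum, [1, 2, 3])
        ...     print(future.result())
        6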
51 | """ 52 | super().__init__(max_cores=None) 53 | default_resource_dict = { 54 | "cores": 1, 55 | "cwd": None, 56 | } 57 | if resource_dict is None: 58 | resource_dict = {} 59 | resource_dict.update( 60 | {k: v for k, v in default_resource_dict.items() if k not in resource_dict} 61 | ) 62 | if execute_function == execute_in_subprocess and terminate_function is None: 63 | terminate_function = terminate_subprocess 64 | cache_directory_path = os.path.abspath(cache_directory) 65 | os.makedirs(cache_directory_path, exist_ok=True) 66 | self._process_kwargs = { 67 | "future_queue": self._future_queue, 68 | "execute_function": execute_function, 69 | "cache_directory": cache_directory_path, 70 | "resource_dict": resource_dict, 71 | "terminate_function": terminate_function, 72 | "pysqa_config_directory": pysqa_config_directory, 73 | "backend": backend, 74 | "disable_dependencies": disable_dependencies, 75 | } 76 | self._set_process( 77 | Thread( 78 | target=execute_tasks_h5, 79 | kwargs=self._process_kwargs, 80 | ) 81 | ) 82 | 83 | 84 | def create_file_executor( 85 | max_workers: Optional[int] = None, 86 | backend: str = "flux_submission", 87 | max_cores: Optional[int] = None, 88 | cache_directory: Optional[str] = None, 89 | resource_dict: Optional[dict] = None, 90 | flux_executor=None, 91 | flux_executor_pmi_mode: Optional[str] = None, 92 | flux_executor_nesting: bool = False, 93 | flux_log_files: bool = False, 94 | pysqa_config_directory: Optional[str] = None, 95 | hostname_localhost: Optional[bool] = None, 96 | block_allocation: bool = False, 97 | init_function: Optional[Callable] = None, 98 | disable_dependencies: bool = False, 99 | ): 100 | if cache_directory is None: 101 | cache_directory = "executorlib_cache" 102 | if block_allocation: 103 | raise ValueError( 104 | "The option block_allocation is not available with the pysqa based backend." 105 | ) 106 | if init_function is not None: 107 | raise ValueError( 108 | "The option to specify an init_function is not available with the pysqa based backend." 
109 | ) 110 | check_flux_executor_pmi_mode(flux_executor_pmi_mode=flux_executor_pmi_mode) 111 | check_max_workers_and_cores(max_cores=max_cores, max_workers=max_workers) 112 | check_hostname_localhost(hostname_localhost=hostname_localhost) 113 | check_executor(executor=flux_executor) 114 | check_nested_flux_executor(nested_flux_executor=flux_executor_nesting) 115 | check_flux_log_files(flux_log_files=flux_log_files) 116 | return FileTaskScheduler( 117 | cache_directory=cache_directory, 118 | resource_dict=resource_dict, 119 | pysqa_config_directory=pysqa_config_directory, 120 | backend=backend.split("_submission")[0], 121 | disable_dependencies=disable_dependencies, 122 | ) 123 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/interactive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/executorlib/task_scheduler/interactive/__init__.py -------------------------------------------------------------------------------- /executorlib/task_scheduler/interactive/fluxspawner.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | 4 | import flux 5 | import flux.job 6 | 7 | from executorlib.standalone.interactive.spawner import BaseSpawner 8 | 9 | 10 | def validate_max_workers(max_workers: int, cores: int, threads_per_core: int): 11 | handle = flux.Flux() 12 | cores_total = flux.resource.list.resource_list(handle).get().up.ncores 13 | cores_requested = max_workers * cores * threads_per_core 14 | if cores_total < cores_requested: 15 | raise ValueError( 16 | "The number of requested cores is larger than the available cores " 17 | + str(cores_total) 18 | + " < " 19 | + str(cores_requested) 20 | ) 21 | 22 | 23 | class FluxPythonSpawner(BaseSpawner): 24 | """ 25 | A class representing the FluxPythonInterface. 26 | 27 | Args: 28 | cwd (str, optional): The current working directory. Defaults to None. 29 | cores (int, optional): The number of cores. Defaults to 1. 30 | threads_per_core (int, optional): The number of threads per core. Defaults to 1. 31 | gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0. 32 | num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None. 33 | exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute nodes. Defaults to 34 | False. 35 | openmpi_oversubscribe (bool, optional): Whether to oversubscribe. Defaults to False. 36 | priority (int, optional): job urgency 0 (lowest) through 31 (highest) (default is 16). Priorities 0 through 15 37 | are restricted to the instance owner. 38 | flux_executor (flux.job.FluxExecutor, optional): The FluxExecutor instance. Defaults to None. 39 | flux_executor_pmi_mode (str, optional): The PMI option. Defaults to None. 40 | flux_executor_nesting (bool, optional): Whether to use nested FluxExecutor. Defaults to False. 41 | flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
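
        Example (a minimal sketch; requires a running flux instance and the
        command shown is illustrative only):

        >>> spawner = FluxPythonSpawner(cores=2)
        >>> spawner.bootup(command_lst=["python", "backend_script.py"])  # doctest: +SKIP
        >>> spawner.shutdown(wait=True)  # doctest: +SKIP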
42 | """ 43 | 44 | def __init__( 45 | self, 46 | cwd: Optional[str] = None, 47 | cores: int = 1, 48 | threads_per_core: int = 1, 49 | gpus_per_core: int = 0, 50 | num_nodes: Optional[int] = None, 51 | exclusive: bool = False, 52 | priority: Optional[int] = None, 53 | openmpi_oversubscribe: bool = False, 54 | flux_executor: Optional[flux.job.FluxExecutor] = None, 55 | flux_executor_pmi_mode: Optional[str] = None, 56 | flux_executor_nesting: bool = False, 57 | flux_log_files: bool = False, 58 | ): 59 | super().__init__( 60 | cwd=cwd, 61 | cores=cores, 62 | openmpi_oversubscribe=openmpi_oversubscribe, 63 | ) 64 | self._threads_per_core = threads_per_core 65 | self._gpus_per_core = gpus_per_core 66 | self._num_nodes = num_nodes 67 | self._exclusive = exclusive 68 | self._flux_executor = flux_executor 69 | self._flux_executor_pmi_mode = flux_executor_pmi_mode 70 | self._flux_executor_nesting = flux_executor_nesting 71 | self._flux_log_files = flux_log_files 72 | self._priority = priority 73 | self._future = None 74 | 75 | def bootup( 76 | self, 77 | command_lst: list[str], 78 | ): 79 | """ 80 | Boot up the client process to connect to the SocketInterface. 81 | 82 | Args: 83 | command_lst (list[str]): List of strings to start the client process. 84 | Raises: 85 | ValueError: If oversubscribing is not supported for the Flux adapter or if conda environments are not supported. 86 | """ 87 | if self._openmpi_oversubscribe: 88 | raise ValueError( 89 | "Oversubscribing is currently not supported for the Flux adapter." 90 | ) 91 | if self._flux_executor is None: 92 | self._flux_executor = flux.job.FluxExecutor() 93 | if not self._flux_executor_nesting: 94 | jobspec = flux.job.JobspecV1.from_command( 95 | command=command_lst, 96 | num_tasks=self._cores, 97 | cores_per_task=self._threads_per_core, 98 | gpus_per_task=self._gpus_per_core, 99 | num_nodes=self._num_nodes, 100 | exclusive=self._exclusive, 101 | ) 102 | else: 103 | jobspec = flux.job.JobspecV1.from_nest_command( 104 | command=command_lst, 105 | num_slots=self._cores, 106 | cores_per_slot=self._threads_per_core, 107 | gpus_per_slot=self._gpus_per_core, 108 | num_nodes=self._num_nodes, 109 | exclusive=self._exclusive, 110 | ) 111 | jobspec.environment = dict(os.environ) 112 | if self._flux_executor_pmi_mode is not None: 113 | jobspec.setattr_shell_option("pmi", self._flux_executor_pmi_mode) 114 | if self._cwd is not None: 115 | jobspec.cwd = self._cwd 116 | if self._flux_log_files and self._cwd is not None: 117 | jobspec.stderr = os.path.join(self._cwd, "flux.err") 118 | jobspec.stdout = os.path.join(self._cwd, "flux.out") 119 | elif self._flux_log_files: 120 | jobspec.stderr = os.path.abspath("flux.err") 121 | jobspec.stdout = os.path.abspath("flux.out") 122 | if self._priority is not None: 123 | self._future = self._flux_executor.submit( 124 | jobspec=jobspec, urgency=self._priority 125 | ) 126 | else: 127 | self._future = self._flux_executor.submit(jobspec=jobspec) 128 | 129 | def shutdown(self, wait: bool = True): 130 | """ 131 | Shutdown the FluxPythonInterface. 132 | 133 | Args: 134 | wait (bool, optional): Whether to wait for the execution to complete. Defaults to True. 135 | """ 136 | if self._future is not None: 137 | if self.poll(): 138 | self._future.cancel() 139 | # The flux future objects are not instantly updated, 140 | # still showing running after cancel was called, 141 | # so we wait until the execution is completed. 
142 | self._future.result() 143 | 144 | def poll(self): 145 | """ 146 | Check if the FluxPythonInterface is running. 147 | 148 | Returns: 149 | bool: True if the interface is running, False otherwise. 150 | """ 151 | return self._future is not None and not self._future.done() 152 | -------------------------------------------------------------------------------- /executorlib/task_scheduler/interactive/slurmspawner.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | 4 | from executorlib.standalone.interactive.spawner import SubprocessSpawner 5 | 6 | SLURM_COMMAND = "srun" 7 | 8 | 9 | def validate_max_workers(max_workers: int, cores: int, threads_per_core: int): 10 | cores_total = int(os.environ["SLURM_NTASKS"]) * int( 11 | os.environ["SLURM_CPUS_PER_TASK"] 12 | ) 13 | cores_requested = max_workers * cores * threads_per_core 14 | if cores_total < cores_requested: 15 | raise ValueError( 16 | "The number of requested cores is larger than the available cores " 17 | + str(cores_total) 18 | + " < " 19 | + str(cores_requested) 20 | ) 21 | 22 | 23 | class SrunSpawner(SubprocessSpawner): 24 | def __init__( 25 | self, 26 | cwd: Optional[str] = None, 27 | cores: int = 1, 28 | threads_per_core: int = 1, 29 | gpus_per_core: int = 0, 30 | num_nodes: Optional[int] = None, 31 | exclusive: bool = False, 32 | openmpi_oversubscribe: bool = False, 33 | slurm_cmd_args: Optional[list[str]] = None, 34 | ): 35 | """ 36 | Srun interface implementation. 37 | 38 | Args: 39 | cwd (str, optional): The current working directory. Defaults to None. 40 | cores (int, optional): The number of cores to use. Defaults to 1. 41 | threads_per_core (int, optional): The number of threads per core. Defaults to 1. 42 | gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0. 43 | num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None. 44 | exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute nodes. Defaults to False. 45 | openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False. 46 | slurm_cmd_args (list[str], optional): Additional command line arguments. Defaults to []. 47 | """ 48 | super().__init__( 49 | cwd=cwd, 50 | cores=cores, 51 | openmpi_oversubscribe=openmpi_oversubscribe, 52 | threads_per_core=threads_per_core, 53 | ) 54 | self._gpus_per_core = gpus_per_core 55 | self._slurm_cmd_args = slurm_cmd_args 56 | self._num_nodes = num_nodes 57 | self._exclusive = exclusive 58 | 59 | def generate_command(self, command_lst: list[str]) -> list[str]: 60 | """ 61 | Generate the command list for the Srun interface. 62 | 63 | Args: 64 | command_lst (list[str]): The command list. 65 | 66 | Returns: 67 | list[str]: The generated command list.
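
        Example (illustrative values; the srun prefix is generated from the
        resource settings passed to the constructor):

        >>> spawner = SrunSpawner(cores=2, threads_per_core=2)
        >>> spawner.generate_command(command_lst=["python", "my_script.py"])
        ['srun', '-n', '2', '--cpus-per-task=2', 'python', 'my_script.py']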
68 | """ 69 | command_prepend_lst = generate_slurm_command( 70 | cores=self._cores, 71 | cwd=self._cwd, 72 | threads_per_core=self._threads_per_core, 73 | gpus_per_core=self._gpus_per_core, 74 | num_nodes=self._num_nodes, 75 | exclusive=self._exclusive, 76 | openmpi_oversubscribe=self._openmpi_oversubscribe, 77 | slurm_cmd_args=self._slurm_cmd_args, 78 | ) 79 | return super().generate_command( 80 | command_lst=command_prepend_lst + command_lst, 81 | ) 82 | 83 | 84 | def generate_slurm_command( 85 | cores: int, 86 | cwd: Optional[str], 87 | threads_per_core: int = 1, 88 | gpus_per_core: int = 0, 89 | num_nodes: Optional[int] = None, 90 | exclusive: bool = False, 91 | openmpi_oversubscribe: bool = False, 92 | slurm_cmd_args: Optional[list[str]] = None, 93 | ) -> list[str]: 94 | """ 95 | Generate the command list for the SLURM interface. 96 | 97 | Args: 98 | cores (int): The number of cores. 99 | cwd (str): The current working directory. 100 | threads_per_core (int, optional): The number of threads per core. Defaults to 1. 101 | gpus_per_core (int, optional): The number of GPUs per core. Defaults to 0. 102 | num_nodes (int, optional): The number of compute nodes to use for executing the task. Defaults to None. 103 | exclusive (bool): Whether to exclusively reserve the compute nodes, or allow sharing compute nodes. Defaults to False. 104 | openmpi_oversubscribe (bool, optional): Whether to oversubscribe the cores. Defaults to False. 105 | slurm_cmd_args (list[str], optional): Additional command line arguments. Defaults to []. 106 | 107 | Returns: 108 | list[str]: The generated command list. 109 | """ 110 | command_prepend_lst = [SLURM_COMMAND, "-n", str(cores)] 111 | if cwd is not None: 112 | command_prepend_lst += ["-D", cwd] 113 | if num_nodes is not None: 114 | command_prepend_lst += ["-N", str(num_nodes)] 115 | if threads_per_core > 1: 116 | command_prepend_lst += ["--cpus-per-task=" + str(threads_per_core)] 117 | if gpus_per_core > 0: 118 | command_prepend_lst += ["--gpus-per-task=" + str(gpus_per_core)] 119 | if exclusive: 120 | command_prepend_lst += ["--exact"] 121 | if openmpi_oversubscribe: 122 | command_prepend_lst += ["--oversubscribe"] 123 | if slurm_cmd_args is not None and len(slurm_cmd_args) > 0: 124 | command_prepend_lst += slurm_cmd_args 125 | return command_prepend_lst 126 | -------------------------------------------------------------------------------- /notebooks/images/uml_executor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/notebooks/images/uml_executor.png -------------------------------------------------------------------------------- /notebooks/images/uml_spawner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/notebooks/images/uml_spawner.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "hatchling==1.27.0", 4 | "hatch-vcs==0.5.0", 5 | "cloudpickle==3.1.1", 6 | "pyzmq==26.4.0", 7 | ] 8 | build-backend = "hatchling.build" 9 | 10 | [project] 11 | name = "executorlib" 12 | description = "Up-scale python functions for high performance computing (HPC) with executorlib."
13 | authors = [ 14 | { name = "Jan Janssen", email = "janssen@lanl.gov" }, 15 | ] 16 | readme = "README.md" 17 | license = { file = "LICENSE" } 18 | keywords = ["high performance computing", "hpc", "task scheduler", "slurm", "flux-framework", "executor"] 19 | requires-python = ">=3.9, <3.14" 20 | classifiers = [ 21 | "Development Status :: 5 - Production/Stable", 22 | "Topic :: Scientific/Engineering :: Physics", 23 | "License :: OSI Approved :: BSD License", 24 | "Intended Audience :: Science/Research", 25 | "Operating System :: OS Independent", 26 | "Programming Language :: Python :: 3.9", 27 | "Programming Language :: Python :: 3.10", 28 | "Programming Language :: Python :: 3.11", 29 | "Programming Language :: Python :: 3.12", 30 | "Programming Language :: Python :: 3.13", 31 | ] 32 | dependencies = [ 33 | "cloudpickle==3.1.1", 34 | "pyzmq==26.4.0", 35 | ] 36 | dynamic = ["version"] 37 | 38 | [project.urls] 39 | Homepage = "https://github.com/pyiron/executorlib" 40 | Documentation = "https://executorlib.readthedocs.io" 41 | Repository = "https://github.com/pyiron/executorlib" 42 | 43 | [project.optional-dependencies] 44 | cache = ["h5py==3.13.0"] 45 | graph = [ 46 | "pygraphviz==1.14", 47 | "networkx==3.4.2", 48 | ] 49 | graphnotebook = [ 50 | "pygraphviz==1.14", 51 | "networkx==3.4.2", 52 | "ipython==9.0.2", 53 | ] 54 | mpi = ["mpi4py==4.0.1"] 55 | cluster = [ 56 | "pysqa==0.2.6", 57 | "h5py==3.13.0", 58 | ] 59 | all = [ 60 | "mpi4py==4.0.1", 61 | "pysqa==0.2.6", 62 | "h5py==3.13.0", 63 | "pygraphviz==1.14", 64 | "networkx==3.4.2", 65 | "ipython==9.0.2", 66 | ] 67 | 68 | [tool.ruff] 69 | exclude = [".ci_support", "docs", "notebooks", "tests", "setup.py", "_version.py"] 70 | 71 | [tool.ruff.lint] 72 | select = [ 73 | # pycodestyle 74 | "E", 75 | # Pyflakes 76 | "F", 77 | # pyupgrade 78 | "UP", 79 | # flake8-bugbear 80 | "B", 81 | # flake8-simplify 82 | "SIM", 83 | # isort 84 | "I", 85 | # flake8-comprehensions 86 | "C4", 87 | # eradicate 88 | "ERA", 89 | # pylint 90 | "PL", 91 | ] 92 | ignore = [ 93 | # ignore line-length violations 94 | "E501", 95 | # Too many arguments in function definition 96 | "PLR0913", 97 | # Magic value used in comparison 98 | "PLR2004", 99 | # Too many branches 100 | "PLR0912", 101 | ] 102 | 103 | [tool.hatch.build] 104 | include = [ 105 | "executorlib" 106 | ] 107 | 108 | [tool.hatch.build.hooks.vcs] 109 | version-file = "executorlib/_version.py" 110 | 111 | [tool.hatch.build.targets.sdist] 112 | include = [ 113 | "executorlib" 114 | ] 115 | 116 | [tool.hatch.build.targets.wheel] 117 | packages = [ 118 | "executorlib" 119 | ] 120 | 121 | [tool.hatch.version] 122 | source = "vcs" 123 | path = "executorlib/_version.py" -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyiron/executorlib/1c81ec93add5d5f89353ced385618325b9805f1a/tests/__init__.py -------------------------------------------------------------------------------- /tests/benchmark/llh.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from time import time 3 | 4 | 5 | def llh_numpy(mean, sigma): 6 | import numpy 7 | 8 | data = numpy.random.normal(size=100000000).astype("float64") 9 | s = (data - mean) ** 2 / (2 * (sigma**2)) 10 | pdfs = numpy.exp(-s) 11 | pdfs /= numpy.sqrt(2 * numpy.pi) * sigma 12 | return numpy.log(pdfs).sum() 13 | 14 | 15 | def run_with_executor(executor=None, 
mean=0.1, sigma=1.1, runs=32, **kwargs): 16 | with executor(**kwargs) as exe: 17 | future_lst = [ 18 | exe.submit(llh_numpy, mean=mean, sigma=sigma) for i in range(runs) 19 | ] 20 | return [f.result() for f in future_lst] 21 | 22 | 23 | def run_static(mean=0.1, sigma=1.1, runs=32): 24 | return [llh_numpy(mean=mean, sigma=sigma) for i in range(runs)] 25 | 26 | 27 | if __name__ == "__main__": 28 | run_mode = sys.argv[1] 29 | start_time = time() 30 | if run_mode == "static": 31 | run_static(mean=0.1, sigma=1.1, runs=32) 32 | elif run_mode == "process": 33 | from concurrent.futures import ProcessPoolExecutor 34 | 35 | run_with_executor( 36 | executor=ProcessPoolExecutor, mean=0.1, sigma=1.1, runs=32, max_workers=4 37 | ) 38 | elif run_mode == "thread": 39 | from concurrent.futures import ThreadPoolExecutor 40 | 41 | run_with_executor( 42 | executor=ThreadPoolExecutor, mean=0.1, sigma=1.1, runs=32, max_workers=4 43 | ) 44 | elif run_mode == "block_allocation": 45 | from executorlib import SingleNodeExecutor 46 | 47 | run_with_executor( 48 | executor=SingleNodeExecutor, 49 | mean=0.1, 50 | sigma=1.1, 51 | runs=32, 52 | max_cores=4, 53 | block_allocation=True, 54 | ) 55 | elif run_mode == "executorlib": 56 | from executorlib import SingleNodeExecutor 57 | 58 | run_with_executor( 59 | executor=SingleNodeExecutor, 60 | mean=0.1, 61 | sigma=1.1, 62 | runs=32, 63 | max_cores=4, 64 | block_allocation=False, 65 | ) 66 | elif run_mode == "flux": 67 | from executorlib import FluxJobExecutor 68 | 69 | run_with_executor( 70 | executor=FluxJobExecutor, 71 | mean=0.1, 72 | sigma=1.1, 73 | runs=32, 74 | max_cores=4, 75 | block_allocation=True, 76 | ) 77 | elif run_mode == "mpi4py": 78 | from mpi4py.futures import MPIPoolExecutor 79 | 80 | run_with_executor( 81 | executor=MPIPoolExecutor, mean=0.1, sigma=1.1, runs=32, max_workers=4 82 | ) 83 | else: 84 | raise ValueError(run_mode) 85 | stop_time = time() 86 | print(run_mode, stop_time - start_time) 87 | -------------------------------------------------------------------------------- /tests/benchmark/test_results.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class TestResults(unittest.TestCase): 5 | def test_result(self): 6 | with open("timing.log") as f: 7 | content = f.readlines() 8 | timing_dict = {l.split()[0]: float(l.split()[1]) for l in content} 9 | self.assertEqual(min(timing_dict, key=timing_dict.get), "process") 10 | self.assertEqual(max(timing_dict, key=timing_dict.get), "static") 11 | self.assertTrue(timing_dict["process"] < timing_dict["executorlib"]) 12 | self.assertTrue(timing_dict["block_allocation"] < timing_dict["process"] * 1.1) 13 | self.assertTrue(timing_dict["executorlib"] < timing_dict["process"] * 1.35) 14 | self.assertTrue(timing_dict["process"] < timing_dict["mpi4py"]) 15 | self.assertTrue(timing_dict["block_allocation"] < timing_dict["mpi4py"]) 16 | self.assertTrue(timing_dict["mpi4py"] < timing_dict["process"] * 1.15) 17 | self.assertTrue(timing_dict["thread"] < timing_dict["static"]) 18 | self.assertTrue(timing_dict["mpi4py"] < timing_dict["thread"]) 19 | -------------------------------------------------------------------------------- /tests/executables/count.py: -------------------------------------------------------------------------------- 1 | def count(iterations): 2 | for i in range(int(iterations)): 3 | print(i) 4 | print("done") 5 | 6 | 7 | if __name__ == "__main__": 8 | while True: 9 | user_input = input() 10 | if "shutdown" in user_input: 11 | break 12 | 
else: 13 | count(iterations=int(user_input)) 14 | -------------------------------------------------------------------------------- /tests/test_backend_interactive_serial.py: -------------------------------------------------------------------------------- 1 | from threading import Thread 2 | import unittest 3 | 4 | import cloudpickle 5 | import zmq 6 | 7 | from executorlib.backend.interactive_serial import main 8 | 9 | 10 | def calc(i, j): 11 | return i + j 12 | 13 | 14 | def set_global(): 15 | return {"j": 5} 16 | 17 | 18 | def submit(socket): 19 | socket.send( 20 | cloudpickle.dumps({"init": True, "fn": set_global, "args": (), "kwargs": {}}) 21 | ) 22 | socket.send(cloudpickle.dumps({"fn": calc, "args": (), "kwargs": {"i": 2}})) 23 | socket.send(cloudpickle.dumps({"shutdown": True, "wait": True})) 24 | 25 | 26 | def submit_error(socket): 27 | socket.send( 28 | cloudpickle.dumps({"init": True, "fn": set_global, "args": (), "kwargs": {}}) 29 | ) 30 | socket.send(cloudpickle.dumps({"fn": calc, "args": (), "kwargs": {}})) 31 | socket.send(cloudpickle.dumps({"shutdown": True, "wait": True})) 32 | 33 | 34 | class TestSerial(unittest.TestCase): 35 | def test_main_as_thread(self): 36 | context = zmq.Context() 37 | socket = context.socket(zmq.PAIR) 38 | port = socket.bind_to_random_port("tcp://*") 39 | t = Thread(target=main, kwargs={"argument_lst": ["--zmqport", str(port)]}) 40 | t.start() 41 | submit(socket=socket) 42 | self.assertEqual(cloudpickle.loads(socket.recv()), {"result": 7}) 43 | self.assertEqual(cloudpickle.loads(socket.recv()), {"result": True}) 44 | socket.close() 45 | context.term() 46 | 47 | def test_main_as_thread_error(self): 48 | context = zmq.Context() 49 | socket = context.socket(zmq.PAIR) 50 | port = socket.bind_to_random_port("tcp://*") 51 | t = Thread(target=main, kwargs={"argument_lst": ["--zmqport", str(port)]}) 52 | t.start() 53 | submit_error(socket=socket) 54 | self.assertEqual( 55 | str(type(cloudpickle.loads(socket.recv())["error"])), "<class 'TypeError'>" 56 | ) 57 | self.assertEqual(cloudpickle.loads(socket.recv()), {"result": True}) 58 | socket.close() 59 | context.term() 60 | 61 | def test_submit_as_thread(self): 62 | context = zmq.Context() 63 | socket = context.socket(zmq.PAIR) 64 | port = socket.bind_to_random_port("tcp://*") 65 | t = Thread(target=submit, kwargs={"socket": socket}) 66 | t.start() 67 | main(argument_lst=["--zmqport", str(port)]) 68 | self.assertEqual(cloudpickle.loads(socket.recv()), {"result": 7}) 69 | self.assertEqual(cloudpickle.loads(socket.recv()), {"result": True}) 70 | socket.close() 71 | context.term() 72 | 73 | def test_submit_as_thread_error(self): 74 | context = zmq.Context() 75 | socket = context.socket(zmq.PAIR) 76 | port = socket.bind_to_random_port("tcp://*") 77 | t = Thread(target=submit_error, kwargs={"socket": socket}) 78 | t.start() 79 | main(argument_lst=["--zmqport", str(port)]) 80 | self.assertEqual( 81 | str(type(cloudpickle.loads(socket.recv())["error"])), "<class 'TypeError'>" 82 | ) 83 | self.assertEqual(cloudpickle.loads(socket.recv()), {"result": True}) 84 | socket.close() 85 | context.term() 86 | -------------------------------------------------------------------------------- /tests/test_base_executor_queue.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future, CancelledError 2 | from queue import Queue 3 | import unittest 4 | 5 | from executorlib.standalone.queue import cancel_items_in_queue 6 | 7 | 8 | class TestQueue(unittest.TestCase): 9 | def
test_cancel_items_in_queue(self): 10 | q = Queue() 11 | fs1 = Future() 12 | fs2 = Future() 13 | q.put({"future": fs1}) 14 | q.put({"future": fs2}) 15 | cancel_items_in_queue(que=q) 16 | self.assertEqual(q.qsize(), 0) 17 | self.assertTrue(fs1.done()) 18 | with self.assertRaises(CancelledError): 19 | self.assertTrue(fs1.result()) 20 | self.assertTrue(fs2.done()) 21 | with self.assertRaises(CancelledError): 22 | self.assertTrue(fs2.result()) 23 | q.join() 24 | -------------------------------------------------------------------------------- /tests/test_cache_backend_execute.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import os 3 | import shutil 4 | import unittest 5 | 6 | 7 | try: 8 | from executorlib.task_scheduler.file.backend import backend_execute_task_in_file 9 | from executorlib.task_scheduler.file.shared import _check_task_output, FutureItem 10 | from executorlib.task_scheduler.file.hdf import dump, get_runtime 11 | from executorlib.standalone.serialize import serialize_funct_h5 12 | 13 | skip_h5io_test = False 14 | except ImportError: 15 | skip_h5io_test = True 16 | 17 | 18 | def my_funct(a, b): 19 | return a + b 20 | 21 | 22 | def get_error(a): 23 | raise ValueError(a) 24 | 25 | 26 | @unittest.skipIf( 27 | skip_h5io_test, "h5io is not installed, so the h5io tests are skipped." 28 | ) 29 | class TestSharedFunctions(unittest.TestCase): 30 | def test_execute_function_mixed(self): 31 | cache_directory = os.path.abspath("cache") 32 | os.makedirs(cache_directory, exist_ok=True) 33 | task_key, data_dict = serialize_funct_h5( 34 | fn=my_funct, 35 | fn_args=[1], 36 | fn_kwargs={"b": 2}, 37 | ) 38 | file_name = os.path.join(cache_directory, task_key, "cache.h5in") 39 | os.makedirs(os.path.join(cache_directory, task_key), exist_ok=True) 40 | dump(file_name=file_name, data_dict=data_dict) 41 | backend_execute_task_in_file(file_name=file_name) 42 | future_obj = Future() 43 | _check_task_output( 44 | task_key=task_key, future_obj=future_obj, cache_directory=cache_directory 45 | ) 46 | self.assertTrue(future_obj.done()) 47 | self.assertEqual(future_obj.result(), 3) 48 | self.assertTrue( 49 | get_runtime(file_name=os.path.join(cache_directory, task_key, "cache.h5out")) 50 | > 0.0 51 | ) 52 | future_file_obj = FutureItem( 53 | file_name=os.path.join(cache_directory, task_key, "cache.h5out") 54 | ) 55 | self.assertTrue(future_file_obj.done()) 56 | self.assertEqual(future_file_obj.result(), 3) 57 | 58 | def test_execute_function_args(self): 59 | cache_directory = os.path.abspath("cache") 60 | os.makedirs(cache_directory, exist_ok=True) 61 | task_key, data_dict = serialize_funct_h5( 62 | fn=my_funct, 63 | fn_args=[1, 2], 64 | fn_kwargs={}, 65 | ) 66 | file_name = os.path.join(cache_directory, task_key, "cache.h5in") 67 | os.makedirs(os.path.join(cache_directory, task_key), exist_ok=True) 68 | dump(file_name=file_name, data_dict=data_dict) 69 | backend_execute_task_in_file(file_name=file_name) 70 | future_obj = Future() 71 | _check_task_output( 72 | task_key=task_key, future_obj=future_obj, cache_directory=cache_directory 73 | ) 74 | self.assertTrue(future_obj.done()) 75 | self.assertEqual(future_obj.result(), 3) 76 | self.assertTrue( 77 | get_runtime(file_name=os.path.join(cache_directory, task_key, "cache.h5out")) 78 | > 0.0 79 | ) 80 | future_file_obj = FutureItem( 81 | file_name=os.path.join(cache_directory, task_key, "cache.h5out") 82 | ) 83 | self.assertTrue(future_file_obj.done()) 84 | 
self.assertEqual(future_file_obj.result(), 3) 85 | 86 | def test_execute_function_kwargs(self): 87 | cache_directory = os.path.abspath("cache") 88 | os.makedirs(cache_directory, exist_ok=True) 89 | task_key, data_dict = serialize_funct_h5( 90 | fn=my_funct, 91 | fn_args=[], 92 | fn_kwargs={"a": 1, "b": 2}, 93 | ) 94 | file_name = os.path.join(cache_directory, task_key, "cache.h5in") 95 | os.makedirs(os.path.join(cache_directory, task_key), exist_ok=True) 96 | dump(file_name=file_name, data_dict=data_dict) 97 | backend_execute_task_in_file(file_name=file_name) 98 | future_obj = Future() 99 | _check_task_output( 100 | task_key=task_key, future_obj=future_obj, cache_directory=cache_directory 101 | ) 102 | self.assertTrue(future_obj.done()) 103 | self.assertEqual(future_obj.result(), 3) 104 | self.assertTrue( 105 | get_runtime(file_name=os.path.join(cache_directory, task_key, "cache.h5out")) 106 | > 0.0 107 | ) 108 | future_file_obj = FutureItem( 109 | file_name=os.path.join(cache_directory, task_key, "cache.h5out") 110 | ) 111 | self.assertTrue(future_file_obj.done()) 112 | self.assertEqual(future_file_obj.result(), 3) 113 | 114 | def test_execute_function_error(self): 115 | cache_directory = os.path.abspath("cache") 116 | os.makedirs(cache_directory, exist_ok=True) 117 | task_key, data_dict = serialize_funct_h5( 118 | fn=get_error, 119 | fn_args=[], 120 | fn_kwargs={"a": 1}, 121 | ) 122 | file_name = os.path.join(cache_directory, task_key, "cache.h5in") 123 | os.makedirs(os.path.join(cache_directory, task_key), exist_ok=True) 124 | dump(file_name=file_name, data_dict=data_dict) 125 | backend_execute_task_in_file(file_name=file_name) 126 | future_obj = Future() 127 | _check_task_output( 128 | task_key=task_key, future_obj=future_obj, cache_directory=cache_directory 129 | ) 130 | self.assertTrue(future_obj.done()) 131 | with self.assertRaises(ValueError): 132 | future_obj.result() 133 | self.assertTrue( 134 | get_runtime(file_name=os.path.join(cache_directory, task_key, "cache.h5out")) 135 | > 0.0 136 | ) 137 | future_file_obj = FutureItem( 138 | file_name=os.path.join(cache_directory, task_key, "cache.h5out") 139 | ) 140 | self.assertTrue(future_file_obj.done()) 141 | with self.assertRaises(ValueError): 142 | future_file_obj.result() 143 | 144 | def tearDown(self): 145 | if os.path.exists("cache"): 146 | shutil.rmtree("cache") 147 | -------------------------------------------------------------------------------- /tests/test_cache_fileexecutor_mpi.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | import os 3 | import shutil 4 | import unittest 5 | 6 | from executorlib.task_scheduler.file.subprocess_spawner import execute_in_subprocess 7 | 8 | 9 | try: 10 | from executorlib.task_scheduler.file.task_scheduler import FileTaskScheduler 11 | 12 | skip_h5py_test = False 13 | except ImportError: 14 | skip_h5py_test = True 15 | 16 | 17 | skip_mpi4py_test = importlib.util.find_spec("mpi4py") is None 18 | 19 | 20 | def mpi_funct(i): 21 | from mpi4py import MPI 22 | 23 | size = MPI.COMM_WORLD.Get_size() 24 | rank = MPI.COMM_WORLD.Get_rank() 25 | return i, size, rank 26 | 27 | 28 | @unittest.skipIf( 29 | skip_h5py_test or skip_mpi4py_test, 30 | "h5py or mpi4py are not installed, so the h5py and mpi4py tests are skipped.", 31 | ) 32 | class TestCacheExecutorMPI(unittest.TestCase): 33 | def test_executor(self): 34 | with FileTaskScheduler( 35 | resource_dict={"cores": 2}, execute_function=execute_in_subprocess 36 | ) as exe: 37 | fs1 = 
exe.submit(mpi_funct, 1) 38 | self.assertFalse(fs1.done()) 39 | self.assertEqual(fs1.result(), [(1, 2, 0), (1, 2, 1)]) 40 | self.assertTrue(fs1.done()) 41 | 42 | def tearDown(self): 43 | if os.path.exists("cache"): 44 | shutil.rmtree("cache") 45 | -------------------------------------------------------------------------------- /tests/test_cache_fileexecutor_serial.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import os 3 | from queue import Queue 4 | import shutil 5 | import unittest 6 | from threading import Thread 7 | 8 | from executorlib.task_scheduler.file.subprocess_spawner import ( 9 | execute_in_subprocess, 10 | terminate_subprocess, 11 | ) 12 | 13 | try: 14 | from executorlib.task_scheduler.file.task_scheduler import FileTaskScheduler, create_file_executor 15 | from executorlib.task_scheduler.file.shared import execute_tasks_h5 16 | 17 | skip_h5py_test = False 18 | except ImportError: 19 | skip_h5py_test = True 20 | 21 | 22 | def my_funct(a, b): 23 | return a + b 24 | 25 | 26 | def list_files_in_working_directory(): 27 | return os.listdir(os.getcwd()) 28 | 29 | 30 | def get_error(a): 31 | raise ValueError(a) 32 | 33 | 34 | @unittest.skipIf( 35 | skip_h5py_test, "h5py is not installed, so the h5py tests are skipped." 36 | ) 37 | class TestCacheExecutorSerial(unittest.TestCase): 38 | def test_executor_mixed(self): 39 | with FileTaskScheduler(execute_function=execute_in_subprocess) as exe: 40 | fs1 = exe.submit(my_funct, 1, b=2) 41 | self.assertFalse(fs1.done()) 42 | self.assertEqual(fs1.result(), 3) 43 | self.assertTrue(fs1.done()) 44 | 45 | def test_executor_dependence_mixed(self): 46 | with FileTaskScheduler(execute_function=execute_in_subprocess) as exe: 47 | fs1 = exe.submit(my_funct, 1, b=2) 48 | fs2 = exe.submit(my_funct, 1, b=fs1) 49 | self.assertFalse(fs2.done()) 50 | self.assertEqual(fs2.result(), 4) 51 | self.assertTrue(fs2.done()) 52 | 53 | def test_create_file_executor_error(self): 54 | with self.assertRaises(ValueError): 55 | create_file_executor(block_allocation=True) 56 | with self.assertRaises(ValueError): 57 | create_file_executor(init_function=True) 58 | 59 | def test_executor_dependence_error(self): 60 | with self.assertRaises(ValueError): 61 | with FileTaskScheduler( 62 | execute_function=execute_in_subprocess, disable_dependencies=True 63 | ) as exe: 64 | fs = exe.submit(my_funct, 1, b=exe.submit(my_funct, 1, b=2)) 65 | fs.result() 66 | 67 | def test_executor_working_directory(self): 68 | cwd = os.path.join(os.path.dirname(__file__), "executables") 69 | with FileTaskScheduler( 70 | resource_dict={"cwd": cwd}, execute_function=execute_in_subprocess 71 | ) as exe: 72 | fs1 = exe.submit(list_files_in_working_directory) 73 | self.assertEqual(fs1.result(), os.listdir(cwd)) 74 | 75 | def test_executor_error(self): 76 | cwd = os.path.join(os.path.dirname(__file__), "executables") 77 | with FileTaskScheduler( 78 | resource_dict={"cwd": cwd}, execute_function=execute_in_subprocess 79 | ) as exe: 80 | fs1 = exe.submit(get_error, a=1) 81 | with self.assertRaises(ValueError): 82 | fs1.result() 83 | 84 | def test_executor_function(self): 85 | fs1 = Future() 86 | q = Queue() 87 | q.put( 88 | { 89 | "fn": my_funct, 90 | "args": (), 91 | "kwargs": {"a": 1, "b": 2}, 92 | "future": fs1, 93 | "resource_dict": {}, 94 | } 95 | ) 96 | cache_dir = os.path.abspath("cache") 97 | os.makedirs(cache_dir, exist_ok=True) 98 | process = Thread( 99 | target=execute_tasks_h5, 100 | kwargs={ 101 | "future_queue": 
q, 102 | "cache_directory": cache_dir, 103 | "execute_function": execute_in_subprocess, 104 | "resource_dict": {"cores": 1, "cwd": None}, 105 | "terminate_function": terminate_subprocess, 106 | }, 107 | ) 108 | process.start() 109 | self.assertFalse(fs1.done()) 110 | self.assertEqual(fs1.result(), 3) 111 | self.assertTrue(fs1.done()) 112 | q.put({"shutdown": True, "wait": True}) 113 | process.join() 114 | 115 | def test_executor_function_dependence_kwargs(self): 116 | fs1 = Future() 117 | fs2 = Future() 118 | q = Queue() 119 | q.put( 120 | { 121 | "fn": my_funct, 122 | "args": (), 123 | "kwargs": {"a": 1, "b": 2}, 124 | "future": fs1, 125 | "resource_dict": {}, 126 | } 127 | ) 128 | q.put( 129 | { 130 | "fn": my_funct, 131 | "args": (), 132 | "kwargs": {"a": 1, "b": fs1}, 133 | "future": fs2, 134 | "resource_dict": {}, 135 | } 136 | ) 137 | cache_dir = os.path.abspath("cache") 138 | os.makedirs(cache_dir, exist_ok=True) 139 | process = Thread( 140 | target=execute_tasks_h5, 141 | kwargs={ 142 | "future_queue": q, 143 | "cache_directory": cache_dir, 144 | "execute_function": execute_in_subprocess, 145 | "resource_dict": {"cores": 1, "cwd": None}, 146 | "terminate_function": terminate_subprocess, 147 | }, 148 | ) 149 | process.start() 150 | self.assertFalse(fs2.done()) 151 | self.assertEqual(fs2.result(), 4) 152 | self.assertTrue(fs2.done()) 153 | q.put({"shutdown": True, "wait": True}) 154 | process.join() 155 | 156 | def test_executor_function_dependence_args(self): 157 | fs1 = Future() 158 | fs2 = Future() 159 | q = Queue() 160 | q.put( 161 | { 162 | "fn": my_funct, 163 | "args": (), 164 | "kwargs": {"a": 1, "b": 2}, 165 | "future": fs1, 166 | "resource_dict": {}, 167 | } 168 | ) 169 | q.put( 170 | { 171 | "fn": my_funct, 172 | "args": [fs1], 173 | "kwargs": {"b": 2}, 174 | "future": fs2, 175 | "resource_dict": {}, 176 | } 177 | ) 178 | cache_dir = os.path.abspath("cache") 179 | os.makedirs(cache_dir, exist_ok=True) 180 | process = Thread( 181 | target=execute_tasks_h5, 182 | kwargs={ 183 | "future_queue": q, 184 | "cache_directory": cache_dir, 185 | "execute_function": execute_in_subprocess, 186 | "resource_dict": {"cores": 1}, 187 | "terminate_function": terminate_subprocess, 188 | }, 189 | ) 190 | process.start() 191 | self.assertFalse(fs2.done()) 192 | self.assertEqual(fs2.result(), 5) 193 | self.assertTrue(fs2.done()) 194 | q.put({"shutdown": True, "wait": True}) 195 | process.join() 196 | 197 | def test_execute_in_subprocess_errors(self): 198 | with self.assertRaises(ValueError): 199 | execute_in_subprocess( 200 | file_name=__file__, command=[], config_directory="test" 201 | ) 202 | with self.assertRaises(ValueError): 203 | execute_in_subprocess(file_name=__file__, command=[], backend="flux") 204 | 205 | def tearDown(self): 206 | if os.path.exists("cache"): 207 | shutil.rmtree("cache") 208 | -------------------------------------------------------------------------------- /tests/test_fluxclusterexecutor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import importlib.util 3 | import unittest 4 | import shutil 5 | 6 | from executorlib import FluxClusterExecutor 7 | from executorlib.standalone.serialize import cloudpickle_register 8 | 9 | try: 10 | import flux.job 11 | 12 | skip_flux_test = "FLUX_URI" not in os.environ 13 | pmi = os.environ.get("EXECUTORLIB_PMIX", None) 14 | except ImportError: 15 | skip_flux_test = True 16 | 17 | 18 | skip_mpi4py_test = importlib.util.find_spec("mpi4py") is None 19 | 20 | 21 | def mpi_funct(i): 22 |
from mpi4py import MPI 23 | 24 | size = MPI.COMM_WORLD.Get_size() 25 | rank = MPI.COMM_WORLD.Get_rank() 26 | return i, size, rank 27 | 28 | 29 | @unittest.skipIf( 30 | skip_flux_test or skip_mpi4py_test, 31 | "mpi4py or flux are not installed, so the mpi4py and flux tests are skipped.", 32 | ) 33 | class TestCacheExecutorPysqa(unittest.TestCase): 34 | def test_executor(self): 35 | with FluxClusterExecutor( 36 | resource_dict={"cores": 2, "cwd": "cache"}, 37 | block_allocation=False, 38 | cache_directory="cache", 39 | ) as exe: 40 | cloudpickle_register(ind=1) 41 | fs1 = exe.submit(mpi_funct, 1) 42 | self.assertFalse(fs1.done()) 43 | self.assertEqual(fs1.result(), [(1, 2, 0), (1, 2, 1)]) 44 | self.assertTrue(fs1.done()) 45 | 46 | def tearDown(self): 47 | if os.path.exists("cache"): 48 | shutil.rmtree("cache") 49 | -------------------------------------------------------------------------------- /tests/test_fluxjobexecutor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | import numpy as np 5 | 6 | from executorlib import FluxJobExecutor 7 | 8 | 9 | try: 10 | import flux.job 11 | from executorlib.task_scheduler.interactive.fluxspawner import FluxPythonSpawner 12 | 13 | skip_flux_test = "FLUX_URI" not in os.environ 14 | pmi = os.environ.get("EXECUTORLIB_PMIX", None) 15 | except ImportError: 16 | skip_flux_test = True 17 | 18 | 19 | def calc(i): 20 | return i 21 | 22 | 23 | def mpi_funct(i): 24 | from mpi4py import MPI 25 | 26 | size = MPI.COMM_WORLD.Get_size() 27 | rank = MPI.COMM_WORLD.Get_rank() 28 | return i, size, rank 29 | 30 | 31 | def get_global(memory=None): 32 | return memory 33 | 34 | 35 | def set_global(): 36 | return {"memory": np.array([5])} 37 | 38 | 39 | @unittest.skipIf( 40 | skip_flux_test, "Flux is not installed, so the flux tests are skipped."
41 | ) 42 | class TestFluxBackend(unittest.TestCase): 43 | def setUp(self): 44 | self.executor = flux.job.FluxExecutor() 45 | 46 | def test_flux_executor_serial(self): 47 | with FluxJobExecutor( 48 | max_cores=2, 49 | flux_executor=self.executor, 50 | block_allocation=True, 51 | ) as exe: 52 | fs_1 = exe.submit(calc, 1) 53 | fs_2 = exe.submit(calc, 2) 54 | self.assertEqual(fs_1.result(), 1) 55 | self.assertEqual(fs_2.result(), 2) 56 | self.assertTrue(fs_1.done()) 57 | self.assertTrue(fs_2.done()) 58 | 59 | def test_flux_executor_serial_no_dependencies(self): 60 | with FluxJobExecutor( 61 | max_cores=2, 62 | flux_executor=self.executor, 63 | block_allocation=True, 64 | disable_dependencies=True, 65 | ) as exe: 66 | fs_1 = exe.submit(calc, 1) 67 | fs_2 = exe.submit(calc, 2) 68 | self.assertEqual(fs_1.result(), 1) 69 | self.assertEqual(fs_2.result(), 2) 70 | self.assertTrue(fs_1.done()) 71 | self.assertTrue(fs_2.done()) 72 | 73 | def test_flux_executor_threads(self): 74 | with FluxJobExecutor( 75 | max_cores=1, 76 | resource_dict={"threads_per_core": 2}, 77 | flux_executor=self.executor, 78 | block_allocation=True, 79 | ) as exe: 80 | fs_1 = exe.submit(calc, 1) 81 | fs_2 = exe.submit(calc, 2) 82 | self.assertEqual(fs_1.result(), 1) 83 | self.assertEqual(fs_2.result(), 2) 84 | self.assertTrue(fs_1.done()) 85 | self.assertTrue(fs_2.done()) 86 | 87 | def test_flux_executor_parallel(self): 88 | with FluxJobExecutor( 89 | max_cores=2, 90 | resource_dict={"cores": 2}, 91 | flux_executor=self.executor, 92 | block_allocation=True, 93 | flux_executor_pmi_mode=pmi, 94 | ) as exe: 95 | fs_1 = exe.submit(mpi_funct, 1) 96 | self.assertEqual(fs_1.result(), [(1, 2, 0), (1, 2, 1)]) 97 | self.assertTrue(fs_1.done()) 98 | 99 | def test_single_task(self): 100 | with FluxJobExecutor( 101 | max_cores=2, 102 | resource_dict={"cores": 2}, 103 | flux_executor=self.executor, 104 | block_allocation=True, 105 | flux_executor_pmi_mode=pmi, 106 | ) as p: 107 | output = p.map(mpi_funct, [1, 2, 3]) 108 | self.assertEqual( 109 | list(output), 110 | [[(1, 2, 0), (1, 2, 1)], [(2, 2, 0), (2, 2, 1)], [(3, 2, 0), (3, 2, 1)]], 111 | ) 112 | 113 | def test_output_files_cwd(self): 114 | dirname = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) 115 | os.makedirs(dirname, exist_ok=True) 116 | file_stdout = os.path.join(dirname, "flux.out") 117 | file_stderr = os.path.join(dirname, "flux.err") 118 | with FluxJobExecutor( 119 | max_cores=1, 120 | resource_dict={"cores": 1, "cwd": dirname}, 121 | flux_executor=self.executor, 122 | block_allocation=True, 123 | flux_log_files=True, 124 | ) as p: 125 | output = p.map(calc, [1, 2, 3]) 126 | self.assertEqual( 127 | list(output), 128 | [1, 2, 3], 129 | ) 130 | self.assertTrue(os.path.exists(file_stdout)) 131 | self.assertTrue(os.path.exists(file_stderr)) 132 | os.remove(file_stdout) 133 | os.remove(file_stderr) 134 | 135 | def test_output_files_abs(self): 136 | file_stdout = os.path.abspath("flux.out") 137 | file_stderr = os.path.abspath("flux.err") 138 | with FluxJobExecutor( 139 | max_cores=1, 140 | resource_dict={"cores": 1}, 141 | flux_executor=self.executor, 142 | block_allocation=True, 143 | flux_log_files=True, 144 | ) as p: 145 | output = p.map(calc, [1, 2, 3]) 146 | self.assertEqual( 147 | list(output), 148 | [1, 2, 3], 149 | ) 150 | self.assertTrue(os.path.exists(file_stdout)) 151 | self.assertTrue(os.path.exists(file_stderr)) 152 | os.remove(file_stdout) 153 | os.remove(file_stderr) 154 | 155 | def test_internal_memory(self): 156 | with FluxJobExecutor( 157 |
max_cores=1, 158 | resource_dict={"cores": 1}, 159 | init_function=set_global, 160 | flux_executor=self.executor, 161 | block_allocation=True, 162 | ) as p: 163 | f = p.submit(get_global) 164 | self.assertFalse(f.done()) 165 | self.assertEqual(f.result(), np.array([5])) 166 | self.assertTrue(f.done()) 167 | 168 | def test_validate_max_workers(self): 169 | with self.assertRaises(ValueError): 170 | FluxJobExecutor( 171 | max_workers=10, 172 | resource_dict={"cores": 10, "threads_per_core": 10}, 173 | flux_executor=self.executor, 174 | block_allocation=True, 175 | ) 176 | -------------------------------------------------------------------------------- /tests/test_fluxjobexecutor_plot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from time import sleep 4 | 5 | from executorlib import FluxJobExecutor, FluxClusterExecutor 6 | from executorlib.standalone.plot import generate_nodes_and_edges 7 | from executorlib.standalone.serialize import cloudpickle_register 8 | 9 | 10 | try: 11 | import pygraphviz 12 | import flux.job 13 | from executorlib.task_scheduler.interactive.fluxspawner import FluxPythonSpawner 14 | 15 | skip_graphviz_flux_test = "FLUX_URI" not in os.environ 16 | except ImportError: 17 | skip_graphviz_flux_test = True 18 | 19 | 20 | def add_function(parameter_1, parameter_2): 21 | sleep(0.2) 22 | return parameter_1 + parameter_2 23 | 24 | 25 | def generate_tasks(length): 26 | sleep(0.2) 27 | return range(length) 28 | 29 | 30 | def calc_from_lst(lst, ind, parameter): 31 | sleep(0.2) 32 | return lst[ind] + parameter 33 | 34 | 35 | def merge(lst): 36 | sleep(0.2) 37 | return sum(lst) 38 | 39 | 40 | @unittest.skipIf( 41 | skip_graphviz_flux_test, 42 | "Either graphviz or flux are not installed, so the plot_dependency_graph tests are skipped.", 43 | ) 44 | class TestFluxAllocationExecutorWithDependencies(unittest.TestCase): 45 | def test_executor_dependency_plot(self): 46 | with FluxJobExecutor( 47 | max_cores=1, 48 | plot_dependency_graph=True, 49 | block_allocation=False, 50 | ) as exe: 51 | cloudpickle_register(ind=1) 52 | future_1 = exe.submit(add_function, 1, parameter_2=2) 53 | future_2 = exe.submit(add_function, 1, parameter_2=future_1) 54 | self.assertTrue(future_1.done()) 55 | self.assertTrue(future_2.done()) 56 | self.assertEqual(len(exe._task_scheduler._future_hash_dict), 2) 57 | self.assertEqual(len(exe._task_scheduler._task_hash_dict), 2) 58 | nodes, edges = generate_nodes_and_edges( 59 | task_hash_dict=exe._task_scheduler._task_hash_dict, 60 | future_hash_inverse_dict={ 61 | v: k for k, v in exe._task_scheduler._future_hash_dict.items() 62 | }, 63 | ) 64 | self.assertEqual(len(nodes), 5) 65 | self.assertEqual(len(edges), 4) 66 | 67 | def test_many_to_one_plot(self): 68 | length = 5 69 | parameter = 1 70 | with FluxJobExecutor( 71 | max_cores=2, 72 | plot_dependency_graph=True, 73 | block_allocation=True, 74 | ) as exe: 75 | cloudpickle_register(ind=1) 76 | future_lst = exe.submit( 77 | generate_tasks, 78 | length=length, 79 | resource_dict={"cores": 1}, 80 | ) 81 | lst = [] 82 | for i in range(length): 83 | lst.append( 84 | exe.submit( 85 | calc_from_lst, 86 | lst=future_lst, 87 | ind=i, 88 | parameter=parameter, 89 | resource_dict={"cores": 1}, 90 | ) 91 | ) 92 | future_sum = exe.submit( 93 | merge, 94 | lst=lst, 95 | resource_dict={"cores": 1}, 96 | ) 97 | self.assertTrue(future_lst.done()) 98 | for l in lst: 99 | self.assertTrue(l.done()) 100 | self.assertTrue(future_sum.done()) 101 | 
self.assertEqual(len(exe._task_scheduler._future_hash_dict), 7) 102 | self.assertEqual(len(exe._task_scheduler._task_hash_dict), 7) 103 | nodes, edges = generate_nodes_and_edges( 104 | task_hash_dict=exe._task_scheduler._task_hash_dict, 105 | future_hash_inverse_dict={ 106 | v: k for k, v in exe._task_scheduler._future_hash_dict.items() 107 | }, 108 | ) 109 | self.assertEqual(len(nodes), 19) 110 | self.assertEqual(len(edges), 22) 111 | 112 | 113 | @unittest.skipIf( 114 | skip_graphviz_flux_test, 115 | "Either graphviz or flux are not installed, so the plot_dependency_graph tests are skipped.", 116 | ) 117 | class TestFluxSubmissionExecutorWithDependencies(unittest.TestCase): 118 | def test_executor_dependency_plot(self): 119 | with FluxClusterExecutor( 120 | plot_dependency_graph=True, 121 | ) as exe: 122 | cloudpickle_register(ind=1) 123 | future_1 = exe.submit(add_function, 1, parameter_2=2) 124 | future_2 = exe.submit(add_function, 1, parameter_2=future_1) 125 | self.assertTrue(future_1.done()) 126 | self.assertTrue(future_2.done()) 127 | self.assertEqual(len(exe._task_scheduler._future_hash_dict), 2) 128 | self.assertEqual(len(exe._task_scheduler._task_hash_dict), 2) 129 | nodes, edges = generate_nodes_and_edges( 130 | task_hash_dict=exe._task_scheduler._task_hash_dict, 131 | future_hash_inverse_dict={ 132 | v: k for k, v in exe._task_scheduler._future_hash_dict.items() 133 | }, 134 | ) 135 | self.assertEqual(len(nodes), 5) 136 | self.assertEqual(len(edges), 4) 137 | 138 | def test_many_to_one_plot(self): 139 | length = 5 140 | parameter = 1 141 | with FluxClusterExecutor( 142 | plot_dependency_graph=True, 143 | ) as exe: 144 | cloudpickle_register(ind=1) 145 | future_lst = exe.submit( 146 | generate_tasks, 147 | length=length, 148 | resource_dict={"cores": 1}, 149 | ) 150 | lst = [] 151 | for i in range(length): 152 | lst.append( 153 | exe.submit( 154 | calc_from_lst, 155 | lst=future_lst, 156 | ind=i, 157 | parameter=parameter, 158 | resource_dict={"cores": 1}, 159 | ) 160 | ) 161 | future_sum = exe.submit( 162 | merge, 163 | lst=lst, 164 | resource_dict={"cores": 1}, 165 | ) 166 | self.assertTrue(future_lst.done()) 167 | for l in lst: 168 | self.assertTrue(l.done()) 169 | self.assertTrue(future_sum.done()) 170 | self.assertEqual(len(exe._task_scheduler._future_hash_dict), 7) 171 | self.assertEqual(len(exe._task_scheduler._task_hash_dict), 7) 172 | nodes, edges = generate_nodes_and_edges( 173 | task_hash_dict=exe._task_scheduler._task_hash_dict, 174 | future_hash_inverse_dict={ 175 | v: k for k, v in exe._task_scheduler._future_hash_dict.items() 176 | }, 177 | ) 178 | self.assertEqual(len(nodes), 19) 179 | self.assertEqual(len(edges), 22) 180 | -------------------------------------------------------------------------------- /tests/test_fluxpythonspawner.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import os 3 | from queue import Queue 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from executorlib.task_scheduler.interactive.shared import execute_tasks 9 | from executorlib.task_scheduler.interactive.blockallocation import BlockAllocationTaskScheduler 10 | from executorlib.standalone.serialize import cloudpickle_register 11 | 12 | 13 | try: 14 | import flux.job 15 | from executorlib.task_scheduler.interactive.fluxspawner import FluxPythonSpawner 16 | 17 | skip_flux_test = "FLUX_URI" not in os.environ 18 | pmi = os.environ.get("EXECUTORLIB_PMIX", None) 19 | except ImportError: 20 | 
skip_flux_test = True 21 | 22 | 23 | def calc(i): 24 | return i 25 | 26 | 27 | def mpi_funct(i): 28 | from mpi4py import MPI 29 | 30 | size = MPI.COMM_WORLD.Get_size() 31 | rank = MPI.COMM_WORLD.Get_rank() 32 | return i, size, rank 33 | 34 | 35 | def get_global(memory=None): 36 | return memory 37 | 38 | 39 | def set_global(): 40 | return {"memory": np.array([5])} 41 | 42 | 43 | @unittest.skipIf( 44 | skip_flux_test, "Flux is not installed, so the flux tests are skipped." 45 | ) 46 | class TestFlux(unittest.TestCase): 47 | def setUp(self): 48 | self.flux_executor = flux.job.FluxExecutor() 49 | 50 | def test_flux_executor_serial(self): 51 | with BlockAllocationTaskScheduler( 52 | max_workers=2, 53 | executor_kwargs={"flux_executor": self.flux_executor, "priority": 20}, 54 | spawner=FluxPythonSpawner, 55 | ) as exe: 56 | fs_1 = exe.submit(calc, 1) 57 | fs_2 = exe.submit(calc, 2) 58 | self.assertEqual(fs_1.result(), 1) 59 | self.assertEqual(fs_2.result(), 2) 60 | self.assertTrue(fs_1.done()) 61 | self.assertTrue(fs_2.done()) 62 | 63 | def test_flux_executor_threads(self): 64 | with BlockAllocationTaskScheduler( 65 | max_workers=1, 66 | executor_kwargs={ 67 | "flux_executor": self.flux_executor, 68 | "threads_per_core": 2, 69 | }, 70 | spawner=FluxPythonSpawner, 71 | ) as exe: 72 | fs_1 = exe.submit(calc, 1) 73 | fs_2 = exe.submit(calc, 2) 74 | self.assertEqual(fs_1.result(), 1) 75 | self.assertEqual(fs_2.result(), 2) 76 | self.assertTrue(fs_1.done()) 77 | self.assertTrue(fs_2.done()) 78 | 79 | def test_flux_executor_parallel(self): 80 | with BlockAllocationTaskScheduler( 81 | max_workers=1, 82 | executor_kwargs={ 83 | "flux_executor": self.flux_executor, 84 | "cores": 2, 85 | "flux_executor_pmi_mode": pmi, 86 | }, 87 | spawner=FluxPythonSpawner, 88 | ) as exe: 89 | fs_1 = exe.submit(mpi_funct, 1) 90 | self.assertEqual(fs_1.result(), [(1, 2, 0), (1, 2, 1)]) 91 | self.assertTrue(fs_1.done()) 92 | 93 | def test_single_task(self): 94 | with BlockAllocationTaskScheduler( 95 | max_workers=1, 96 | executor_kwargs={ 97 | "flux_executor": self.flux_executor, 98 | "cores": 2, 99 | "flux_executor_pmi_mode": pmi, 100 | }, 101 | spawner=FluxPythonSpawner, 102 | ) as p: 103 | output = p.map(mpi_funct, [1, 2, 3]) 104 | self.assertEqual( 105 | list(output), 106 | [[(1, 2, 0), (1, 2, 1)], [(2, 2, 0), (2, 2, 1)], [(3, 2, 0), (3, 2, 1)]], 107 | ) 108 | 109 | def test_execute_task(self): 110 | f = Future() 111 | q = Queue() 112 | q.put({"fn": calc, "args": (), "kwargs": {"i": 2}, "future": f}) 113 | q.put({"shutdown": True, "wait": True}) 114 | cloudpickle_register(ind=1) 115 | execute_tasks( 116 | future_queue=q, 117 | cores=1, 118 | flux_executor=self.flux_executor, 119 | spawner=FluxPythonSpawner, 120 | ) 121 | self.assertEqual(f.result(), 2) 122 | q.join() 123 | 124 | def test_execute_task_threads(self): 125 | f = Future() 126 | q = Queue() 127 | q.put({"fn": calc, "args": (), "kwargs": {"i": 2}, "future": f}) 128 | q.put({"shutdown": True, "wait": True}) 129 | cloudpickle_register(ind=1) 130 | execute_tasks( 131 | future_queue=q, 132 | cores=1, 133 | threads_per_core=1, 134 | flux_executor=self.flux_executor, 135 | spawner=FluxPythonSpawner, 136 | ) 137 | self.assertEqual(f.result(), 2) 138 | q.join() 139 | 140 | def test_internal_memory(self): 141 | with BlockAllocationTaskScheduler( 142 | max_workers=1, 143 | executor_kwargs={ 144 | "flux_executor": self.flux_executor, 145 | "cores": 1, 146 | "init_function": set_global, 147 | }, 148 | spawner=FluxPythonSpawner, 149 | ) as p: 150 | f = p.submit(get_global) 
151 | self.assertFalse(f.done()) 152 | self.assertEqual(f.result(), np.array([5])) 153 | self.assertTrue(f.done()) 154 | 155 | def test_interface_exception(self): 156 | with self.assertRaises(ValueError): 157 | flux_interface = FluxPythonSpawner( 158 | flux_executor=self.flux_executor, openmpi_oversubscribe=True 159 | ) 160 | flux_interface.bootup(command_lst=[]) 161 | -------------------------------------------------------------------------------- /tests/test_interactive_dependencies.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import importlib.util 3 | from time import sleep 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from executorlib.task_scheduler.interactive.blockallocation import BlockAllocationTaskScheduler 9 | from executorlib.standalone.interactive.spawner import MpiExecSpawner 10 | 11 | 12 | skip_mpi4py_test = importlib.util.find_spec("mpi4py") is None 13 | 14 | 15 | def calc(i): 16 | return np.array(i**2) 17 | 18 | 19 | class TestFuture(unittest.TestCase): 20 | def test_pool_serial(self): 21 | with BlockAllocationTaskScheduler( 22 | max_workers=1, 23 | executor_kwargs={"cores": 1}, 24 | spawner=MpiExecSpawner, 25 | ) as p: 26 | output = p.submit(calc, i=2) 27 | self.assertTrue(isinstance(output, Future)) 28 | self.assertFalse(output.done()) 29 | sleep(1) 30 | self.assertTrue(output.done()) 31 | self.assertEqual(output.result(), np.array(4)) 32 | 33 | @unittest.skipIf( 34 | skip_mpi4py_test, "mpi4py is not installed, so the mpi4py tests are skipped." 35 | ) 36 | def test_pool_serial_multi_core(self): 37 | with BlockAllocationTaskScheduler( 38 | max_workers=1, 39 | executor_kwargs={"cores": 2}, 40 | spawner=MpiExecSpawner, 41 | ) as p: 42 | output = p.submit(calc, i=2) 43 | self.assertTrue(isinstance(output, Future)) 44 | self.assertFalse(output.done()) 45 | sleep(1) 46 | self.assertTrue(output.done()) 47 | self.assertEqual(output.result(), [np.array(4), np.array(4)]) 48 | 49 | def test_independence_from_executor(self): 50 | """ 51 | Ensure that futures are able to live on after the executor gets garbage 52 | collected. 
53 | """ 54 | 55 | with self.subTest("From the main process"): 56 | mutable = [] 57 | 58 | def slow_callable(): 59 | from time import sleep 60 | 61 | sleep(1) 62 | return True 63 | 64 | def callback(future): 65 | mutable.append("Called back") 66 | 67 | def submit(): 68 | # Executor only exists in this scope and can get garbage collected after 69 | # this function exits 70 | future = BlockAllocationTaskScheduler( 71 | max_workers=1, 72 | executor_kwargs={}, 73 | spawner=MpiExecSpawner, 74 | ).submit(slow_callable) 75 | future.add_done_callback(callback) 76 | return future 77 | 78 | self.assertListEqual( 79 | [], 80 | mutable, 81 | msg="Sanity check that test is starting in the expected condition", 82 | ) 83 | future = submit() 84 | 85 | self.assertFalse( 86 | future.done(), 87 | msg="The submit function is slow, it should be running still", 88 | ) 89 | self.assertListEqual( 90 | [], 91 | mutable, 92 | msg="While running, the mutable should not have been impacted by the " 93 | "callback", 94 | ) 95 | future.result() # Wait for the calculation to finish 96 | self.assertListEqual( 97 | ["Called back"], 98 | mutable, 99 | msg="After completion, the callback should modify the mutable data", 100 | ) 101 | 102 | with self.subTest("From inside a class"): 103 | 104 | class Foo: 105 | def __init__(self): 106 | self.running = False 107 | 108 | def run(self): 109 | self.running = True 110 | 111 | future = BlockAllocationTaskScheduler( 112 | max_workers=1, 113 | executor_kwargs={}, 114 | spawner=MpiExecSpawner, 115 | ).submit(self.return_42) 116 | future.add_done_callback(self.finished) 117 | 118 | return future 119 | 120 | def return_42(self): 121 | from time import sleep 122 | 123 | sleep(1) 124 | return 42 125 | 126 | def finished(self, future): 127 | self.running = False 128 | 129 | foo = Foo() 130 | self.assertFalse( 131 | foo.running, 132 | msg="Sanity check that the test starts in the expected condition", 133 | ) 134 | fs = foo.run() 135 | self.assertTrue( 136 | foo.running, 137 | msg="We should be able to exit the run method before the task completes", 138 | ) 139 | fs.result() # Wait for completion 140 | self.assertFalse( 141 | foo.running, 142 | msg="After task completion, we expect the callback to modify the class", 143 | ) 144 | -------------------------------------------------------------------------------- /tests/test_interactive_slurmspawner.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from executorlib.task_scheduler.interactive.slurmspawner import generate_slurm_command 3 | 4 | try: 5 | from executorlib.task_scheduler.file.queue_spawner import _pysqa_execute_command 6 | 7 | skip_pysqa_test = False 8 | except ImportError: 9 | skip_pysqa_test = True 10 | 11 | 12 | @unittest.skipIf( 13 | skip_pysqa_test, "pysqa is not installed, so the pysqa tests are skipped."
14 | ) 15 | class TestPysqaExecuteCommand(unittest.TestCase): 16 | def test_pysqa_execute_command_list(self): 17 | out = _pysqa_execute_command( 18 | commands=["echo", "test"], 19 | working_directory=None, 20 | split_output=True, 21 | shell=True, 22 | error_filename="pysqa.err", 23 | ) 24 | self.assertEqual(len(out), 2) 25 | self.assertEqual("test", out[0]) 26 | 27 | def test_pysqa_execute_command_string(self): 28 | out = _pysqa_execute_command( 29 | commands="echo test", 30 | working_directory=None, 31 | split_output=False, 32 | shell=False, 33 | error_filename="pysqa.err", 34 | ) 35 | self.assertEqual(len(out), 5) 36 | self.assertEqual("test\n", out) 37 | 38 | def test_pysqa_execute_command_fail(self): 39 | with self.assertRaises(FileNotFoundError): 40 | _pysqa_execute_command( 41 | commands=["no/executable/available"], 42 | working_directory=None, 43 | split_output=True, 44 | shell=False, 45 | error_filename="pysqa.err", 46 | ) 47 | 48 | def test_generate_slurm_command(self): 49 | command_lst = generate_slurm_command( 50 | cores=1, 51 | cwd="/tmp/test", 52 | threads_per_core=2, 53 | gpus_per_core=1, 54 | num_nodes=1, 55 | exclusive=True, 56 | openmpi_oversubscribe=True, 57 | slurm_cmd_args=["--help"], 58 | ) 59 | self.assertEqual(len(command_lst), 12) 60 | reply_lst = ['srun', '-n', '1', '-D', '/tmp/test', '-N', '1', '--cpus-per-task=2', '--gpus-per-task=1', '--exact', '--oversubscribe', '--help'] 61 | self.assertEqual(command_lst, reply_lst) 62 | -------------------------------------------------------------------------------- /tests/test_singlenodeexecutor_cache.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import unittest 4 | 5 | from executorlib import SingleNodeExecutor, get_cache_data 6 | from executorlib.standalone.serialize import cloudpickle_register 7 | 8 | try: 9 | import h5py 10 | 11 | skip_h5py_test = False 12 | except ImportError: 13 | skip_h5py_test = True 14 | 15 | 16 | def get_error(a): 17 | raise ValueError(a) 18 | 19 | 20 | @unittest.skipIf( 21 | skip_h5py_test, "h5py is not installed, so the h5io tests are skipped." 
22 | ) 23 | class TestCacheFunctions(unittest.TestCase): 24 | def test_cache_data(self): 25 | cache_directory = "./cache" 26 | with SingleNodeExecutor(cache_directory=cache_directory) as exe: 27 | self.assertTrue(exe) 28 | future_lst = [exe.submit(sum, [i, i]) for i in range(1, 4)] 29 | result_lst = [f.result() for f in future_lst] 30 | 31 | cache_lst = get_cache_data(cache_directory=cache_directory) 32 | self.assertEqual(sum([c["output"] for c in cache_lst]), sum(result_lst)) 33 | self.assertEqual( 34 | sum([sum(c["input_args"][0]) for c in cache_lst]), sum(result_lst) 35 | ) 36 | 37 | def test_cache_error(self): 38 | cache_directory = "./cache_error" 39 | with SingleNodeExecutor(cache_directory=cache_directory) as exe: 40 | self.assertTrue(exe) 41 | cloudpickle_register(ind=1) 42 | f = exe.submit(get_error, a=1) 43 | with self.assertRaises(ValueError): 44 | print(f.result()) 45 | 46 | def tearDown(self): 47 | if os.path.exists("cache"): 48 | shutil.rmtree("cache") 49 | if os.path.exists("cache_error"): 50 | shutil.rmtree("cache_error") 51 | -------------------------------------------------------------------------------- /tests/test_singlenodeexecutor_mpi.py: -------------------------------------------------------------------------------- 1 | import os 2 | import importlib.util 3 | import shutil 4 | import time 5 | import unittest 6 | 7 | from executorlib import SingleNodeExecutor, SlurmJobExecutor 8 | from executorlib.standalone.serialize import cloudpickle_register 9 | 10 | 11 | skip_mpi4py_test = importlib.util.find_spec("mpi4py") is None 12 | 13 | 14 | def calc(i): 15 | return i 16 | 17 | 18 | def mpi_funct(i): 19 | from mpi4py import MPI 20 | 21 | size = MPI.COMM_WORLD.Get_size() 22 | rank = MPI.COMM_WORLD.Get_rank() 23 | return i, size, rank 24 | 25 | 26 | def mpi_funct_sleep(i): 27 | from mpi4py import MPI 28 | 29 | size = MPI.COMM_WORLD.Get_size() 30 | rank = MPI.COMM_WORLD.Get_rank() 31 | time.sleep(i) 32 | return i, size, rank 33 | 34 | 35 | class TestExecutorBackend(unittest.TestCase): 36 | def test_meta_executor_serial(self): 37 | with SingleNodeExecutor(max_cores=2, block_allocation=True) as exe: 38 | cloudpickle_register(ind=1) 39 | fs_1 = exe.submit(calc, 1) 40 | fs_2 = exe.submit(calc, 2) 41 | self.assertEqual(fs_1.result(), 1) 42 | self.assertEqual(fs_2.result(), 2) 43 | self.assertTrue(fs_1.done()) 44 | self.assertTrue(fs_2.done()) 45 | 46 | def test_meta_executor_single(self): 47 | with SingleNodeExecutor(max_cores=1, block_allocation=True) as exe: 48 | cloudpickle_register(ind=1) 49 | fs_1 = exe.submit(calc, 1) 50 | fs_2 = exe.submit(calc, 2) 51 | self.assertEqual(fs_1.result(), 1) 52 | self.assertEqual(fs_2.result(), 2) 53 | self.assertTrue(fs_1.done()) 54 | self.assertTrue(fs_2.done()) 55 | 56 | def test_oversubscribe(self): 57 | with self.assertRaises(ValueError): 58 | with SingleNodeExecutor(max_cores=1, block_allocation=True) as exe: 59 | cloudpickle_register(ind=1) 60 | fs_1 = exe.submit(calc, 1, resource_dict={"cores": 2}) 61 | 62 | @unittest.skipIf( 63 | skip_mpi4py_test, "mpi4py is not installed, so the mpi4py tests are skipped." 
64 | ) 65 | def test_meta_executor_parallel(self): 66 | with SingleNodeExecutor( 67 | max_workers=2, 68 | resource_dict={"cores": 2}, 69 | block_allocation=True, 70 | ) as exe: 71 | cloudpickle_register(ind=1) 72 | fs_1 = exe.submit(mpi_funct, 1) 73 | self.assertEqual(fs_1.result(), [(1, 2, 0), (1, 2, 1)]) 74 | self.assertTrue(fs_1.done()) 75 | 76 | def test_errors(self): 77 | with self.assertRaises(TypeError): 78 | SingleNodeExecutor( 79 | max_cores=1, 80 | resource_dict={"cores": 1, "gpus_per_core": 1}, 81 | ) 82 | 83 | 84 | class TestExecutorBackendCache(unittest.TestCase): 85 | def tearDown(self): 86 | shutil.rmtree("./cache") 87 | 88 | @unittest.skipIf( 89 | skip_mpi4py_test, "mpi4py is not installed, so the mpi4py tests are skipped." 90 | ) 91 | def test_meta_executor_parallel_cache(self): 92 | with SingleNodeExecutor( 93 | max_workers=2, 94 | resource_dict={"cores": 2}, 95 | block_allocation=True, 96 | cache_directory="./cache", 97 | ) as exe: 98 | cloudpickle_register(ind=1) 99 | time_1 = time.time() 100 | fs_1 = exe.submit(mpi_funct_sleep, 1) 101 | self.assertEqual(fs_1.result(), [(1, 2, 0), (1, 2, 1)]) 102 | self.assertTrue(fs_1.done()) 103 | time_2 = time.time() 104 | self.assertTrue(time_2 - time_1 > 1) 105 | time_3 = time.time() 106 | fs_2 = exe.submit(mpi_funct_sleep, 1) 107 | self.assertEqual(fs_2.result(), [(1, 2, 0), (1, 2, 1)]) 108 | self.assertTrue(fs_2.done()) 109 | time_4 = time.time() 110 | self.assertTrue(time_4 - time_3 < 1) 111 | 112 | 113 | class TestWorkingDirectory(unittest.TestCase): 114 | def test_output_files_cwd(self): 115 | dirname = os.path.abspath(os.path.dirname(__file__)) 116 | os.makedirs(dirname, exist_ok=True) 117 | with SingleNodeExecutor( 118 | max_cores=1, 119 | resource_dict={"cores": 1, "cwd": dirname}, 120 | block_allocation=True, 121 | ) as p: 122 | output = p.map(calc, [1, 2, 3]) 123 | self.assertEqual( 124 | list(output), 125 | [1, 2, 3], 126 | ) 127 | 128 | 129 | class TestSLURMExecutor(unittest.TestCase): 130 | def test_validate_max_workers(self): 131 | os.environ["SLURM_NTASKS"] = "6" 132 | os.environ["SLURM_CPUS_PER_TASK"] = "4" 133 | with self.assertRaises(ValueError): 134 | SlurmJobExecutor( 135 | max_workers=10, 136 | resource_dict={"cores": 10, "threads_per_core": 10}, 137 | block_allocation=True, 138 | ) 139 | -------------------------------------------------------------------------------- /tests/test_singlenodeexecutor_noblock.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from executorlib import SingleNodeExecutor 4 | from executorlib.standalone.serialize import cloudpickle_register 5 | 6 | 7 | def calc(i): 8 | return i 9 | 10 | 11 | def resource_dict(resource_dict): 12 | return resource_dict 13 | 14 | 15 | class TestExecutorBackend(unittest.TestCase): 16 | def test_meta_executor_serial_with_dependencies(self): 17 | with SingleNodeExecutor( 18 | max_cores=2, 19 | block_allocation=False, 20 | disable_dependencies=True, 21 | ) as exe: 22 | cloudpickle_register(ind=1) 23 | fs_1 = exe.submit(calc, 1) 24 | fs_2 = exe.submit(calc, 2) 25 | self.assertEqual(fs_1.result(), 1) 26 | self.assertEqual(fs_2.result(), 2) 27 | self.assertTrue(fs_1.done()) 28 | self.assertTrue(fs_2.done()) 29 | 30 | def test_meta_executor_serial_without_dependencies(self): 31 | with SingleNodeExecutor( 32 | max_cores=2, 33 | block_allocation=False, 34 | disable_dependencies=False, 35 | ) as exe: 36 | cloudpickle_register(ind=1) 37 | fs_1 = exe.submit(calc, 1) 38 | fs_2 = exe.submit(calc, 2) 39 |
self.assertEqual(fs_1.result(), 1) 40 | self.assertEqual(fs_2.result(), 2) 41 | self.assertTrue(fs_1.done()) 42 | self.assertTrue(fs_2.done()) 43 | 44 | def test_meta_executor_single(self): 45 | with SingleNodeExecutor( 46 | max_cores=1, 47 | block_allocation=False, 48 | ) as exe: 49 | cloudpickle_register(ind=1) 50 | fs_1 = exe.submit(calc, 1) 51 | fs_2 = exe.submit(calc, 2) 52 | self.assertEqual(fs_1.result(), 1) 53 | self.assertEqual(fs_2.result(), 2) 54 | self.assertTrue(fs_1.done()) 55 | self.assertTrue(fs_2.done()) 56 | 57 | def test_errors(self): 58 | with self.assertRaises(TypeError): 59 | SingleNodeExecutor( 60 | max_cores=1, 61 | resource_dict={ 62 | "cores": 1, 63 | "gpus_per_core": 1, 64 | }, 65 | ) 66 | with self.assertRaises(ValueError): 67 | with SingleNodeExecutor( 68 | max_cores=1, 69 | block_allocation=False, 70 | ) as exe: 71 | exe.submit(resource_dict, resource_dict={}) 72 | with self.assertRaises(ValueError): 73 | with SingleNodeExecutor( 74 | max_cores=1, 75 | block_allocation=True, 76 | ) as exe: 77 | exe.submit(resource_dict, resource_dict={}) 78 | -------------------------------------------------------------------------------- /tests/test_singlenodeexecutor_resize.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from executorlib import SingleNodeExecutor 3 | from executorlib.standalone.serialize import cloudpickle_register 4 | 5 | 6 | def sleep_funct(sec): 7 | from time import sleep 8 | sleep(sec) 9 | return sec 10 | 11 | 12 | class TestResizing(unittest.TestCase): 13 | def test_without_dependencies_decrease(self): 14 | cloudpickle_register(ind=1) 15 | with SingleNodeExecutor(max_workers=2, block_allocation=True, disable_dependencies=True) as exe: 16 | future_lst = [exe.submit(sleep_funct, 1) for _ in range(4)] 17 | self.assertEqual([f.done() for f in future_lst], [False, False, False, False]) 18 | self.assertEqual(len(exe), 4) 19 | sleep_funct(sec=0.5) 20 | exe.max_workers = 1 21 | self.assertTrue(len(exe) >= 1) 22 | self.assertEqual(len(exe._task_scheduler._process), 1) 23 | self.assertTrue(1 <= sum([f.done() for f in future_lst]) < 3) 24 | self.assertEqual([f.result() for f in future_lst], [1, 1, 1, 1]) 25 | self.assertEqual([f.done() for f in future_lst], [True, True, True, True]) 26 | 27 | def test_without_dependencies_increase(self): 28 | cloudpickle_register(ind=1) 29 | with SingleNodeExecutor(max_workers=1, block_allocation=True, disable_dependencies=True) as exe: 30 | future_lst = [exe.submit(sleep_funct, 0.1) for _ in range(4)] 31 | self.assertEqual([f.done() for f in future_lst], [False, False, False, False]) 32 | self.assertEqual(len(exe), 4) 33 | self.assertEqual(exe.max_workers, 1) 34 | future_lst[0].result() 35 | exe.max_workers = 2 36 | self.assertEqual(exe.max_workers, 2) 37 | self.assertTrue(len(exe) >= 1) 38 | self.assertEqual(len(exe._task_scheduler._process), 2) 39 | self.assertEqual([f.done() for f in future_lst], [True, False, False, False]) 40 | self.assertEqual([f.result() for f in future_lst], [0.1, 0.1, 0.1, 0.1]) 41 | self.assertEqual([f.done() for f in future_lst], [True, True, True, True]) 42 | 43 | def test_with_dependencies_decrease(self): 44 | cloudpickle_register(ind=1) 45 | with SingleNodeExecutor(max_workers=2, block_allocation=True, disable_dependencies=False) as exe: 46 | future_lst = [exe.submit(sleep_funct, 1) for _ in range(4)] 47 | self.assertEqual([f.done() for f in future_lst], [False, False, False, False]) 48 | self.assertEqual(len(exe), 4) 49 | 
sleep_funct(sec=0.5) 50 | exe.max_workers = 1 51 | self.assertTrue(1 <= sum([f.done() for f in future_lst]) < 3) 52 | self.assertEqual([f.result() for f in future_lst], [1, 1, 1, 1]) 53 | self.assertEqual([f.done() for f in future_lst], [True, True, True, True]) 54 | 55 | def test_with_dependencies_increase(self): 56 | cloudpickle_register(ind=1) 57 | with SingleNodeExecutor(max_workers=1, block_allocation=True, disable_dependencies=False) as exe: 58 | future_lst = [exe.submit(sleep_funct, 0.1) for _ in range(4)] 59 | self.assertEqual([f.done() for f in future_lst], [False, False, False, False]) 60 | self.assertEqual(len(exe), 4) 61 | self.assertEqual(exe.max_workers, 1) 62 | future_lst[0].result() 63 | exe.max_workers = 2 64 | self.assertEqual(exe.max_workers, 2) 65 | self.assertEqual([f.done() for f in future_lst], [True, False, False, False]) 66 | self.assertEqual([f.result() for f in future_lst], [0.1, 0.1, 0.1, 0.1]) 67 | self.assertEqual([f.done() for f in future_lst], [True, True, True, True]) 68 | 69 | def test_no_block_allocation(self): 70 | with self.assertRaises(NotImplementedError): 71 | with SingleNodeExecutor(block_allocation=False, disable_dependencies=False) as exe: 72 | exe.max_workers = 2 73 | with self.assertRaises(NotImplementedError): 74 | with SingleNodeExecutor(block_allocation=False, disable_dependencies=True) as exe: 75 | exe.max_workers = 2 76 | 77 | def test_max_workers_stopped_executor(self): 78 | exe = SingleNodeExecutor(block_allocation=True) 79 | exe.shutdown(wait=True) 80 | self.assertIsNone(exe.max_workers) 81 | -------------------------------------------------------------------------------- /tests/test_singlenodeexecutor_shell_executor.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import subprocess 3 | import queue 4 | import unittest 5 | 6 | from executorlib import SingleNodeExecutor 7 | from executorlib.standalone.serialize import cloudpickle_register 8 | from executorlib.task_scheduler.interactive.shared import execute_tasks 9 | from executorlib.standalone.interactive.spawner import MpiExecSpawner 10 | 11 | 12 | def submit_shell_command( 13 | command: list, universal_newlines: bool = True, shell: bool = False 14 | ): 15 | return subprocess.check_output( 16 | command, universal_newlines=universal_newlines, shell=shell 17 | ) 18 | 19 | 20 | class SubprocessExecutorTest(unittest.TestCase): 21 | def test_execute_single_task(self): 22 | test_queue = queue.Queue() 23 | f = Future() 24 | test_queue.put( 25 | { 26 | "fn": submit_shell_command, 27 | "args": [["echo", "test"]], 28 | "kwargs": {"universal_newlines": True}, 29 | "future": f, 30 | } 31 | ) 32 | test_queue.put({"shutdown": True, "wait": True}) 33 | cloudpickle_register(ind=1) 34 | self.assertFalse(f.done()) 35 | execute_tasks( 36 | future_queue=test_queue, 37 | cores=1, 38 | openmpi_oversubscribe=False, 39 | spawner=MpiExecSpawner, 40 | ) 41 | self.assertTrue(f.done()) 42 | self.assertEqual("test\n", f.result()) 43 | test_queue.join() 44 | 45 | def test_wrong_error(self): 46 | test_queue = queue.Queue() 47 | f = Future() 48 | test_queue.put( 49 | { 50 | "fn": submit_shell_command, 51 | "args": [["echo", "test"]], 52 | "kwargs": {"wrong_key": True}, 53 | "future": f, 54 | } 55 | ) 56 | test_queue.put( 57 | {"shutdown": True, "wait": True} 58 | ) 59 | cloudpickle_register(ind=1) 60 | with self.assertRaises(TypeError): 61 | execute_tasks( 62 | future_queue=test_queue, 63 | cores=1, 64 | openmpi_oversubscribe=False, 65 | 
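# MpiExecSpawner (imported above) launches the worker process via mpiexec;
# with cores=1 this runs a single serial worker.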
spawner=MpiExecSpawner, 66 | ) 67 | f.result() 68 | 69 | def test_broken_executable(self): 70 | test_queue = queue.Queue() 71 | f = Future() 72 | test_queue.put( 73 | { 74 | "fn": submit_shell_command, 75 | "args": [["/executable/does/not/exist"]], 76 | "kwargs": {"universal_newlines": True}, 77 | "future": f, 78 | } 79 | ) 80 | test_queue.put( 81 | { 82 | "shutdown": True, 83 | "wait": True, 84 | } 85 | ) 86 | cloudpickle_register(ind=1) 87 | with self.assertRaises(FileNotFoundError): 88 | execute_tasks( 89 | future_queue=test_queue, 90 | cores=1, 91 | openmpi_oversubscribe=False, 92 | spawner=MpiExecSpawner, 93 | ) 94 | f.result() 95 | 96 | def test_shell_static_executor_args(self): 97 | with SingleNodeExecutor(max_workers=1) as exe: 98 | cloudpickle_register(ind=1) 99 | future = exe.submit( 100 | submit_shell_command, 101 | ["echo", "test"], 102 | universal_newlines=True, 103 | shell=False, 104 | ) 105 | self.assertFalse(future.done()) 106 | self.assertEqual("test\n", future.result()) 107 | self.assertTrue(future.done()) 108 | 109 | def test_shell_static_executor_binary(self): 110 | with SingleNodeExecutor(max_workers=1) as exe: 111 | cloudpickle_register(ind=1) 112 | future = exe.submit( 113 | submit_shell_command, 114 | ["echo", "test"], 115 | universal_newlines=False, 116 | shell=False, 117 | ) 118 | self.assertFalse(future.done()) 119 | self.assertEqual(b"test\n", future.result()) 120 | self.assertTrue(future.done()) 121 | 122 | def test_shell_static_executor_shell(self): 123 | with SingleNodeExecutor(max_workers=1) as exe: 124 | cloudpickle_register(ind=1) 125 | future = exe.submit( 126 | submit_shell_command, "echo test", universal_newlines=True, shell=True 127 | ) 128 | self.assertFalse(future.done()) 129 | self.assertEqual("test\n", future.result()) 130 | self.assertTrue(future.done()) 131 | 132 | def test_shell_executor(self): 133 | with SingleNodeExecutor(max_workers=2) as exe: 134 | cloudpickle_register(ind=1) 135 | f_1 = exe.submit( 136 | submit_shell_command, ["echo", "test_1"], universal_newlines=True 137 | ) 138 | f_2 = exe.submit( 139 | submit_shell_command, ["echo", "test_2"], universal_newlines=True 140 | ) 141 | f_3 = exe.submit( 142 | submit_shell_command, ["echo", "test_3"], universal_newlines=True 143 | ) 144 | f_4 = exe.submit( 145 | submit_shell_command, ["echo", "test_4"], universal_newlines=True 146 | ) 147 | self.assertFalse(f_1.done()) 148 | self.assertFalse(f_2.done()) 149 | self.assertFalse(f_3.done()) 150 | self.assertFalse(f_4.done()) 151 | self.assertEqual("test_1\n", f_1.result()) 152 | self.assertEqual("test_2\n", f_2.result()) 153 | self.assertTrue(f_1.done()) 154 | self.assertTrue(f_2.done()) 155 | self.assertEqual("test_3\n", f_3.result()) 156 | self.assertEqual("test_4\n", f_4.result()) 157 | self.assertTrue(f_1.done()) 158 | self.assertTrue(f_2.done()) 159 | self.assertTrue(f_3.done()) 160 | self.assertTrue(f_4.done()) 161 | -------------------------------------------------------------------------------- /tests/test_singlenodeexecutor_shell_interactive.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import os 3 | import subprocess 4 | import queue 5 | import unittest 6 | 7 | from executorlib import SingleNodeExecutor 8 | from executorlib.standalone.serialize import cloudpickle_register 9 | from executorlib.task_scheduler.interactive.shared import execute_tasks 10 | from executorlib.standalone.interactive.spawner import MpiExecSpawner 11 | 12 | 13 | executable_path = 
os.path.join(os.path.dirname(__file__), "executables", "count.py") 14 | 15 | 16 | def init_process(): 17 | return { 18 | "process": subprocess.Popen( 19 | ["python", executable_path], 20 | stdin=subprocess.PIPE, 21 | stdout=subprocess.PIPE, 22 | universal_newlines=True, 23 | shell=False, 24 | ) 25 | } 26 | 27 | 28 | def interact(shell_input, process, lines_to_read=None, stop_read_pattern=None): 29 | process.stdin.write(shell_input) 30 | process.stdin.flush() 31 | lines_count = 0 32 | output = "" 33 | while True: 34 | output_current = process.stdout.readline() 35 | output += output_current 36 | lines_count += 1 37 | if stop_read_pattern is not None and stop_read_pattern in output_current: 38 | break 39 | elif lines_to_read is not None and lines_to_read == lines_count: 40 | break 41 | return output 42 | 43 | 44 | def shutdown(process): 45 | process.stdin.write("shutdown\n") 46 | process.stdin.flush() 47 | 48 | 49 | class ShellInteractiveExecutorTest(unittest.TestCase): 50 | def test_execute_single_task(self): 51 | test_queue = queue.Queue() 52 | future_lines = Future() 53 | future_pattern = Future() 54 | future_shutdown = Future() 55 | test_queue.put( 56 | { 57 | "fn": interact, 58 | "future": future_lines, 59 | "args": (), 60 | "kwargs": { 61 | "shell_input": "4\n", 62 | "lines_to_read": 5, 63 | "stop_read_pattern": None, 64 | }, 65 | } 66 | ) 67 | test_queue.put( 68 | { 69 | "fn": interact, 70 | "future": future_pattern, 71 | "args": (), 72 | "kwargs": { 73 | "shell_input": "4\n", 74 | "lines_to_read": None, 75 | "stop_read_pattern": "done", 76 | }, 77 | } 78 | ) 79 | test_queue.put( 80 | { 81 | "fn": shutdown, 82 | "future": future_shutdown, 83 | "args": (), 84 | "kwargs": {}, 85 | } 86 | ) 87 | test_queue.put({"shutdown": True, "wait": True}) 88 | cloudpickle_register(ind=1) 89 | self.assertFalse(future_lines.done()) 90 | self.assertFalse(future_pattern.done()) 91 | execute_tasks( 92 | future_queue=test_queue, 93 | cores=1, 94 | openmpi_oversubscribe=False, 95 | spawner=MpiExecSpawner, 96 | init_function=init_process, 97 | ) 98 | self.assertTrue(future_lines.done()) 99 | self.assertTrue(future_pattern.done()) 100 | self.assertTrue(future_shutdown.done()) 101 | self.assertEqual("0\n1\n2\n3\ndone\n", future_lines.result()) 102 | self.assertEqual("0\n1\n2\n3\ndone\n", future_pattern.result()) 103 | test_queue.join() 104 | 105 | def test_shell_interactive_executor(self): 106 | cloudpickle_register(ind=1) 107 | with SingleNodeExecutor( 108 | max_workers=1, 109 | init_function=init_process, 110 | block_allocation=True, 111 | ) as exe: 112 | future_lines = exe.submit( 113 | interact, shell_input="4\n", lines_to_read=5, stop_read_pattern=None 114 | ) 115 | future_pattern = exe.submit( 116 | interact, 117 | shell_input="4\n", 118 | lines_to_read=None, 119 | stop_read_pattern="done", 120 | ) 121 | self.assertFalse(future_lines.done()) 122 | self.assertFalse(future_pattern.done()) 123 | self.assertEqual("0\n1\n2\n3\ndone\n", future_lines.result()) 124 | self.assertEqual("0\n1\n2\n3\ndone\n", future_pattern.result()) 125 | self.assertTrue(future_lines.done()) 126 | self.assertTrue(future_pattern.done()) 127 | future_shutdown = exe.submit(shutdown) 128 | self.assertIsNone(future_shutdown.result()) 129 | self.assertTrue(future_shutdown.done()) 130 | -------------------------------------------------------------------------------- /tests/test_standalone_hdf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import unittest 4 | 5 | 6 | 
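# h5py is an optional dependency, so the hdf helpers are imported inside a
# try-block; when the import fails, skip_h5py_test makes the skipIf decorator
# below skip the whole test class.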
try: 7 | from executorlib.task_scheduler.file.hdf import ( 8 | dump, 9 | load, 10 | get_output, 11 | get_runtime, 12 | get_queue_id, 13 | ) 14 | 15 | skip_h5py_test = False 16 | except ImportError: 17 | skip_h5py_test = True 18 | 19 | 20 | def my_funct(a, b): 21 | return a + b 22 | 23 | 24 | @unittest.skipIf( 25 | skip_h5py_test, "h5py is not installed, so the h5io tests are skipped." 26 | ) 27 | class TestSharedFunctions(unittest.TestCase): 28 | def test_hdf_mixed(self): 29 | cache_directory = os.path.abspath("cache") 30 | os.makedirs(cache_directory, exist_ok=True) 31 | file_name = os.path.join(cache_directory, "test_mixed.h5") 32 | a = 1 33 | b = 2 34 | dump( 35 | file_name=file_name, 36 | data_dict={"fn": my_funct, "args": [a], "kwargs": {"b": b}}, 37 | ) 38 | data_dict = load(file_name=file_name) 39 | self.assertTrue("fn" in data_dict.keys()) 40 | self.assertEqual(data_dict["args"], [a]) 41 | self.assertEqual(data_dict["kwargs"], {"b": b}) 42 | flag, no_error, output = get_output(file_name=file_name) 43 | self.assertTrue(get_runtime(file_name=file_name) == 0.0) 44 | self.assertFalse(no_error) 45 | self.assertFalse(flag) 46 | self.assertIsNone(output) 47 | 48 | def test_hdf_args(self): 49 | cache_directory = os.path.abspath("cache") 50 | os.makedirs(cache_directory, exist_ok=True) 51 | file_name = os.path.join(cache_directory, "test_args.h5") 52 | a = 1 53 | b = 2 54 | dump(file_name=file_name, data_dict={"fn": my_funct, "args": [a, b]}) 55 | data_dict = load(file_name=file_name) 56 | self.assertTrue("fn" in data_dict.keys()) 57 | self.assertEqual(data_dict["args"], [a, b]) 58 | self.assertEqual(data_dict["kwargs"], {}) 59 | flag, no_error, output = get_output(file_name=file_name) 60 | self.assertTrue(get_runtime(file_name=file_name) == 0.0) 61 | self.assertFalse(flag) 62 | self.assertFalse(no_error) 63 | self.assertIsNone(output) 64 | 65 | def test_hdf_kwargs(self): 66 | cache_directory = os.path.abspath("cache") 67 | os.makedirs(cache_directory, exist_ok=True) 68 | file_name = os.path.join(cache_directory, "test_kwargs.h5") 69 | a = 1 70 | b = 2 71 | dump( 72 | file_name=file_name, 73 | data_dict={ 74 | "fn": my_funct, 75 | "args": (), 76 | "kwargs": {"a": a, "b": b}, 77 | "queue_id": 123, 78 | }, 79 | ) 80 | data_dict = load(file_name=file_name) 81 | self.assertTrue("fn" in data_dict.keys()) 82 | self.assertEqual(data_dict["args"], ()) 83 | self.assertEqual(data_dict["kwargs"], {"a": a, "b": b}) 84 | self.assertEqual(get_queue_id(file_name=file_name), 123) 85 | flag, no_error, output = get_output(file_name=file_name) 86 | self.assertTrue(get_runtime(file_name=file_name) == 0.0) 87 | self.assertFalse(flag) 88 | self.assertFalse(no_error) 89 | self.assertIsNone(output) 90 | 91 | def test_hdf_queue_id(self): 92 | cache_directory = os.path.abspath("cache") 93 | os.makedirs(cache_directory, exist_ok=True) 94 | file_name = os.path.join(cache_directory, "test_queue.h5") 95 | queue_id = 123 96 | dump( 97 | file_name=file_name, 98 | data_dict={"queue_id": queue_id}, 99 | ) 100 | self.assertEqual(get_queue_id(file_name=file_name), 123) 101 | flag, no_error, output = get_output(file_name=file_name) 102 | self.assertTrue(get_runtime(file_name=file_name) == 0.0) 103 | self.assertFalse(flag) 104 | self.assertFalse(no_error) 105 | self.assertIsNone(output) 106 | 107 | def test_hdf_error(self): 108 | cache_directory = os.path.abspath("cache") 109 | os.makedirs(cache_directory, exist_ok=True) 110 | file_name = os.path.join(cache_directory, "test_error.h5") 111 | error = ValueError() 112 | dump( 113 
| file_name=file_name, 114 | data_dict={"error": error}, 115 | ) 116 | flag, no_error, output = get_output(file_name=file_name) 117 | self.assertTrue(get_runtime(file_name=file_name) == 0.0) 118 | self.assertTrue(flag) 119 | self.assertFalse(no_error) 120 | self.assertTrue(isinstance(output, error.__class__)) 121 | 122 | def tearDown(self): 123 | if os.path.exists("cache"): 124 | shutil.rmtree("cache") 125 | -------------------------------------------------------------------------------- /tests/test_standalone_inputcheck.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from executorlib.standalone.inputcheck import ( 4 | check_command_line_argument_lst, 5 | check_gpus_per_worker, 6 | check_oversubscribe, 7 | check_executor, 8 | check_init_function, 9 | check_nested_flux_executor, 10 | check_flux_log_files, 11 | check_pmi, 12 | check_plot_dependency_graph, 13 | check_refresh_rate, 14 | check_resource_dict, 15 | check_resource_dict_is_empty, 16 | check_flux_executor_pmi_mode, 17 | check_max_workers_and_cores, 18 | check_hostname_localhost, 19 | check_pysqa_config_directory, 20 | check_file_exists, 21 | validate_number_of_cores, 22 | ) 23 | 24 | 25 | class TestInputCheck(unittest.TestCase): 26 | def test_check_command_line_argument_lst(self): 27 | with self.assertRaises(ValueError): 28 | check_command_line_argument_lst(command_line_argument_lst=["a"]) 29 | 30 | def test_check_gpus_per_worker(self): 31 | with self.assertRaises(TypeError): 32 | check_gpus_per_worker(gpus_per_worker=1) 33 | 34 | def test_check_oversubscribe(self): 35 | with self.assertRaises(ValueError): 36 | check_oversubscribe(oversubscribe=True) 37 | 38 | def test_check_executor(self): 39 | with self.assertRaises(ValueError): 40 | check_executor(executor=1) 41 | 42 | def test_check_init_function(self): 43 | with self.assertRaises(ValueError): 44 | check_init_function(init_function=1, block_allocation=False) 45 | 46 | def test_check_refresh_rate(self): 47 | with self.assertRaises(ValueError): 48 | check_refresh_rate(refresh_rate=1) 49 | 50 | def test_check_resource_dict(self): 51 | def simple_function(resource_dict): 52 | return resource_dict 53 | 54 | with self.assertRaises(ValueError): 55 | check_resource_dict(function=simple_function) 56 | 57 | def test_check_resource_dict_is_empty(self): 58 | with self.assertRaises(ValueError): 59 | check_resource_dict_is_empty(resource_dict={"a": 1}) 60 | 61 | def test_check_pmi(self): 62 | with self.assertRaises(ValueError): 63 | check_pmi(backend="test", pmi="test") 64 | with self.assertRaises(ValueError): 65 | check_pmi(backend="flux_allocation", pmi="test") 66 | 67 | def test_check_nested_flux_executor(self): 68 | with self.assertRaises(ValueError): 69 | check_nested_flux_executor(nested_flux_executor=True) 70 | 71 | def test_check_flux_log_files(self): 72 | with self.assertRaises(ValueError): 73 | check_flux_log_files(flux_log_files=True) 74 | 75 | def test_check_plot_dependency_graph(self): 76 | with self.assertRaises(ValueError): 77 | check_plot_dependency_graph(plot_dependency_graph=True) 78 | 79 | def test_check_flux_executor_pmi_mode(self): 80 | with self.assertRaises(ValueError): 81 | check_flux_executor_pmi_mode(flux_executor_pmi_mode="test") 82 | 83 | def test_check_max_workers_and_cores(self): 84 | with self.assertRaises(ValueError): 85 | check_max_workers_and_cores(max_workers=2, max_cores=None) 86 | with self.assertRaises(ValueError): 87 | check_max_workers_and_cores(max_workers=None, max_cores=2) 88 | with 
self.assertRaises(ValueError): 89 | check_max_workers_and_cores(max_workers=2, max_cores=2) 90 | 91 | def test_check_hostname_localhost(self): 92 | with self.assertRaises(ValueError): 93 | check_hostname_localhost(hostname_localhost=True) 94 | with self.assertRaises(ValueError): 95 | check_hostname_localhost(hostname_localhost=False) 96 | 97 | def test_check_pysqa_config_directory(self): 98 | with self.assertRaises(ValueError): 99 | check_pysqa_config_directory(pysqa_config_directory="path/to/config") 100 | 101 | def test_check_file_exists(self): 102 | with self.assertRaises(ValueError): 103 | check_file_exists(file_name=None) 104 | with self.assertRaises(ValueError): 105 | check_file_exists(file_name="/path/does/not/exist") 106 | 107 | def test_validate_number_of_cores(self): 108 | with self.assertRaises(ValueError): 109 | validate_number_of_cores( 110 | max_cores=None, max_workers=None, cores_per_worker=None 111 | ) 112 | self.assertIsInstance( 113 | validate_number_of_cores(max_cores=1, max_workers=None, cores_per_worker=1), 114 | int, 115 | ) 116 | self.assertIsInstance( 117 | validate_number_of_cores( 118 | max_cores=None, max_workers=1, cores_per_worker=None 119 | ), 120 | int, 121 | ) 122 | -------------------------------------------------------------------------------- /tests/test_standalone_interactive_arguments.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import Future 2 | import unittest 3 | 4 | from executorlib.standalone.interactive.arguments import ( 5 | check_exception_was_raised, 6 | get_exception_lst, 7 | get_future_objects_from_input, 8 | update_futures_in_input, 9 | ) 10 | 11 | 12 | class TestSerial(unittest.TestCase): 13 | def test_get_future_objects_from_input_with_future(self): 14 | input_args = (1, 2, Future(), [Future()], {3: Future()}) 15 | input_kwargs = {"a": 1, "b": [Future()], "c": {"d": Future()}, "e": Future()} 16 | future_lst, boolean_flag = get_future_objects_from_input(args=input_args, kwargs=input_kwargs) 17 | self.assertEqual(len(future_lst), 6) 18 | self.assertFalse(boolean_flag) 19 | 20 | def test_get_future_objects_from_input_without_future(self): 21 | input_args = (1, 2) 22 | input_kwargs = {"a": 1} 23 | future_lst, boolean_flag = get_future_objects_from_input(args=input_args, kwargs=input_kwargs) 24 | self.assertEqual(len(future_lst), 0) 25 | self.assertTrue(boolean_flag) 26 | 27 | def test_update_futures_in_input_with_future(self): 28 | f1 = Future() 29 | f1.set_result(1) 30 | f2 = Future() 31 | f2.set_result(2) 32 | f3 = Future() 33 | f3.set_result(3) 34 | f4 = Future() 35 | f4.set_result(4) 36 | f5 = Future() 37 | f5.set_result(5) 38 | f6 = Future() 39 | f6.set_result(6) 40 | input_args = (1, 2, f1, [f2], {3: f3}) 41 | input_kwargs = {"a": 1, "b": [f4], "c": {"d": f5}, "e": f6} 42 | output_args, output_kwargs = update_futures_in_input(args=input_args, kwargs=input_kwargs) 43 | self.assertEqual(output_args, (1, 2, 1, [2], {3: 3})) 44 | self.assertEqual(output_kwargs, {"a": 1, "b": [4], "c": {"d": 5}, "e": 6}) 45 | 46 | def test_update_futures_in_input_without_future(self): 47 | input_args = (1, 2) 48 | input_kwargs = {"a": 1} 49 | output_args, output_kwargs = update_futures_in_input(args=input_args, kwargs=input_kwargs) 50 | self.assertEqual(input_args, output_args) 51 | self.assertEqual(input_kwargs, output_kwargs) 52 | 53 | def test_check_exception_was_raised(self): 54 | f_with_exception = Future() 55 | f_with_exception.set_exception(ValueError()) 56 | f_without_exception = 
Future() 57 | self.assertTrue(check_exception_was_raised(future_obj=f_with_exception)) 58 | self.assertFalse(check_exception_was_raised(future_obj=f_without_exception)) 59 | 60 | def test_get_exception_lst(self): 61 | f_with_exception = Future() 62 | f_with_exception.set_exception(ValueError()) 63 | f_without_exception = Future() 64 | future_with_exception_lst = [f_with_exception, f_with_exception, f_without_exception, f_without_exception, f_with_exception] 65 | future_without_exception_lst = [f_without_exception, f_without_exception, f_without_exception, f_without_exception] 66 | exception_lst = get_exception_lst(future_lst=future_with_exception_lst) 67 | self.assertEqual(len(exception_lst), 3) 68 | exception_lst = get_exception_lst(future_lst=future_without_exception_lst) 69 | self.assertEqual(len(exception_lst), 0) 70 | -------------------------------------------------------------------------------- /tests/test_standalone_interactive_backend.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import unittest 4 | 5 | from executorlib.standalone.interactive.backend import parse_arguments 6 | from executorlib.standalone.interactive.spawner import MpiExecSpawner 7 | from executorlib.task_scheduler.interactive.slurmspawner import SrunSpawner 8 | 9 | 10 | class TestParser(unittest.TestCase): 11 | def test_command_local(self): 12 | result_dict = { 13 | "host": "localhost", 14 | "zmqport": "22", 15 | } 16 | command_lst = [ 17 | "mpiexec", 18 | "-n", 19 | "2", 20 | "--oversubscribe", 21 | sys.executable, 22 | "/", 23 | "--zmqport", 24 | result_dict["zmqport"], 25 | ] 26 | interface = MpiExecSpawner(cwd=None, cores=2, openmpi_oversubscribe=True) 27 | self.assertEqual( 28 | command_lst, 29 | interface.generate_command( 30 | command_lst=[sys.executable, "/", "--zmqport", result_dict["zmqport"]] 31 | ), 32 | ) 33 | self.assertEqual(result_dict, parse_arguments(command_lst)) 34 | 35 | def test_command_slurm(self): 36 | result_dict = { 37 | "host": "127.0.0.1", 38 | "zmqport": "22", 39 | } 40 | command_lst = [ 41 | "srun", 42 | "-n", 43 | "2", 44 | "-D", 45 | os.path.abspath("."), 46 | "--gpus-per-task=1", 47 | "--oversubscribe", 48 | sys.executable, 49 | "/", 50 | "--host", 51 | result_dict["host"], 52 | "--zmqport", 53 | result_dict["zmqport"], 54 | ] 55 | interface = SrunSpawner( 56 | cwd=os.path.abspath("."), 57 | cores=2, 58 | gpus_per_core=1, 59 | openmpi_oversubscribe=True, 60 | ) 61 | self.assertEqual( 62 | command_lst, 63 | interface.generate_command( 64 | command_lst=[ 65 | sys.executable, 66 | "/", 67 | "--host", 68 | result_dict["host"], 69 | "--zmqport", 70 | result_dict["zmqport"], 71 | ] 72 | ), 73 | ) 74 | self.assertEqual(result_dict, parse_arguments(command_lst)) 75 | 76 | def test_command_slurm_user_command(self): 77 | result_dict = { 78 | "host": "127.0.0.1", 79 | "zmqport": "22", 80 | } 81 | command_lst = [ 82 | "srun", 83 | "-n", 84 | "2", 85 | "-D", 86 | os.path.abspath("."), 87 | "--gpus-per-task=1", 88 | "--oversubscribe", 89 | "--account=test", 90 | "--job-name=executorlib", 91 | sys.executable, 92 | "/", 93 | "--host", 94 | result_dict["host"], 95 | "--zmqport", 96 | result_dict["zmqport"], 97 | ] 98 | interface = SrunSpawner( 99 | cwd=os.path.abspath("."), 100 | cores=2, 101 | gpus_per_core=1, 102 | openmpi_oversubscribe=True, 103 | slurm_cmd_args=["--account=test", "--job-name=executorlib"], 104 | ) 105 | self.assertEqual( 106 | command_lst, 107 | interface.generate_command( 108 | command_lst=[ 109 | 
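# user-facing part of the command only; generate_command prepends the srun
# invocation, including the slurm_cmd_args supplied above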
sys.executable, 110 | "/", 111 | "--host", 112 | result_dict["host"], 113 | "--zmqport", 114 | result_dict["zmqport"], 115 | ] 116 | ), 117 | ) 118 | self.assertEqual(result_dict, parse_arguments(command_lst)) 119 | -------------------------------------------------------------------------------- /tests/test_standalone_interactive_communication.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | import os 3 | import sys 4 | import unittest 5 | 6 | import numpy as np 7 | import zmq 8 | 9 | from executorlib.standalone.interactive.communication import ( 10 | interface_connect, 11 | interface_shutdown, 12 | interface_send, 13 | interface_receive, 14 | SocketInterface, 15 | ) 16 | from executorlib.standalone.serialize import cloudpickle_register 17 | from executorlib.standalone.interactive.spawner import MpiExecSpawner 18 | 19 | 20 | skip_mpi4py_test = importlib.util.find_spec("mpi4py") is None 21 | 22 | 23 | def calc(i): 24 | return np.array(i**2) 25 | 26 | 27 | class TestInterface(unittest.TestCase): 28 | @unittest.skipIf( 29 | skip_mpi4py_test, "mpi4py is not installed, so the mpi4py tests are skipped." 30 | ) 31 | def test_interface_mpi(self): 32 | cloudpickle_register(ind=1) 33 | task_dict = {"fn": calc, "args": (), "kwargs": {"i": 2}} 34 | interface = SocketInterface( 35 | spawner=MpiExecSpawner(cwd=None, cores=1, openmpi_oversubscribe=False) 36 | ) 37 | interface.bootup( 38 | command_lst=[ 39 | sys.executable, 40 | os.path.abspath( 41 | os.path.join( 42 | __file__, 43 | "..", 44 | "..", 45 | "executorlib", 46 | "backend", 47 | "interactive_parallel.py", 48 | ) 49 | ), 50 | "--zmqport", 51 | str(interface.bind_to_random_port()), 52 | ] 53 | ) 54 | self.assertEqual( 55 | interface.send_and_receive_dict(input_dict=task_dict), np.array(4) 56 | ) 57 | interface.shutdown(wait=True) 58 | 59 | def test_interface_serial(self): 60 | cloudpickle_register(ind=1) 61 | task_dict = {"fn": calc, "args": (), "kwargs": {"i": 2}} 62 | interface = SocketInterface( 63 | spawner=MpiExecSpawner(cwd=None, cores=1, openmpi_oversubscribe=False) 64 | ) 65 | interface.bootup( 66 | command_lst=[ 67 | sys.executable, 68 | os.path.abspath( 69 | os.path.join( 70 | __file__, 71 | "..", 72 | "..", 73 | "executorlib", 74 | "backend", 75 | "interactive_serial.py", 76 | ) 77 | ), 78 | "--zmqport", 79 | str(interface.bind_to_random_port()), 80 | ] 81 | ) 82 | self.assertEqual( 83 | interface.send_and_receive_dict(input_dict=task_dict), np.array(4) 84 | ) 85 | interface.shutdown(wait=True) 86 | 87 | 88 | class TestZMQ(unittest.TestCase): 89 | def test_interface_receive(self): 90 | self.assertEqual(len(interface_receive(socket=None)), 0) 91 | 92 | def test_initialize_zmq(self): 93 | message = "test" 94 | host = "localhost" 95 | 96 | context_server = zmq.Context() 97 | socket_server = context_server.socket(zmq.PAIR) 98 | port = str(socket_server.bind_to_random_port("tcp://*")) 99 | context_client, socket_client = interface_connect(host=host, port=port) 100 | interface_send(socket=socket_server, result_dict={"message": message}) 101 | self.assertEqual(interface_receive(socket=socket_client), {"message": message}) 102 | interface_shutdown(socket=socket_client, context=context_client) 103 | interface_shutdown(socket=socket_server, context=context_server) 104 | --------------------------------------------------------------------------------