├── .gitattributes ├── copyright_header.txt ├── example_computer_setup ├── computers │ ├── ssh_transport.yaml │ ├── localhost.yaml │ └── my-cluster.yaml ├── README.md ├── new_profile.sh └── add_extras.py ├── environment.yml ├── CODE_OF_CONDUCT.md ├── SUPPORT.md ├── aiida_dynamic_workflows ├── _static_version.py ├── __init__.py ├── common │ ├── serialize.py │ ├── __init__.py │ ├── array.py │ └── mapspec.py ├── utils.py ├── step.py ├── samples.py ├── control.py ├── parsers.py ├── schedulers.py ├── _version.py ├── query.py ├── report.py ├── workchains.py ├── data.py ├── engine.py └── workflow.py ├── .github └── workflows │ └── ci-style.yaml ├── .pre-commit-config.yaml ├── setup.py ├── LICENSE ├── .gitignore ├── README.md ├── SECURITY.md ├── setup.cfg └── examples ├── 04-deleting-data.md ├── 01-calculations.md ├── 02-workflows.md └── 03-failures.md /.gitattributes: -------------------------------------------------------------------------------- 1 | aiida_dynamic_workflows/_static_version.py export-subst 2 | -------------------------------------------------------------------------------- /copyright_header.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) Microsoft Corporation. 2 | Licensed under the MIT License. 3 | -------------------------------------------------------------------------------- /example_computer_setup/computers/ssh_transport.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # You can tweak these settings 3 | timeout: 120 # SSH connections can live for 2 minutes 4 | safe_interval: 0 # Can make as many SSH connections as we like 5 | compress: true # Compress files for transfer 6 | key_policy: AutoAddPolicy # Automatically adds missing hosts to known_hosts 7 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: dynamic-workflows-dev 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | # General dependencies 6 | - python=3.9 7 | - graphviz 8 | # dev dependencies 9 | - pre-commit 10 | - ipykernel # for running notebooks 11 | # used in example notebooks 12 | - loky 13 | - toolz 14 | - pandas 15 | - numpy 16 | -------------------------------------------------------------------------------- /example_computer_setup/computers/localhost.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Local machine" 3 | label: "localhost" 4 | hostname: "localhost" 5 | transport: core.local 6 | scheduler: core.direct 7 | work_dir: "/home/{username}/.aiida_run" 8 | mpirun_command: "mpirun -np {tot_num_mpiprocs}" 9 | mpiprocs_per_machine: "1" 10 | shebang: "#!/bin/bash" 11 | prepend_text: " " 12 | append_text: " " 13 | -------------------------------------------------------------------------------- /example_computer_setup/README.md: -------------------------------------------------------------------------------- 1 | # Aiida profiles for use with aiida-dynamic-workflows 2 | 3 | aiida-dynamic-workflows assumes Conda is used to manage the Python environments 4 | on the Computers. 5 | 6 | To get started, modify the `hostname` and `work_dir` keys in `computers/my-cluster.yaml` 7 | to point to a Slurm cluster. 8 | Then run `./new_profile.sh <profile_name>` to create a new Aiida profile with that 9 | Computer set up.
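As a quick sanity check, a first run might look like the following sketch (the profile name `dev` is only an example; `my-cluster` is the label defined in `computers/my-cluster.yaml`):

```bash
# Create a new profile and register all computers defined in computers/
./new_profile.sh dev

# Confirm that the computers were set up and configured
verdi -p dev computer list
verdi -p dev computer show my-cluster
```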
10 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 8 | 9 | For help and questions about using this project, please file a GitHub Issue. 10 | 11 | ## Microsoft Support Policy 12 | 13 | Support for this project is limited to the resources listed above. 14 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/_static_version.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Microsoft Corporation. 3 | # Licensed under the MIT License. 4 | 5 | # This file will be overwritten by setup.py when a source or binary 6 | # distribution is made. The magic value "__use_git__" is interpreted by 7 | # version.py. 8 | 9 | version = "__use_git__" 10 | 11 | # These values are only set if the distribution was created with 'git archive' 12 | refnames = "HEAD -> main, tag: v0.1.0" 13 | git_hash = "06e78f5" 14 | -------------------------------------------------------------------------------- /example_computer_setup/computers/my-cluster.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Cluster with Slurm installeed" 3 | label: "my-cluster" 4 | hostname: "headnode.mycluster.whatever" 5 | transport: core.ssh 6 | scheduler: "dynamic_workflows.slurm" 7 | work_dir: "/home/{username}/.aiida_run" 8 | mpirun_command: "mpirun -np {tot_num_mpiprocs}" 9 | mpiprocs_per_machine: "1" 10 | shebang: "#!/bin/bash" 11 | prepend_text: " " 12 | append_text: " " 13 | # Extra properties 14 | extras: 15 | # note: will be autodetected if not specified 16 | conda_dir: "/home/{username}/miniconda3" 17 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | from . 
import ( 6 | calculations, 7 | common, 8 | control, 9 | data, 10 | engine, 11 | parsers, 12 | query, 13 | report, 14 | utils, 15 | workflow, 16 | ) 17 | from ._version import __version__ # noqa: F401 18 | from .samples import input_samples 19 | from .step import step 20 | 21 | __all__ = [ 22 | "calculations", 23 | "common", 24 | "control", 25 | "data", 26 | "engine", 27 | "input_samples", 28 | "parsers", 29 | "report", 30 | "query", 31 | "step", 32 | "utils", 33 | "workflow", 34 | "__version__", 35 | ] 36 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/common/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | import cloudpickle 6 | 7 | 8 | def read(name, opener=open): 9 | """Load file contents as a bytestring.""" 10 | with opener(name, "rb") as f: 11 | return f.read() 12 | 13 | 14 | loads = cloudpickle.loads 15 | dumps = cloudpickle.dumps 16 | 17 | 18 | def load(name, opener=open): 19 | """Load a cloudpickled object from the named file.""" 20 | with opener(name, "rb") as f: 21 | return cloudpickle.load(f) 22 | 23 | 24 | def dump(obj, name, opener=open): 25 | """Dump an object to the named file using cloudpickle.""" 26 | with opener(name, "wb") as f: 27 | cloudpickle.dump(obj, f) 28 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | # Common code used both by the plugin and by the runtime that wraps usercode. 6 | 7 | import importlib.resources 8 | 9 | from .array import FileBasedObjectArray 10 | from .mapspec import MapSpec 11 | from .serialize import dump, load 12 | 13 | __all__ = ["dump", "load", "FileBasedObjectArray", "MapSpec", "package_module_contents"] 14 | 15 | 16 | def package_module_contents(): 17 | """Yield (filename, contents) pairs for each module in this subpackage.""" 18 | for filename in importlib.resources.contents(__package__): 19 | if filename.endswith(".py"): 20 | yield filename, importlib.resources.read_text(__package__, filename) 21 | -------------------------------------------------------------------------------- /.github/workflows/ci-style.yaml: -------------------------------------------------------------------------------- 1 | name: continuous-integration-style 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | 9 | pre-commit: 10 | 11 | runs-on: ubuntu-latest 12 | timeout-minutes: 30 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - name: Set up Python 3.9 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: '3.9' 21 | 22 | # remove after aiida 2.0 is released 23 | - name: Install AiiDA development version 24 | run: pip install git+https://github.com/aiidateam/aiida-core.git@1890bab724956220c306bd9794457a5657739174 25 | 26 | - name: Install python dependencies 27 | run: | 28 | pip install pre-commit 29 | pip install -e . 
30 | pip freeze 31 | 32 | - name: Run pre-commit 33 | run: 34 | pre-commit run --all-files || ( git status --short ; git diff ; exit 1 ) 35 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.1.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: mixed-line-ending 8 | args: ['--fix=lf'] 9 | - repo: https://gitlab.com/pycqa/flake8 10 | rev: 4.0.1 11 | hooks: 12 | - id: flake8 13 | - repo: https://github.com/Lucas-C/pre-commit-hooks 14 | rev: v1.1.13 15 | hooks: 16 | - id: insert-license 17 | files: \.py$ 18 | args: 19 | - --license-filepath 20 | - copyright_header.txt 21 | - repo: https://github.com/ambv/black 22 | rev: 22.3.0 23 | hooks: 24 | - id: black 25 | language_version: python3.9 26 | - repo: https://github.com/timothycrosley/isort 27 | rev: 5.10.1 28 | hooks: 29 | - id: isort 30 | - repo: https://github.com/pycqa/pydocstyle 31 | rev: 6.1.1 32 | hooks: 33 | - id: pydocstyle 34 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | from setuptools import setup 6 | 7 | 8 | def get_version_and_cmdclass(package_path): 9 | """Load version.py module without importing the whole package. 10 | 11 | Template code from miniver 12 | """ 13 | from importlib.util import module_from_spec, spec_from_file_location 14 | import os 15 | 16 | spec = spec_from_file_location("version", os.path.join(package_path, "_version.py")) 17 | module = module_from_spec(spec) 18 | spec.loader.exec_module(module) 19 | return module.__version__, module.cmdclass 20 | 21 | 22 | version, cmdclass = get_version_and_cmdclass("aiida_dynamic_workflows") 23 | 24 | # All other options are specified in 'setup.cfg'; the version has to be 25 | # determined dynamically from git tags (using 'miniver'), so it needs 26 | # to be done here. 27 | setup( 28 | version=version, 29 | cmdclass=cmdclass, 30 | ) 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /example_computer_setup/new_profile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | profile=$1 5 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" 6 | 7 | if ! command -v verdi &> /dev/null 8 | then 9 | echo "'verdi' command not found: did you activate the Conda environment where Aiida is installed?" 10 | exit 1 11 | fi 12 | 13 | if [ -z "$profile" ] 14 | then 15 | echo "Usage: new_profile.sh " 16 | exit 1 17 | fi 18 | 19 | # Ensure profile is lowercase only 20 | 21 | function lowered () { 22 | echo $1 | tr '[:upper:]' '[:lower:]' 23 | } 24 | 25 | if [ "$profile" != "$(lowered $profile)" ] 26 | then 27 | echo "Profile name '$profile' is not lowercase" 28 | exit 1 29 | fi 30 | 31 | verdi quicksetup --profile $profile 32 | 33 | for config_file in "$SCRIPT_DIR"/computers/*.yaml; do 34 | computer=$(basename $config_file .yaml) 35 | 36 | # -n to use default values that are not included 37 | # in the config file (this includes "username"). 38 | verdi --profile $profile computer setup -n --config $config_file 39 | 40 | if [ $computer = localhost ]; then 41 | verdi --profile $profile computer configure core.local $computer -n --safe-interval 0 42 | else 43 | verdi --profile $profile computer configure core.ssh $computer -n --config "$SCRIPT_DIR/computers/ssh_transport.yaml" 44 | fi 45 | 46 | "$SCRIPT_DIR/add_extras.py" --profile $profile --config $config_file 47 | 48 | done 49 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | import asyncio 7 | from concurrent.futures import ThreadPoolExecutor 8 | from functools import partial 9 | from pathlib import Path 10 | import shutil 11 | from typing import Iterable 12 | 13 | from IPython.display import Image 14 | import aiida 15 | import graphviz 16 | from tqdm import tqdm 17 | 18 | 19 | def block_until_done(chain: aiida.orm.WorkChainNode, interval=1) -> int: 20 | """Block a running chain until an exit code is set. 21 | 22 | Parameters 23 | ---------- 24 | chain : aiida.orm.WorkChainNode 25 | interval : int, optional 26 | Checking interval, by default 1 27 | 28 | Returns 29 | ------- 30 | int 31 | Exit code. 32 | """ 33 | loop = asyncio.get_event_loop() 34 | 35 | async def wait_until_done(chain: aiida.orm.WorkChainNode) -> None: 36 | while chain.exit_status is None: 37 | await asyncio.sleep(interval) 38 | 39 | coro = wait_until_done(chain) 40 | loop.run_until_complete(coro) 41 | return chain.exit_status 42 | 43 | 44 | def render_png(g: graphviz.Digraph) -> Image: 45 | """Render 'graphviz.Digraph' as png.""" 46 | return Image(g.render(format="png")) 47 | 48 | 49 | def parallel_rmtree(dirs: Iterable[str | Path], with_tqdm: bool = True): 50 | """Apply 'shutil.rmtree' to 'dirs' in parallel using a thread pool.""" 51 | # Threadpool executor, as this task is IO bound. 
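# (A thread pool is enough here: shutil.rmtree spends its time in filesystem
# syscalls, which release the GIL, so deletions really do proceed concurrently;
# a process pool would only add pickling and startup overhead.)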
52 | rmtree = partial(shutil.rmtree, ignore_errors=True) 53 | with ThreadPoolExecutor() as tp: 54 | it = tp.map(rmtree, dirs) 55 | if with_tqdm: 56 | it = tqdm(it, total=len(dirs)) 57 | # Bare 'for' loop to force the map to complete. 58 | for _ in it: 59 | pass 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /example_computer_setup/add_extras.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) Microsoft Corporation. 4 | # Licensed under the MIT License. 
5 | 6 | 7 | import aiida 8 | from aiida.cmdline.utils import echo 9 | import aiida.orm 10 | import click 11 | import yaml 12 | 13 | 14 | @click.command() 15 | @click.option("--profile", help="Aiida profile") 16 | @click.option("--config", required=True, help="Config file for computer") 17 | def main(profile, config): 18 | """Add extra properties to the computer defined in 'config'.""" 19 | aiida.load_profile(profile) 20 | 21 | with open(config) as f: 22 | config = yaml.safe_load(f) 23 | 24 | label = config["label"] 25 | 26 | echo.echo_info(f"Adding extra properties to computer {label}") 27 | 28 | extras = config.get("extras", dict()) 29 | 30 | computer = aiida.orm.load_computer(label) 31 | for k, v in extras.items(): 32 | computer.set_property(k, str(v)) 33 | computer.store() 34 | 35 | echo.echo_success(f"Added the following properties to {label}: {extras}") 36 | 37 | if "conda_dir" not in extras: 38 | echo.echo_info(f"Setting the conda directory for computer {label}") 39 | conda_dir = get_conda_dir(computer) 40 | computer.set_property("conda_dir", conda_dir) 41 | computer.store() 42 | 43 | echo.echo_success(f"Set the Conda directory on {label} to '{conda_dir}'") 44 | else: 45 | conda_dir = extras["conda_dir"] 46 | with computer.get_transport() as t: 47 | if not t.isdir(conda_dir): 48 | echo.echo_warning(f"'{conda_dir}' is not a directory on {label}") 49 | 50 | 51 | def get_conda_dir(computer): 52 | """Return the Conda directory for the given computer. 53 | 54 | First we try to determine the Conda directory automatically by 55 | activating the "base" environment and getting $CONDA_PREFIX. 56 | 57 | If that fails we simply prompt the user. 58 | """ 59 | label = computer.label 60 | with computer.get_transport() as t: 61 | rv, stdout, stderr = t.exec_command_wait( 62 | "set -e; conda activate base; echo $CONDA_PREFIX" 63 | ) 64 | conda_dir = stdout.strip() or None 65 | if not conda_dir: 66 | echo.echo_warning( 67 | "Failed to automatically determine Conda directory " 68 | f"for {label} (the computer said: '{stderr}')" 69 | ) 70 | 71 | while not conda_dir: 72 | x = click.prompt(f"Enter your conda directory for {label}") 73 | if t.isdir(x): 74 | conda_dir = x 75 | else: 76 | echo.echo_warning(f"'{x}' is not a directory on {label}") 77 | return conda_dir 78 | 79 | 80 | if __name__ == "__main__": 81 | main() 82 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # aiida-dynamic-workflows 2 | An AiiDA plugin for dynamically composing workflows from Python functions that run as CalcJobs. 3 | 4 | **This is experimental, pre-alpha software**. 5 | 6 | 7 | ## Prerequisites 8 | An environment where the _development_ version of AiiDA is installed. 9 | This plugin makes use of a bugfix on the development branch, which will 10 | not be included in an AiiDA release until v2.0. 11 | 12 | 13 | ## Installing 14 | As pre-alpha software, this package is **not** released on PyPI. 15 | Currently the only way to install the plugin is to clone the 16 | repository and use `pip`: 17 | ```bash 18 | pip install -e . 19 | ``` 20 | 21 | 22 | ## Initialization 23 | This plugin uses Conda for managing Python environments on remote computers. 24 | Any Computers that you use with this plugin must have a `conda_dir` property 25 | that contains an absolute path to the Conda directory on the machine 26 | (typically something like `/home/{username}/miniconda3`. 
27 | The `add_extras.py` script in `example_computer_setup/` can help you with this. 28 | 29 | 30 | ## Examples 31 | The [`examples/`](./examples) directory contains Jupyter notebooks that illustrate the main 32 | features of `aiida-dynamic-workflows`. The notebooks are in Markdown format, and so require 33 | the Jupyter plugin [jupytext](https://jupytext.readthedocs.io/en/latest/) in order to run them. 34 | 35 | 36 | ## Contributing 37 | 38 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 39 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 40 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 41 | 42 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 43 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 44 | provided by the bot. You will only need to do this once across all repos using our CLA. 45 | 46 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 47 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 48 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 49 | 50 | ## Trademarks 51 | 52 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 53 | trademarks or logos is subject to and must follow 54 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 55 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 56 | Any use of third-party trademarks or logos is subject to those third parties' policies. 57 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/step.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | import copy 6 | from typing import Any, Callable, Dict, Optional, Tuple, Union 7 | 8 | import toolz 9 | 10 | from .data import PyFunction 11 | 12 | __all__ = ["step"] 13 | 14 | 15 | @toolz.curry 16 | def step( 17 | f: Callable, 18 | *, 19 | returns: Union[str, Tuple[str]] = "_return_value", 20 | resources: Optional[Dict[str, Any]] = None, 21 | ) -> PyFunction: 22 | """Construct a PyFunction from a Python function. 23 | 24 | This function is commonly used as a decorator. 25 | 26 | Parameters 27 | ---------- 28 | f 29 | The function to transform into a PyFunction. 30 | returns 31 | The name of the output of this function. 32 | If multiple names are provided, then 'f' is assumed to return 33 | as many values (as a tuple) as there are names. 34 | resources 35 | Optional specification of computational resources that this 36 | function needs. Possible resources are: "memory", "cores". 37 | "memory" must be a string containing an integer value followed 38 | by one of the following suffixes: "kB", "MB", "GB". 39 | "cores" must be a positive integer. 40 | 41 | Examples 42 | -------- 43 | >>> f = step(lambda x, y: x + y, returns="sum") 44 | >>> 45 | >>> @step(returns="other_sum", resources={"memory": "10GB", "cores": 2}) 46 | ... def g(x: int, y: int) -> int: 47 | ... return x + y 48 | ... 49 | >>> @step(returns=("a", "b")) 50 | ... def h(x): 51 | ... return (x + 1, x + 2) 52 | ... 53 | >>> 54 | """ 55 | # TODO: First query the Aiida DB to see if this function already exists. 56 | # This will require having a good hash for Python functions. 57 | # This is a hard problem.
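# (For instance, the cloudpickle bytes are not a stable identifier: they can
# differ across Python/cloudpickle versions even for unchanged source, so a
# source- or AST-based hash would likely be needed.)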
58 | if resources: 59 | _validate_resources(resources) 60 | 61 | node = PyFunction(func=f, returns=returns, resources=resources) 62 | node.store() 63 | return node 64 | 65 | 66 | def _validate_resources(resources) -> None: 67 | resources = copy.deepcopy(resources) 68 | if "memory" in resources: 69 | _validate_memory(resources.pop("memory")) 70 | if "cores" in resources: 71 | _validate_cores(resources.pop("cores")) 72 | if resources: 73 | raise ValueError(f"Unexpected resource specifications: {list(resources)}") 74 | 75 | 76 | def _validate_memory(memory: str): 77 | mem, unit = memory[:-2], memory[-2:] 78 | if not mem.isnumeric(): 79 | raise ValueError(f"Expected an integer amount of memory, got: '{mem}'") 80 | elif int(mem) == 0: 81 | raise ValueError("Cannot specify zero memory") 82 | valid_units = ("kB", "MB", "GB") 83 | if unit not in valid_units: 84 | raise ValueError( 85 | f"Invalid memory unit: '{unit}' (expected one of {valid_units})." 86 | ) 87 | 88 | 89 | def _validate_cores(cores: int): 90 | if int(cores) != cores: 91 | raise ValueError(f"Expected an integer number of cores, got: {cores}") 92 | elif cores <= 0: 93 | raise ValueError(f"Expected a positive number of cores, got: {cores}") 94 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = aiida_dynamic_workflows 3 | description = AiiDA plugins for dynamically defining workflows that execute Python functions 4 | long_description = file: README.md 5 | long_description_content_type = text/markdown 6 | url = http://github.com/microsoft/aiida-dynamic-workflows 7 | author = Microsoft Quantum 8 | license = MIT 9 | license_file = LICENSE 10 | classifiers = 11 | Development Status :: 2 - Pre-Alpha 12 | License :: OSI Approved :: MIT License 13 | Framework :: AiiDA 14 | Intended Audience :: Science/Research 15 | Intended Audience :: Developers 16 | Programming Language :: Python :: 3 :: Only 17 | Topic :: Software Development :: Libraries :: Python Modules 18 | Operating System :: POSIX :: Linux 19 | keywords = aiida 20 | 21 | [options] 22 | packages = find: 23 | python_requires = >=3.8,<4 24 | install_requires = 25 | # TODO: Update dependency to Aiida 2.0, when it is released 26 | aiida-core==2.0.0b1 27 | toolz 28 | cloudpickle 29 | numpy 30 | graphviz 31 | tqdm 32 | setup_requires = 33 | reentry 34 | include_package_data = True 35 | reentry_register = true 36 | 37 | [options.entry_points] 38 | aiida.calculations = 39 | dynamic_workflows.PyCalcJob = aiida_dynamic_workflows.calculations:PyCalcJob 40 | dynamic_workflows.PyMapJob = aiida_dynamic_workflows.calculations:PyMapJob 41 | dynamic_workflows.merge_remote_arrays = aiida_dynamic_workflows.calculations:merge_remote_arrays 42 | aiida.parsers = 43 | dynamic_workflows.PyCalcParser = aiida_dynamic_workflows.parsers:PyCalcParser 44 | dynamic_workflows.PyMapParser = aiida_dynamic_workflows.parsers:PyMapParser 45 | aiida.data = 46 | dynamic_workflows.PyData = aiida_dynamic_workflows.data:PyData 47 | dynamic_workflows.PyArray = aiida_dynamic_workflows.data:PyArray 48 | dynamic_workflows.PyRemoteData = aiida_dynamic_workflows.data:PyRemoteData 49 | dynamic_workflows.PyRemoteArray = aiida_dynamic_workflows.data:PyRemoteArray 50 | dynamic_workflows.PyOutline = aiida_dynamic_workflows.data:PyOutline 51 | dynamic_workflows.PyFunction = aiida_dynamic_workflows.data:PyFunction 52 | dynamic_workflows.Nil = aiida_dynamic_workflows.data:Nil 53 |
dynamic_workflows.PyException = aiida_dynamic_workflows.data:PyException 54 | aiida.node = 55 | process.workflow.dynamic_workflows.WorkChainNode = aiida_dynamic_workflows.workchains:WorkChainNode 56 | aiida.schedulers = 57 | dynamic_workflows.slurm = aiida_dynamic_workflows.schedulers:SlurmSchedulerWithJobArray 58 | aiida.workflows = 59 | dynamic_workflows.PyWorkChain = aiida_dynamic_workflows.workflow:PyWorkChain 60 | dynamic_workflows.RestartedPyMapJob = aiida_dynamic_workflows.workchains:RestartedPyMapJob 61 | dynamic_workflows.RestartedPyCalcJob = aiida_dynamic_workflows.workchains:RestartedPyCalcJob 62 | 63 | [pydocstyle] 64 | inherit = False 65 | convention = numpy 66 | add-ignore = D100,D104,D105 67 | match = (?!test).*\.py 68 | 69 | [isort] 70 | force_sort_within_sections=True 71 | profile=black 72 | 73 | [flake8] 74 | ignore = E203, E266, W503 75 | max-line-length = 88 76 | max-complexity = 18 77 | select = B,C,E,F,W,T4,B9 78 | 79 | [coverage:run] 80 | omit = 81 | */tests/* 82 | */ipynb_filter.py 83 | */_static_version.py 84 | */_version.py 85 | */setup.py 86 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/samples.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | import itertools 6 | from typing import Dict, Iterable, Optional, Tuple 7 | 8 | import aiida.orm 9 | import toolz 10 | 11 | from .calculations import PyCalcJob, PyMapJob 12 | from .common import MapSpec 13 | from .data import PyRemoteArray, from_aiida_type 14 | 15 | 16 | def input_samples(result: PyRemoteArray) -> Iterable[Dict]: 17 | """Return an iterable of samples, given a result from a PyMapJob. 18 | 19 | Parameters 20 | ---------- 21 | result 22 | The array resulting from the execution of a PyMapJob. 23 | 24 | Returns 25 | ------- 26 | An iterable of dictionaries, ordered as 'result' (flattened, if 27 | 'result' is a >1D array). Each dictionary has the same keys (the 28 | names of the parameters that produced 'result'). 29 | 30 | Examples 31 | -------- 32 | >>> import pandas as pd 33 | >>> # In the following we assume 'charge' is a PyRemoteArray output from a PyMapJob. 34 | >>> df = pd.DataFrame(input_samples(charge)) 35 | >>> # Add a 'charge' column showing the result associated with each sample. 36 | >>> df.assign(charge=charge.reshape(-1)) 37 | """ 38 | if result.creator is None: 39 | raise ValueError( 40 | "Cannot generate sample plan from data that was not produced from a CalcJob" 41 | ) 42 | job = result.creator 43 | if not issubclass(job.process_class, PyMapJob): 44 | raise TypeError("Expected data that was produced from a MapJob") 45 | output_axes = MapSpec.from_string(job.attributes["mapspec"]).output.axes 46 | sp = _parameter_spec(result) 47 | 48 | consts = {k: from_aiida_type(v) for k, (v, axes) in sp.items() if axes is None} 49 | mapped = { 50 | k: (from_aiida_type(v), axes) for k, (v, axes) in sp.items() if axes is not None 51 | } 52 | 53 | # This could be done more efficiently if we return instead a dictionary of arrays. 
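# (For example, one object array per parameter filled in a single pass; a plain
# generator of dicts is kept here so that pd.DataFrame(input_samples(...)), as
# in the docstring above, keeps working unchanged.)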
54 | 55 | for el in itertools.product(*map(range, result.shape)): 56 | el = dict(zip(output_axes, el)) 57 | d = {k: v[tuple(el[ax] for ax in axes)] for k, (v, axes) in mapped.items()} 58 | yield toolz.merge(consts, d) 59 | 60 | 61 | def _parameter_spec(result: aiida.orm.Data, axes: Optional[Tuple[str]] = None) -> Dict: 62 | """Return a dictionary specifying the parameters that produced a given 'result'. 63 | 64 | Parameters 65 | ---------- 66 | result 67 | Data produced from a PyCalcJob or PyMapJob. 68 | axes 69 | Labels for each axis of 'result', used to rename input axis labels. 70 | 71 | Returns 72 | ------- 73 | Dictionary mapping parameter names (strings) to pairs: (Aiida node, axis names). 74 | """ 75 | job = result.creator 76 | job_type = job.process_class 77 | 78 | if not issubclass(job_type, PyCalcJob): 79 | raise TypeError(f"Don't know what to do with {job_type}") 80 | 81 | if issubclass(job_type, PyMapJob): 82 | mapspec = MapSpec.from_string(job.attributes["mapspec"]) 83 | if axes: 84 | assert len(axes) == len(mapspec.output.axes) 85 | translation = dict(zip(mapspec.output.axes, axes)) 86 | else: 87 | translation = dict() 88 | input_axes = { 89 | spec.name: [translation.get(ax, ax) for ax in spec.axes] 90 | for spec in mapspec.inputs 91 | } 92 | else: 93 | input_axes = dict() 94 | assert axes is None 95 | 96 | kwargs = job.inputs.kwargs if hasattr(job.inputs, "kwargs") else {} 97 | # Inputs that were _not_ created by another CalcJob are the parameters we seek. 98 | parameters = {k: (v, input_axes.get(k)) for k, v in kwargs.items() if not v.creator} 99 | # Inputs that _were_ created by another Calcjob need to have 100 | # _their_ inputs inspected, in turn. 101 | other_inputs = [(v, input_axes.get(k)) for k, v in kwargs.items() if v.creator] 102 | upstream_params = [_parameter_spec(v, ax) for v, ax in other_inputs] 103 | 104 | return toolz.merge(parameters, *upstream_params) 105 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/control.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | import subprocess 6 | import time 7 | from typing import Optional, Union 8 | 9 | from aiida import get_config_option 10 | from aiida.cmdline.commands.cmd_process import process_kill, process_pause, process_play 11 | from aiida.cmdline.utils import common, daemon, echo 12 | from aiida.engine.daemon.client import get_daemon_client 13 | from aiida.orm import ProcessNode, load_node 14 | 15 | 16 | def kill(process: Union[ProcessNode, int, str], timeout: int = 5) -> bool: 17 | """Kill the specified process. 18 | 19 | Params 20 | ------ 21 | process 22 | The process to kill. 23 | timeout 24 | Timeout (in seconds) to wait for confirmation that the process was killed. 25 | 26 | Returns 27 | ------- 28 | True only if the process is now terminated. 29 | """ 30 | process = _ensure_process_node(process) 31 | process_kill.callback([process], timeout=timeout, wait=True) 32 | return process.is_terminated 33 | 34 | 35 | def pause(process: Union[ProcessNode, int, str], timeout: int = 5) -> bool: 36 | """Pause the specified process. 37 | 38 | Paused processes will not continue execution, and can be unpaused later. 39 | 40 | Params 41 | ------ 42 | process 43 | The process to kill. 44 | timeout 45 | Timeout (in seconds) to wait for confirmation that the process was killed. 
46 | 47 | Returns 48 | ------- 49 | True only if the process is now paused. 50 | """ 51 | process = _ensure_process_node(process) 52 | if process.is_terminated: 53 | raise RuntimeError("Cannot pause terminated process {process.pk}.") 54 | process_pause.callback([process], all_entries=False, timeout=timeout, wait=True) 55 | return process.paused 56 | 57 | 58 | def unpause(process: Union[ProcessNode, int, str], timeout: int = 5) -> bool: 59 | """Unpause the specified process. 60 | 61 | Params 62 | ------ 63 | process 64 | The process to kill. 65 | timeout 66 | Timeout (in seconds) to wait for confirmation that the process was killed. 67 | 68 | Returns 69 | ------- 70 | True only if the process is now unpaused. 71 | """ 72 | process = _ensure_process_node(process) 73 | if process.is_terminated: 74 | raise RuntimeError("Cannot unpause terminated process {process.pk}.") 75 | process_play.callback([process], all_entries=False, timeout=timeout, wait=True) 76 | return not process.paused 77 | 78 | 79 | def ensure_daemon_restarted(n_workers: Optional[int] = None): 80 | """Restart the daemon (if it is running), or start it (if it is stopped). 81 | 82 | Parameters 83 | ---------- 84 | n_workers 85 | The number of daemon workers to start. If not provided, the default 86 | number of workers for this profile is used. 87 | 88 | Notes 89 | ----- 90 | If the daemon is running this is equivalent to running 91 | 'verdi daemon restart --reset', i.e. we fully restart the daemon, including 92 | the circus controller. This ensures that any changes in the environment are 93 | properly picked up by the daemon. 94 | """ 95 | client = get_daemon_client() 96 | n_workers = n_workers or get_config_option("daemon.default_workers") 97 | 98 | if client.is_daemon_running: 99 | echo.echo("Stopping the daemon...", nl=False) 100 | response = client.stop_daemon(wait=True) 101 | retcode = daemon.print_client_response_status(response) 102 | if retcode: 103 | raise RuntimeError(f"Problem restarting Aiida daemon: {response['status']}") 104 | 105 | echo.echo("Starting the daemon...", nl=False) 106 | 107 | # We have to run this in a subprocess because it daemonizes, and we do not 108 | # want to daemonize _this_ process. 109 | command = [ 110 | "verdi", 111 | "-p", 112 | client.profile.name, 113 | "daemon", 114 | "start-circus", 115 | str(n_workers), 116 | ] 117 | try: 118 | currenv = common.get_env_with_venv_bin() 119 | subprocess.check_output(command, env=currenv, stderr=subprocess.STDOUT) 120 | except subprocess.CalledProcessError as exception: 121 | echo.echo("FAILED", fg="red", bold=True) 122 | raise RuntimeError("Failed to start the daemon") from exception 123 | 124 | time.sleep(1) 125 | response = client.get_status() 126 | 127 | retcode = daemon.print_client_response_status(response) 128 | if retcode: 129 | raise RuntimeError(f"Problem starting Aiida daemon: {response['status']}") 130 | 131 | 132 | def _ensure_process_node(node_or_id: Union[ProcessNode, int, str]) -> ProcessNode: 133 | if isinstance(node_or_id, ProcessNode): 134 | return node_or_id 135 | else: 136 | return load_node(node_or_id) 137 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/parsers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | """Aiida Parsers for interpreting the output of arbitrary Python functions.""" 5 | 6 | import os.path 7 | 8 | import aiida.engine 9 | import aiida.parsers 10 | 11 | from . import common 12 | from .common import MapSpec 13 | from .data import PyRemoteArray, PyRemoteData, array_shape 14 | 15 | # TODO: unify 'PyCalcParser' and 'PyMapParser': they are identical except 16 | # for the type of the outputs (PyRemoteData vs. PyRemoteArray). 17 | 18 | 19 | class PyCalcParser(aiida.parsers.Parser): 20 | """Parser for a PyCalcJob.""" 21 | 22 | def parse(self, **kwargs): # noqa: D102 23 | 24 | calc = self.node 25 | 26 | def retrieve(value_file): 27 | # No actual retrieval occurs; we just store a reference 28 | # to the remote value. 29 | return PyRemoteData.from_remote_data( 30 | calc.outputs.remote_folder, 31 | value_file, 32 | ) 33 | 34 | exception_file = "__exception__.pickle" 35 | remote_folder = calc.outputs["remote_folder"] 36 | remote_files = remote_folder.listdir() 37 | has_exception = exception_file in remote_files 38 | 39 | exit_code = None 40 | 41 | # If any data was produced we create the appropriate outputs. 42 | # If something went wrong the exit code will still be non-zero. 43 | output_folder = remote_folder.listdir("__return_values__") 44 | for r in calc.inputs.func.returns: 45 | filename = f"{r}.pickle" 46 | path = os.path.join("__return_values__", filename) 47 | if filename in output_folder: 48 | self.out(f"return_values.{r}", retrieve(path)) 49 | else: 50 | exit_code = self.exit_codes.MISSING_OUTPUT 51 | 52 | try: 53 | job_infos = calc.computer.get_scheduler().parse_detailed_job_info( 54 | calc.get_detailed_job_info() 55 | ) 56 | except AttributeError: 57 | pass 58 | else: 59 | (job_info,) = job_infos 60 | if job_info["State"] == "FAILED": 61 | exit_code = self.exit_codes.NONZERO_EXIT_CODE 62 | 63 | if has_exception: 64 | self.out("exception", retrieve(exception_file)) 65 | exit_code = self.exit_codes.USER_CODE_RAISED 66 | 67 | if exit_code is not None: 68 | calc.set_exit_status(exit_code.status) 69 | calc.set_exit_message(exit_code.message) 70 | return exit_code 71 | 72 | 73 | class PyMapParser(aiida.parsers.Parser): 74 | """Parser for a PyMapJob.""" 75 | 76 | def parse(self, **kwargs): # noqa: D102 77 | 78 | calc = self.node 79 | 80 | mapspec = MapSpec.from_string(calc.get_option("mapspec")) 81 | mapped_parameter_shapes = { 82 | k: array_shape(v) 83 | for k, v in calc.inputs.kwargs.items() 84 | if k in mapspec.parameters 85 | } 86 | expected_shape = mapspec.shape(mapped_parameter_shapes) 87 | remote_folder = calc.outputs["remote_folder"] 88 | has_exceptions = bool(remote_folder.listdir("__exceptions__")) 89 | 90 | def retrieve(return_value_name): 91 | return PyRemoteArray( 92 | computer=calc.computer, 93 | remote_path=os.path.join( 94 | calc.outputs.remote_folder.get_remote_path(), 95 | return_value_name, 96 | ), 97 | shape=expected_shape, 98 | filename_template=common.array.filename_template, 99 | ) 100 | 101 | exit_code = None 102 | 103 | # If any data was produced we create the appropriate outputs. 104 | # Users can still tell something went wrong from the exit code. 
105 | for r in calc.inputs.func.returns: 106 | path = os.path.join("__return_values__", r) 107 | has_data = remote_folder.listdir(path) 108 | if has_data: 109 | self.out(f"return_values.{r}", retrieve(path)) 110 | else: 111 | exit_code = self.exit_codes.MISSING_OUTPUT 112 | 113 | try: 114 | job_infos = calc.computer.get_scheduler().parse_detailed_job_info( 115 | calc.get_detailed_job_info() 116 | ) 117 | except AttributeError: 118 | pass 119 | else: 120 | if any(j["State"] == "FAILED" for j in job_infos): 121 | exit_code = self.exit_codes.NONZERO_EXIT_CODE 122 | 123 | if has_exceptions: 124 | self.out("exception", retrieve("__exceptions__")) 125 | exit_code = self.exit_codes.USER_CODE_RAISED 126 | 127 | if exit_code is not None: 128 | calc.set_exit_status(exit_code.status) 129 | calc.set_exit_message(exit_code.message) 130 | return exit_code 131 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/common/array.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | import concurrent.futures 6 | import functools 7 | import itertools 8 | import operator 9 | import pathlib 10 | from typing import Any, List, Sequence, Tuple 11 | 12 | import numpy as np 13 | 14 | from . import serialize 15 | 16 | filename_template = "__{:d}__.pickle" 17 | 18 | 19 | class FileBasedObjectArray: 20 | """Array interface to a folder of files on disk. 21 | 22 | __getitem__ returns "np.ma.masked" for non-existant files. 23 | """ 24 | 25 | def __init__( 26 | self, 27 | folder, 28 | shape, 29 | strides=None, 30 | filename_template=filename_template, 31 | ): 32 | self.folder = pathlib.Path(folder).absolute() 33 | self.shape = tuple(shape) 34 | self.strides = _make_strides(self.shape) if strides is None else tuple(strides) 35 | self.filename_template = str(filename_template) 36 | 37 | @property 38 | def size(self) -> int: 39 | """Return number of elements in the array.""" 40 | return functools.reduce(operator.mul, self.shape, 1) 41 | 42 | @property 43 | def rank(self) -> int: 44 | """Return the rank of the array.""" 45 | return len(self.shape) 46 | 47 | def _normalize_key(self, key: Tuple[int, ...]) -> Tuple[int, ...]: 48 | if not isinstance(key, tuple): 49 | key = (key,) 50 | if len(key) != self.rank: 51 | raise IndexError( 52 | f"too many indices for array: array is {self.rank}-dimensional, " 53 | "but {len(key)} were indexed" 54 | ) 55 | 56 | if any(isinstance(k, slice) for k in key): 57 | raise NotImplementedError("Cannot yet slice subarrays") 58 | 59 | normalized_key = [] 60 | for axis, k in enumerate(key): 61 | axis_size = self.shape[axis] 62 | normalized_k = k if k >= 0 else (axis_size - k) 63 | if not (0 <= normalized_k < axis_size): 64 | raise IndexError( 65 | "index {k} is out of bounds for axis {axis} with size {axis_size}" 66 | ) 67 | normalized_key.append(k) 68 | 69 | return tuple(normalized_key) 70 | 71 | def _index_to_file(self, index: int) -> pathlib.Path: 72 | """Return the filename associated with the given index.""" 73 | return self.folder / self.filename_template.format(index) 74 | 75 | def _key_to_file(self, key: Tuple[int, ...]) -> pathlib.Path: 76 | """Return the filename associated with the given key.""" 77 | index = sum(k * s for k, s in zip(key, self.strides)) 78 | return self._index_to_file(index) 79 | 80 | def _files(self): 81 | """Yield all the filenames that constitute the data in this array.""" 82 | return 
map(self._key_to_file, itertools.product(*map(range, self.shape))) 83 | 84 | def __getitem__(self, key): 85 | key = self._normalize_key(key) 86 | if any(isinstance(x, slice) for x in key): 87 | # XXX: need to figure out strides in order to implement this. 88 | raise NotImplementedError("Cannot yet slice subarrays") 89 | 90 | f = self._key_to_file(key) 91 | if not f.is_file(): 92 | return np.ma.core.masked 93 | return serialize.load(f) 94 | 95 | def to_array(self) -> np.ma.core.MaskedArray: 96 | """Return a masked numpy array containing all the data. 97 | 98 | The returned numpy array has dtype "object" and a mask for 99 | masking out missing data. 100 | """ 101 | items = _load_all(map(self._index_to_file, range(self.size))) 102 | mask = [not self._index_to_file(i).is_file() for i in range(self.size)] 103 | return np.ma.array(items, mask=mask, dtype=object).reshape(self.shape) 104 | 105 | def dump(self, key, value): 106 | """Dump 'value' into the file associated with 'key'. 107 | 108 | Examples 109 | -------- 110 | >>> arr = FileBasedObjectArray(...) 111 | >>> arr.dump((2, 1, 5), dict(a=1, b=2)) 112 | """ 113 | key = self._normalize_key(key) 114 | if not any(isinstance(x, slice) for x in key): 115 | return serialize.dump(value, self._key_to_file(key)) 116 | 117 | raise NotImplementedError("Cannot yet dump subarrays") 118 | 119 | 120 | def _tails(seq): 121 | while seq: 122 | seq = seq[1:] 123 | yield seq 124 | 125 | 126 | def _make_strides(shape): 127 | return tuple(functools.reduce(operator.mul, s, 1) for s in _tails(shape)) 128 | 129 | 130 | def _load_all(filenames: Sequence[str]) -> List[Any]: 131 | def maybe_read(f): 132 | return serialize.read(f) if f.is_file() else None 133 | 134 | def maybe_load(x): 135 | return serialize.loads(x) if x is not None else None 136 | 137 | # Delegate file reading to the threadpool but deserialize sequentially, 138 | # as this is pure Python and CPU bound 139 | with concurrent.futures.ThreadPoolExecutor() as tex: 140 | return [maybe_load(x) for x in tex.map(maybe_read, filenames)] 141 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/schedulers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | from collections.abc import Mapping 6 | import datetime 7 | from typing import List, Optional, T 8 | 9 | from aiida.common.lang import type_check 10 | from aiida.schedulers import JobInfo, JobState 11 | from aiida.schedulers.plugins.slurm import SlurmScheduler 12 | import toolz 13 | 14 | __all__ = ["SlurmSchedulerWithJobArray"] 15 | 16 | 17 | class SlurmSchedulerWithJobArray(SlurmScheduler): 18 | """A Slurm scheduler that reports only a single JobInfo for job arrays.""" 19 | 20 | def _parse_joblist_output(self, retval, stdout, stderr): 21 | # Aiida assumes that there is a single job associated with each call 22 | # to 'sbatch', but this is not true in the case of job arrays. 23 | # In order to meet this requirement we merge the JobInfos for each job 24 | # in the array. 25 | return merge_job_arrays(super()._parse_joblist_output(retval, stdout, stderr)) 26 | 27 | # Return only the necessary fields for 'parse_output' to do its job. 28 | # Our fat array jobs mean the response from 'sacct' can be pretty huge. 
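# With --parsable2, sacct prints one pipe-separated row per array element,
# e.g. (hypothetical job): 1234_7|0:0|COMPLETED|None|00:03:21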
29 | _detailed_job_info_fields = [ 30 | "JobID", 31 | "ExitCode", 32 | "State", 33 | "Reason", 34 | "CPUTime", 35 | ] 36 | 37 | def _get_detailed_job_info_command(self, job_id): 38 | fields = ",".join(self._detailed_job_info_fields) 39 | # --parsable2 separates fields with pipes, with no trailing pipe 40 | return f"sacct --format={fields} --parsable2 --jobs={job_id}" 41 | 42 | @classmethod 43 | def parse_detailed_job_info(cls, detailed_job_info): 44 | """Parse output from 'sacct', issued after the completion of the job.""" 45 | type_check(detailed_job_info, dict) 46 | 47 | retval = detailed_job_info["retval"] 48 | if retval != 0: 49 | stderr = detailed_job_info["stderr"] 50 | raise ValueError(f"Error code {retval} returned by 'sacct': {stderr}") 51 | 52 | try: 53 | detailed_stdout = detailed_job_info["stdout"] 54 | except KeyError: 55 | raise ValueError( 56 | "the `detailed_job_info` does not contain the required key `stdout`." 57 | ) 58 | 59 | type_check(detailed_stdout, str) 60 | 61 | lines = detailed_stdout.splitlines() 62 | 63 | try: 64 | fields, *job_infos = lines 65 | except IndexError: 66 | raise ValueError("`detailed_job_info.stdout` does not contain enough lines") 67 | fields = fields.split("|") 68 | 69 | if fields != cls._detailed_job_info_fields: 70 | raise ValueError( 71 | "Fields returned by 'sacct' do not match fields specified." 72 | ) 73 | 74 | # Parse the individual job outputs 75 | job_infos = [dict(zip(fields, info.split("|"))) for info in job_infos] 76 | # Each job has a 'batch' entry also, which we ignore 77 | job_infos = [j for j in job_infos if not j["JobID"].endswith(".batch")] 78 | 79 | return job_infos 80 | 81 | def parse_output(self, detailed_job_info, stdout, stderr): 82 | """Parse output from 'sacct', issued after the completion of the job.""" 83 | from aiida.engine import CalcJob 84 | 85 | job_infos = self.parse_detailed_job_info(detailed_job_info) 86 | 87 | # TODO: figure out how to return richer information to the calcjob, so 88 | # that a workchain could in principle reschedule with only the 89 | # failed jobs. 
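# (Slurm can resubmit a subset of an array, e.g. 'sbatch --array=3,7 job.sh',
# so exposing the indices of the failed elements would be enough for that.)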
90 | if any(j["State"] == "OUT_OF_MEMORY" for j in job_infos): 91 | return CalcJob.exit_codes.ERROR_SCHEDULER_OUT_OF_MEMORY 92 | if any(j["State"] == "TIMEOUT" for j in job_infos): 93 | return CalcJob.exit_codes.ERROR_SCHEDULER_OUT_OF_WALLTIME 94 | 95 | 96 | def merge_job_arrays(jobs: List[JobInfo]) -> List[JobInfo]: 97 | """Merge JobInfos from jobs in the same Slurm Array into a single JobInfo.""" 98 | mergers = { 99 | "job_id": toolz.compose(job_array_id, toolz.first), 100 | "dispatch_time": min, 101 | "finish_time": toolz.compose( 102 | max, 103 | toolz.curried.map(with_default(datetime.datetime.min)), 104 | ), 105 | "job_state": total_job_state, 106 | "raw_data": toolz.identity, 107 | } 108 | 109 | job_array_id_from_info = toolz.compose( 110 | job_array_id, toolz.functoolz.attrgetter("job_id") 111 | ) 112 | 113 | return [ 114 | merge_with_functions(*jobs, mergers=mergers, factory=JobInfo) 115 | for jobs in toolz.groupby(job_array_id_from_info, jobs).values() 116 | ] 117 | 118 | 119 | def total_job_state(states: List[JobState]) -> JobState: 120 | # Order is important here 121 | possible_states = [ 122 | JobState.UNDETERMINED, 123 | JobState.RUNNING, 124 | JobState.SUSPENDED, 125 | JobState.QUEUED_HELD, 126 | JobState.QUEUED, 127 | ] 128 | for ps in possible_states: 129 | if any(state == ps for state in states): 130 | return ps 131 | 132 | if all(state == JobState.DONE for state in states): 133 | return JobState.DONE 134 | else: 135 | raise RuntimeError("Invalid state encountered") 136 | 137 | 138 | def job_array_id(job_id: str) -> str: 139 | """Return the ID of the associated array job. 140 | 141 | If the provided job is not part of a job array then 142 | the job ID is returned. 143 | """ 144 | return toolz.first(job_id.split("_")) 145 | 146 | 147 | @toolz.curry 148 | def with_default(default: T, v: Optional[T]) -> T: 149 | """Return 'v' if it is not 'None', otherwise return 'default'.""" 150 | return default if v is None else v 151 | 152 | 153 | def merge_with_functions(*dicts, mergers, factory=dict): 154 | """Merge 'dicts', using 'mergers'. 155 | 156 | Parameters 157 | ---------- 158 | *dicts 159 | The dictionaries / mappings to merge 160 | mergers 161 | Mapping from keys in 'dicts' to functions. Each function 162 | accepts a list of values and returns a single value. 163 | factory 164 | Function that returns a new instance of the mapping 165 | type that we would like returned 166 | 167 | Examples 168 | -------- 169 | >>> merge_with_functions( 170 | ... {"a": 1, "b": 10, "c": "hello"}, 171 | ... {"a": 5, "b": 20, "c": "goodbye"}, 172 | ... mergers={"a": min, "b": max}, 173 | ... ) 174 | {"a": 1, "b": 20, "c": "goodbye"} 175 | """ 176 | if len(dicts) == 1 and not isinstance(dicts[0], Mapping): 177 | dicts = dicts[0] 178 | 179 | result = factory() 180 | for d in dicts: 181 | for k, v in d.items(): 182 | if k not in result: 183 | result[k] = [v] 184 | else: 185 | result[k].append(v) 186 | return toolz.itemmap( 187 | lambda kv: (kv[0], mergers.get(kv[0], toolz.last)(kv[1])), result, factory 188 | ) 189 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/_version.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Microsoft Corporation. 3 | # Licensed under the MIT License. 
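# Version-from-git machinery based on the 'miniver' template (see the note in
# setup.py); the package version is derived from git tags at build time.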
4 | 5 | 6 | from collections import namedtuple 7 | import os 8 | import subprocess 9 | 10 | from setuptools.command.build_py import build_py as build_py_orig 11 | from setuptools.command.sdist import sdist as sdist_orig 12 | 13 | Version = namedtuple("Version", ("release", "dev", "labels")) 14 | 15 | # No public API 16 | __all__ = [] 17 | 18 | package_root = os.path.dirname(os.path.realpath(__file__)) 19 | package_name = os.path.basename(package_root) 20 | distr_root = os.path.dirname(package_root) 21 | # If the package is inside a "src" directory the 22 | # distribution root is 1 level up. 23 | if os.path.split(distr_root)[1] == "src": 24 | _package_root_inside_src = True 25 | distr_root = os.path.dirname(distr_root) 26 | else: 27 | _package_root_inside_src = False 28 | 29 | STATIC_VERSION_FILE = "_static_version.py" 30 | 31 | 32 | def get_version(version_file=STATIC_VERSION_FILE): 33 | version_info = get_static_version_info(version_file) 34 | version = version_info["version"] 35 | if version == "__use_git__": 36 | version = get_version_from_git() 37 | if not version: 38 | version = get_version_from_git_archive(version_info) 39 | if not version: 40 | version = Version("unknown", None, None) 41 | return pep440_format(version) 42 | else: 43 | return version 44 | 45 | 46 | def get_static_version_info(version_file=STATIC_VERSION_FILE): 47 | version_info = {} 48 | with open(os.path.join(package_root, version_file), "rb") as f: 49 | exec(f.read(), {}, version_info) 50 | return version_info 51 | 52 | 53 | def version_is_from_git(version_file=STATIC_VERSION_FILE): 54 | return get_static_version_info(version_file)["version"] == "__use_git__" 55 | 56 | 57 | def pep440_format(version_info): 58 | release, dev, labels = version_info 59 | 60 | version_parts = [release] 61 | if dev: 62 | if release.endswith("-dev") or release.endswith(".dev"): 63 | version_parts.append(dev) 64 | else: # prefer PEP440 over strict adhesion to semver 65 | version_parts.append(".dev{}".format(dev)) 66 | 67 | if labels: 68 | version_parts.append("+") 69 | version_parts.append(".".join(labels)) 70 | 71 | return "".join(version_parts) 72 | 73 | 74 | def get_version_from_git(): 75 | try: 76 | p = subprocess.Popen( 77 | ["git", "rev-parse", "--show-toplevel"], 78 | cwd=distr_root, 79 | stdout=subprocess.PIPE, 80 | stderr=subprocess.PIPE, 81 | ) 82 | except OSError: 83 | return 84 | if p.wait() != 0: 85 | return 86 | if not os.path.samefile(p.communicate()[0].decode().rstrip("\n"), distr_root): 87 | # The top-level directory of the current Git repository is not the same 88 | # as the root directory of the distribution: do not extract the 89 | # version from Git. 90 | return 91 | 92 | # git describe --first-parent does not take into account tags from branches 93 | # that were merged-in. The '--long' flag gets us the 'dev' version and 94 | # git hash, '--always' returns the git hash even if there are no tags. 
95 | for opts in [["--first-parent"], []]: 96 | try: 97 | p = subprocess.Popen( 98 | ["git", "describe", "--long", "--always"] + opts, 99 | cwd=distr_root, 100 | stdout=subprocess.PIPE, 101 | stderr=subprocess.PIPE, 102 | ) 103 | except OSError: 104 | return 105 | if p.wait() == 0: 106 | break 107 | else: 108 | return 109 | 110 | description = ( 111 | p.communicate()[0] 112 | .decode() 113 | .strip("v") # Tags can have a leading 'v', but the version should not 114 | .rstrip("\n") 115 | .rsplit("-", 2) # Split the latest tag, commits since tag, and hash 116 | ) 117 | 118 | try: 119 | release, dev, git = description 120 | except ValueError: # No tags, only the git hash 121 | # prepend 'g' to match with format returned by 'git describe' 122 | git = "g{}".format(*description) 123 | release = "unknown" 124 | dev = None 125 | 126 | labels = [] 127 | if dev == "0": 128 | dev = None 129 | else: 130 | labels.append(git) 131 | 132 | try: 133 | p = subprocess.Popen(["git", "diff", "--quiet"], cwd=distr_root) 134 | except OSError: 135 | labels.append("confused") # This should never happen. 136 | else: 137 | if p.wait() == 1: 138 | labels.append("dirty") 139 | 140 | return Version(release, dev, labels) 141 | 142 | 143 | # TODO: change this logic when there is a git pretty-format 144 | # that gives the same output as 'git describe'. 145 | # Currently we can only tell the tag the current commit is 146 | # pointing to, or its hash (with no version info) 147 | # if it is not tagged. 148 | def get_version_from_git_archive(version_info): 149 | try: 150 | refnames = version_info["refnames"] 151 | git_hash = version_info["git_hash"] 152 | except KeyError: 153 | # These fields are not present if we are running from an sdist. 154 | # Execution should never reach here, though 155 | return None 156 | 157 | if git_hash.startswith("$Format") or refnames.startswith("$Format"): 158 | # variables not expanded during 'git archive' 159 | return None 160 | 161 | VTAG = "tag: v" 162 | refs = set(r.strip() for r in refnames.split(",")) 163 | version_tags = set(r[len(VTAG) :] for r in refs if r.startswith(VTAG)) 164 | if version_tags: 165 | release, *_ = sorted(version_tags) # prefer e.g. "2.0" over "2.0rc1" 166 | return Version(release, dev=None, labels=None) 167 | else: 168 | return Version("unknown", dev=None, labels=["g{}".format(git_hash)]) 169 | 170 | 171 | __version__ = get_version() 172 | 173 | 174 | # The following section defines a module global 'cmdclass', 175 | # which can be used from setup.py. The 'package_name' and 176 | # '__version__' module globals are used (but not modified). 177 | 178 | 179 | def _write_version(fname): 180 | # This could be a hard link, so try to delete it first. Is there any way 181 | # to do this atomically together with opening? 
182 | try: 183 | os.remove(fname) 184 | except OSError: 185 | pass 186 | with open(fname, "w") as f: 187 | f.write( 188 | "# This file has been created by setup.py.\n" 189 | "version = '{}'\n".format(__version__) 190 | ) 191 | 192 | 193 | class _build_py(build_py_orig): 194 | def run(self): 195 | super().run() 196 | _write_version(os.path.join(self.build_lib, package_name, STATIC_VERSION_FILE)) 197 | 198 | 199 | class _sdist(sdist_orig): 200 | def make_release_tree(self, base_dir, files): 201 | super().make_release_tree(base_dir, files) 202 | if _package_root_inside_src: 203 | p = os.path.join("src", package_name) 204 | else: 205 | p = package_name 206 | _write_version(os.path.join(base_dir, p, STATIC_VERSION_FILE)) 207 | 208 | 209 | cmdclass = dict(sdist=_sdist, build_py=_build_py) 210 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/query.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from __future__ import annotations 5 | 6 | import datetime 7 | import itertools 8 | import multiprocessing 9 | from pathlib import Path 10 | 11 | import aiida.common 12 | import aiida.engine 13 | import aiida.manage.configuration 14 | import aiida.orm 15 | 16 | from .data import PyRemoteArray, PyRemoteData 17 | from .workflow import PyWorkChain 18 | 19 | 20 | def workflows() -> aiida.orm.QueryBuilder: 21 | """Return an Aiida database query that will return all workflows.""" 22 | q = aiida.orm.QueryBuilder() 23 | q.append(cls=PyWorkChain, tag="flow") 24 | q.order_by({"flow": [{"ctime": {"order": "desc"}}]}) 25 | return q 26 | 27 | 28 | def running_workflows() -> aiida.orm.QueryBuilder: 29 | """Return an Aiida database query that will return all running workflows.""" 30 | r = workflows() 31 | r.add_filter( 32 | "flow", 33 | { 34 | "attributes.process_state": { 35 | "in": [ 36 | aiida.engine.ProcessState.RUNNING.value, 37 | aiida.engine.ProcessState.WAITING.value, 38 | ], 39 | } 40 | }, 41 | ) 42 | return r 43 | 44 | 45 | def recent_workflows( 46 | days: int = 0, hours: int = 0, minutes: int = 0 47 | ) -> aiida.orm.QueryBuilder: 48 | """Return an Aiida database query for all recently started workflows. 49 | 50 | Parameters 51 | ---------- 52 | days, hours, minutes 53 | Any workflows started more recently than this many days/minutes/hours 54 | will be included in the result of the query. 55 | """ 56 | delta = aiida.common.timezone.now() - datetime.timedelta( 57 | days=days, hours=hours, minutes=minutes 58 | ) 59 | r = workflows() 60 | r.add_filter("flow", {"ctime": {">": delta}}) 61 | return r 62 | 63 | 64 | def remote_files( 65 | profile: str | None = None, 66 | root: str | Path | None = None, 67 | ) -> set[Path]: 68 | """Return the paths of all RemoteData for the given profile. 69 | 70 | Parameters 71 | ---------- 72 | profile 73 | The profile name for which to return the UUIDs. 74 | If not provided, runs on the currently loaded profile. 75 | root 76 | If provided, return only sub-paths of this root path. 77 | 78 | Notes 79 | ----- 80 | As Paths are returned without any information about what computer 81 | the path refers to, this function is only useful in environments 82 | where the Paths are globally unique. 
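
    Examples
    --------
    Illustrative output only; the actual paths depend on the computer's
    configured 'work_dir'.

    >>> remote_files(root="/path/to/my-user/.aiida_run")
    {PosixPath('/path/to/my-user/.aiida_run/00/24/abc2-899c-4106-8c8e-74638dbdd71c')}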
83 | """ 84 | if profile: 85 | aiida.load_profile(profile) 86 | 87 | # PyRemoteData and PyRemoteArray are not in the 'data.core.remote' 88 | # plugin path, so 'query.append' does not include them when querying 89 | # for 'aiida.orm.RemoteData', despite the fact that they do subclass it. 90 | remote_data = [aiida.orm.RemoteData, PyRemoteArray, PyRemoteData] 91 | 92 | query = aiida.orm.QueryBuilder() 93 | query.append(cls=remote_data, project="attributes.remote_path", tag="files") 94 | if root: 95 | root = Path(root).absolute() 96 | query.add_filter("files", {"attributes.remote_path": {"like": f"{root}%"}}) 97 | 98 | return {Path(p) for p, in query.iterall()} 99 | 100 | 101 | # Needs to be importable to be used with multiprocessing in 'referenced_remote_files' 102 | def _run_on_q(f, q, *args): 103 | try: 104 | r = f(*args) 105 | except Exception as e: 106 | q.put(("error", e)) 107 | else: 108 | q.put(("ok", r)) 109 | 110 | 111 | def referenced_remote_files(root: str | Path | None = None) -> set[Path]: 112 | """Return the paths of all RemoteData for all profiles. 113 | 114 | Parameters 115 | ---------- 116 | root 117 | If provided, return only sub-paths of this root path. 118 | 119 | Notes 120 | ----- 121 | As Paths are returned without any information about what computer 122 | the path refers to, this function is only useful in environments 123 | where the Paths are globally unique. 124 | """ 125 | # Loading different AiiDA profiles requires starting a fresh Python interpreter. 126 | # For this reason we cannot use concurrent.futures, and must use bare 127 | # multiprocessing. 128 | # TODO: revisit whether this is necessary when AiiDA 2.0 is released 129 | ctx = multiprocessing.get_context("spawn") 130 | q = ctx.Queue() 131 | profiles = aiida.manage.configuration.get_config().profile_names 132 | procs = [ 133 | ctx.Process(target=_run_on_q, args=(remote_files, q, p, root)) for p in profiles 134 | ] 135 | for proc in procs: 136 | proc.start() 137 | for proc in procs: 138 | proc.join() 139 | 140 | results = [q.get() for _ in range(q.qsize())] 141 | if errors := [e for status, e in results if status != "ok"]: 142 | raise ValueError(f"One or more processes errored: {errors}") 143 | 144 | return set(itertools.chain.from_iterable(r for _, r in results)) 145 | 146 | 147 | def referenced_work_directories(root: str | Path) -> set[Path]: 148 | """Return all calcjob working directories referenced in the AiiDA database. 149 | 150 | Notes 151 | ----- 152 | As Paths are returned without any information about what computer 153 | the path refers to, this function is only useful in environments 154 | where the Paths are globally unique. 155 | """ 156 | root = Path(root).absolute() 157 | # aiiDA shards working directory paths like '/path/to/.aiida_run/ab/cd/1234-...' 158 | # so we add 3 subdirectories onto the root to get to the working directories. 159 | n = len(root.parts) + 3 160 | return {Path(*p.parts[:n]) for p in referenced_remote_files(root)} 161 | 162 | 163 | def existing_work_directories(root: str | Path) -> set[Path]: 164 | """Return all calcjob working directories under 'root' that exist on disk. 165 | 166 | Notes 167 | ----- 168 | As Paths are returned without any information about what computer 169 | the path refers to, this function is only useful in environments 170 | where the Paths are globally unique. 
171 | 172 | Examples 173 | -------- 174 | >>> work_directories("/path/to/my-user/.aiida_run") 175 | {PosixPath('/path/to/my-user/.aiida_run/00/24/ab.c2-899c-4106-8c8e-74638dbdd71c')} 176 | """ 177 | root = Path(root).absolute() 178 | # aiiDA shards working directory paths like '/path/to/.aiida_run/ab/cd/1234-...' 179 | # so we add glob 3 subdirectories onto the root to get to the working directories. 180 | return {Path(p) for p in root.glob("*/*/*")} 181 | 182 | 183 | def unreferenced_work_directories(root: str | Path) -> set[Path]: 184 | """Return all unreferenced calcjob working directories under 'root'. 185 | 186 | i.e. return all calcjob working directories that exist on disk, but are 187 | not referenced in the AiiDA database. 188 | 189 | Notes 190 | ----- 191 | As Paths are returned without any information about what computer 192 | the path refers to, this function is only useful in environments 193 | where the Paths are globally unique. 194 | 195 | Examples 196 | -------- 197 | >>> unreferenced_work_directories("/path/to/my-user/.aiida_run") 198 | {PosixPath('/path/to/my-user/.aiida_run/00/24/abc2-899c-4106-8c8e-74638dbdd71c')} 199 | """ 200 | root = Path(root).absolute() 201 | 202 | return existing_work_directories(root) - referenced_work_directories(root) 203 | 204 | 205 | def computer_work_directory(computer: str | aiida.orm.Computer) -> Path: 206 | """Return the work directory for 'computer'. 207 | 208 | Like 'computer.get_workdir()', except that '{username}' template 209 | parameters are replaced with actual usernames. 210 | 211 | Parameters 212 | ---------- 213 | computer 214 | A Computer instance, or a computer label. 215 | """ 216 | if not isinstance(computer, aiida.orm.Computer): 217 | computer = aiida.orm.load_computer(computer) 218 | 219 | with computer.get_transport() as t: 220 | return Path(computer.get_workdir().format(username=t.whoami())) 221 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/common/mapspec.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
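#
# A quick usage sketch (illustrative only; the MapSpec docstrings below contain
# the authoritative examples):
#
#     spec = MapSpec.from_string("a[i, j], b[i, j], c[k] -> q[i, j, k]")
#     spec.shape({"a": (3, 4), "b": (3, 4), "c": (5,)})  # -> (3, 4, 5)
#     spec.output_key((3, 4, 5), 37)                     # -> (1, 3, 2)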
3 | 4 | 5 | from __future__ import annotations 6 | 7 | from dataclasses import dataclass 8 | import functools 9 | import re 10 | from typing import Dict, List, Optional, Tuple, Union 11 | 12 | from .array import _make_strides 13 | 14 | 15 | @dataclass(frozen=True) 16 | class ArraySpec: 17 | """Specification for a named array, with some axes indexed by named indices.""" 18 | 19 | name: str 20 | axes: Tuple[Optional[str]] 21 | 22 | def __post_init__(self): 23 | if not self.name.isidentifier(): 24 | raise ValueError( 25 | f"Array name '{self.name}' is not a valid Python identifier" 26 | ) 27 | for i in self.axes: 28 | if not (i is None or i.isidentifier()): 29 | raise ValueError(f"Index name '{i}' is not a valid Python identifier") 30 | 31 | def __str__(self) -> str: 32 | indices = [":" if x is None else x for x in self.axes] 33 | return f"{self.name}[{', '.join(indices)}]" 34 | 35 | @property 36 | def indices(self) -> Tuple[str]: 37 | """Return the names of the indices for this array spec.""" 38 | return tuple(x for x in self.axes if x is not None) 39 | 40 | @property 41 | def rank(self) -> int: 42 | """Return the rank of this array spec.""" 43 | return len(self.axes) 44 | 45 | def validate(self, shape: Tuple[int, ...]): 46 | """Raise an exception if 'shape' is not compatible with this array spec.""" 47 | if len(shape) != self.rank: 48 | raise ValueError( 49 | f"Expecting array of rank {self.rank}, but got array of shape {shape}" 50 | ) 51 | 52 | 53 | @dataclass(frozen=True) 54 | class MapSpec: 55 | """Specification for how to map input axes to output axes. 56 | 57 | Examples 58 | -------- 59 | >>> mapped = MapSpec.from_string("a[i, j], b[i, j], c[k] -> q[i, j, k]") 60 | >>> partial_reduction = MapSpec.from_string("a[i, :], b[:, k] -> q[i, k]") 61 | """ 62 | 63 | inputs: Tuple[ArraySpec] 64 | output: ArraySpec 65 | 66 | def __post_init__(self): 67 | if any(x is None for x in self.output.axes): 68 | raise ValueError("Output array must have all axes indexed (no ':').") 69 | 70 | output_indices = set(self.output.indices) 71 | input_indices = functools.reduce( 72 | set.union, (x.indices for x in self.inputs), set() 73 | ) 74 | 75 | if extra_indices := output_indices - input_indices: 76 | raise ValueError( 77 | "Output array has indices that do not appear " 78 | f"in the input: {extra_indices}" 79 | ) 80 | if unused_indices := input_indices - output_indices: 81 | raise ValueError( 82 | "Input array have indices that do not appear " 83 | f"in the output: {unused_indices}" 84 | ) 85 | 86 | @property 87 | def parameters(self) -> Tuple[str, ...]: 88 | """Return the parameter names of this mapspec.""" 89 | return tuple(x.name for x in self.inputs) 90 | 91 | @property 92 | def indices(self) -> Tuple[str, ...]: 93 | """Return the index names for this MapSpec.""" 94 | return self.output.indices 95 | 96 | def shape(self, shapes: Dict[str, Tuple[int, ...]]) -> Tuple[int, ...]: 97 | """Return the shape of the output of this MapSpec. 98 | 99 | Parameters 100 | ---------- 101 | shapes 102 | Shapes of the inputs, keyed by name. 103 | """ 104 | input_names = set(x.name for x in self.inputs) 105 | 106 | if extra_names := set(shapes.keys()) - input_names: 107 | raise ValueError( 108 | f"Got extra array {extra_names} that are not accepted by this map." 
109 | ) 110 | if missing_names := input_names - set(shapes.keys()): 111 | raise ValueError( 112 | f"Inputs expected by this map were not provided: {missing_names}" 113 | ) 114 | 115 | # Each individual array is of the appropriate rank 116 | for x in self.inputs: 117 | x.validate(shapes[x.name]) 118 | 119 | # Shapes match between array sharing a named index 120 | 121 | def get_dim(array, index): 122 | axis = array.axes.index(index) 123 | return shapes[array.name][axis] 124 | 125 | shape = [] 126 | for index in self.output.indices: 127 | relevant_arrays = [x for x in self.inputs if index in x.indices] 128 | dim, *rest = [get_dim(x, index) for x in relevant_arrays] 129 | if any(dim != x for x in rest): 130 | raise ValueError( 131 | f"Dimension mismatch for arrays {relevant_arrays} " 132 | f"along {index} axis." 133 | ) 134 | shape.append(dim) 135 | 136 | return tuple(shape) 137 | 138 | def output_key(self, shape: Tuple[int, ...], linear_index: int) -> Tuple[int, ...]: 139 | """Return a key used for indexing the output of this map. 140 | 141 | Parameters 142 | ---------- 143 | shape 144 | The shape of the map output. 145 | linear_index 146 | The index of the element for which to return the key. 147 | 148 | Examples 149 | -------- 150 | >>> spec = MapSpec.from_string("x[i, j], y[j, :, k] -> z[i, j, k]") 151 | >>> spec.output_key((5, 2, 3), 23) 152 | (3, 1, 2) 153 | """ 154 | if len(shape) != len(self.indices): 155 | raise ValueError( 156 | f"Expected a shape of length {len(self.indices)}, got {shape}" 157 | ) 158 | return tuple( 159 | (linear_index // stride) % dim 160 | for stride, dim in zip(_make_strides(shape), shape) 161 | ) 162 | 163 | def input_keys( 164 | self, 165 | shape: Tuple[int, ...], 166 | linear_index: int, 167 | ) -> Dict[str, Tuple[Union[slice, int]]]: 168 | """Return keys for indexing inputs of this map. 169 | 170 | Parameters 171 | ---------- 172 | shape 173 | The shape of the map output. 174 | linear_index 175 | The index of the element for which to return the keys. 
176 | 177 | Examples 178 | -------- 179 | >>> spec = MapSpec("x[i, j], y[j, :, k] -> z[i, j, k]") 180 | >>> spec.input_keys((5, 2, 3), 23) 181 | {'x': (3, 1), 'y': (1, slice(None, None, None), 2)} 182 | """ 183 | output_key = self.output_key(shape, linear_index) 184 | if len(output_key) != len(self.indices): 185 | raise ValueError( 186 | f"Expected a key of shape {len(self.indices)}, got {output_key}" 187 | ) 188 | ids = dict(zip(self.indices, output_key)) 189 | return { 190 | x.name: tuple(slice(None) if ax is None else ids[ax] for ax in x.axes) 191 | for x in self.inputs 192 | } 193 | 194 | def __str__(self) -> str: 195 | return f"{', '.join(map(str, self.inputs))} -> {self.output}" 196 | 197 | @classmethod 198 | def from_string(cls, expr): 199 | """Construct an MapSpec from a string.""" 200 | try: 201 | in_, out_ = expr.split("->") 202 | except ValueError: 203 | raise ValueError(f"Expected expression of form 'a -> b', but got '{expr}''") 204 | 205 | inputs = _parse_indexed_arrays(in_) 206 | outputs = _parse_indexed_arrays(out_) 207 | if len(outputs) != 1: 208 | raise ValueError(f"Expected a single output, but got {len(outputs)}") 209 | (output,) = outputs 210 | 211 | return cls(inputs, output) 212 | 213 | def to_string(self) -> str: 214 | """Return a faithful representation of a MapSpec as a string.""" 215 | return str(self) 216 | 217 | 218 | def _parse_index_string(index_string) -> List[Optional[str]]: 219 | indices = [idx.strip() for idx in index_string.split(",")] 220 | return [i if i != ":" else None for i in indices] 221 | 222 | 223 | def _parse_indexed_arrays(expr) -> List[ArraySpec]: 224 | array_pattern = r"(\w+?)\[(.+?)\]" 225 | return [ 226 | ArraySpec(name, _parse_index_string(indices)) 227 | for name, indices in re.findall(array_pattern, expr) 228 | ] 229 | -------------------------------------------------------------------------------- /examples/04-deleting-data.md: -------------------------------------------------------------------------------- 1 | # Deleting data 2 | 3 | 4 | This notebook provides guidance on how to delete data that you no longer need. 
5 | 6 | 7 | As usual we first import AiiDA and aiida_dynamic_workflows: 8 | 9 | ```python 10 | import aiida 11 | aiida.load_profile() 12 | 13 | aiida.__version__ 14 | ``` 15 | 16 | ```python 17 | import aiida_dynamic_workflows 18 | import aiida_dynamic_workflows.workflow 19 | import aiida_dynamic_workflows.report 20 | 21 | aiida_dynamic_workflows.control.ensure_daemon_restarted() 22 | aiida_dynamic_workflows.__version__ 23 | ``` 24 | 25 | Next we define a utility function for watching processes as they evolve: 26 | 27 | ```python 28 | import datetime 29 | import time 30 | 31 | import ipywidgets as widgets 32 | 33 | def wait(p, timeout=2): 34 | out = widgets.Output() 35 | while not p.is_terminated: 36 | out.clear_output(wait=True) 37 | print(f"last updated @ {datetime.datetime.now()}") 38 | print(aiida_dynamic_workflows.report.progress(p)) 39 | time.sleep(timeout) 40 | out.clear_output(wait=True) 41 | print(f"Finished @ {p.mtime}") 42 | print(aiida_dynamic_workflows.report.progress(p)) 43 | ``` 44 | 45 | Now we create a small workflow, for illustrative purposes: 46 | 47 | ```python 48 | @aiida_dynamic_workflows.step(returns="c") 49 | def add(a, b): 50 | return a + b 51 | 52 | @aiida_dynamic_workflows.step(returns="z") 53 | def mul(c, y): 54 | return c * y 55 | 56 | 57 | workflow = ( 58 | aiida_dynamic_workflows.workflow 59 | .new_workflow("test") 60 | .then(add) 61 | .then(mul) 62 | .returning("c", "z") 63 | ) 64 | 65 | local = aiida_dynamic_workflows.engine.execution_environment("py39", "localhost") 66 | ``` 67 | 68 | ```python 69 | from functools import partial 70 | import random 71 | 72 | rand = partial(random.randint, 0, 1000) 73 | 74 | flow = aiida_dynamic_workflows.workflow.build(workflow.on(local), a=rand(), b=rand(), y=rand()) 75 | ``` 76 | 77 | And we run it: 78 | 79 | ```python 80 | run = aiida.engine.submit(flow) 81 | wait(run) 82 | ``` 83 | 84 | ## Deleting nodes from the AiiDA database 85 | 86 | 87 | Let's say that that you wish to delete the two runs from the database. 88 | 89 | AiiDA provides the following functionality of deleting the nodes from the database: 90 | 91 | ```python 92 | marked_pks, are_deleted = aiida.tools.delete_nodes([run.id]) 93 | ``` 94 | 95 | This function returns two things: 96 | 1. The first is a set containing the IDs of the nodes that were deleted (or not) 97 | 2. The second is a boolean value that is True if the nodes were actually deleted 98 | 99 | 100 | The first thing to notice is that `marked_pks` contains many more nodes than the ones we explicitly marked for deletion: 101 | 102 | ```python 103 | len(marked_pks) 104 | ``` 105 | 106 | This is because AiiDA tries to maintain the integrity of the provenance graph. 107 | 108 | If we delete the Workflow nodes then the calculation nodes that were created by the workflow, as well as all the produced data nodes, must also be deleted. 109 | 110 | 111 | We see that the above invocation did not actually delete anything: 112 | 113 | ```python 114 | are_deleted 115 | ``` 116 | 117 | This is a safety feature; to have `delete_nodes` actually delete, we must pass `dry_run=False`: 118 | 119 | ```python 120 | marked_pks, are_deleted = aiida.tools.delete_nodes([run.id], dry_run=False) 121 | ``` 122 | 123 | ```python 124 | are_deleted 125 | ``` 126 | 127 | ## Deleting the remote data 128 | 129 | 130 | Deleting the nodes from the AiiDA database is a good first step, however a typical workflow has all the intermediate data stored as `PyRemoteData` and `PyRemoteArray`. 
This means that the actual data is stored in a file on some remote filesystem (cluster NFS); only a _reference_ to the file is stored in the AiiDA database. 131 | 132 | Once we have deleted the nodes from the database we also need to ensure we remove the data from the remote filesystem, to avoid filling up our disk with unwanted data. 133 | 134 | aiida_dynamic_workflows provides the following tools for achieving this. 135 | 136 | 137 | #### `aiida_dynamic_workflows.query.unreferenced_work_directories` 138 | 139 | 140 | This function returns all CalcJob working directories that are not referenced by any RemoteData in _any profile_ in the AiiDA database. 141 | 142 | It expects a path that will be used as a root directory for the search (i.e. only paths under this root will be returned). 143 | 144 | To help with this there is `computer_work_directory`, which returns the CalcJob working directory root for the named computer: 145 | 146 | ```python 147 | from aiida_dynamic_workflows.query import unreferenced_work_directories, computer_work_directory 148 | ``` 149 | 150 | ```python 151 | unreferenced_paths = unreferenced_work_directories(computer_work_directory("localhost")) 152 | ``` 153 | 154 | We see that there are a few paths that are unreferenced by the AiiDA database: 155 | 156 | ```python 157 | unreferenced_paths 158 | ``` 159 | 160 | As these paths are not referenced by any RemoteData in the AiiDA database, they may safely be removed without invalidating the AiiDA provenance graph. 161 | 162 | As these are just plain old paths, they may be removed by any method you wish (e.g. export them to a file `to-remove.txt` and run `cat to-remove.txt | parallel rm -r {}`). 163 | However, aiida_dynamic_workflows has a convenient tool for exactly this: 164 | 165 | ```python 166 | aiida_dynamic_workflows.utils.parallel_rmtree(unreferenced_paths) 167 | ``` 168 | 169 | After removing these paths, we should see that there are no more unreferenced work directories: 170 | 171 | ```python 172 | unreferenced_work_directories(computer_work_directory("localhost")) 173 | ``` 174 | 175 | ## Preserving cached data 176 | 177 | 178 | Let's run the calculation again, twice this time: 179 | 180 | ```python 181 | original_run = aiida.engine.submit(flow) 182 | wait(original_run) 183 | ``` 184 | 185 | ```python 186 | cached_run = aiida.engine.submit(flow) 187 | wait(cached_run) 188 | ``` 189 | 190 | We see that the calculations in the second run are created from the calculations in the first run: 191 | 192 | ```python 193 | for c in original_run.called: 194 | print(c.inputs.func.name, c.uuid) 195 | ``` 196 | 197 | Indeed, we see that the data nodes for the two runs point to the same location on the remote storage: 198 | 199 | ```python 200 | original_data_paths = {k: v.get_remote_path() for k, v in original_run.outputs.return_values.items()} 201 | print(original_data_paths) 202 | ``` 203 | 204 | ```python 205 | cached_data_paths = {k: v.get_remote_path() for k, v in cached_run.outputs.return_values.items()} 206 | print(cached_data_paths) 207 | ``` 208 | 209 | ```python 210 | assert original_data_paths == cached_data_paths 211 | ``` 212 | 213 | If we delete only the original run, we therefore need to keep the remote data around, as it is still referenced by the cached run.
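As an aside, the `remote_files` helper in `aiida_dynamic_workflows.query` lists every remote path that the database still references. The snippet below is only an illustrative sketch (it assumes that `get_remote_path()` returns the same path that is stored in each node's `remote_path` attribute), but it shows how we could cross-check that the shared data is indeed still referenced:

```python
from pathlib import Path

from aiida_dynamic_workflows.query import computer_work_directory, remote_files

# All remote paths still referenced by the currently loaded profile,
# restricted to the localhost work directory.
referenced = remote_files(root=computer_work_directory("localhost"))

# The output files shared by the two runs should be among them.
assert all(Path(p) in referenced for p in cached_data_paths.values())
```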
214 | 215 | **Let's verify that this is what happens.** 216 | 217 | 218 | First let's check that there is not any unreferenced data already: 219 | 220 | ```python 221 | assert not unreferenced_work_directories(computer_work_directory("localhost")) 222 | ``` 223 | 224 | and let's check that removing the original run is not going to remove any nodes associated with the cached run: 225 | 226 | ```python 227 | marked_pks, are_deleted = aiida.tools.delete_nodes([original_run.id]) 228 | for n in marked_pks: 229 | print(repr(aiida.orm.load_node(n))) 230 | ``` 231 | 232 | ```python 233 | aiida_dynamic_workflows.report.graph(cached_run, as_png=True) 234 | ``` 235 | 236 | We indeed see that there is no overlap; only the nodes from `original_run` are going to be deleted. 237 | 238 | 239 | Let's actually delete them: 240 | 241 | ```python 242 | marked_pks, are_deleted = aiida.tools.delete_nodes([original_run.id], dry_run=False) 243 | assert are_deleted 244 | ``` 245 | 246 | Let's now check that, indeed, the data is still referenced: 247 | 248 | ```python 249 | assert not unreferenced_work_directories(computer_work_directory("localhost")) 250 | ``` 251 | 252 | Success! 253 | 254 | 255 | If we now delete the cached run: 256 | 257 | ```python 258 | _, are_deleted = aiida.tools.delete_nodes([cached_run.id], dry_run=False) 259 | assert are_deleted 260 | ``` 261 | 262 | We should see that the data is now unreferenced: 263 | 264 | ```python 265 | unrefd = unreferenced_work_directories(computer_work_directory("localhost")) 266 | print(unrefd) 267 | assert unrefd 268 | assert {str(x) for x in unrefd} == set(cached_data_paths.values()) 269 | ``` 270 | 271 | And so we can safely delete them: 272 | 273 | ```python 274 | aiida_dynamic_workflows.utils.parallel_rmtree(unrefd) 275 | ``` 276 | -------------------------------------------------------------------------------- /examples/01-calculations.md: -------------------------------------------------------------------------------- 1 | # Running individual calculations with aiida-dynamic-workflows 2 | 3 | 4 | This notebook shows how to define and run individual calculations with aiida-dynamic-workflows, and how to _manually chain the results_ from one calculation into the next one. Chaining individual calculations together in a _workflow_ will be shown in the next notebook. 5 | 6 | 7 | ### This example assumes you already have Aiida set up, as well as the relevant codes/computers 8 | 9 | 10 | If that's not your case, check out the `example_computer_setup` directory. 11 | 12 | 13 | ### The imports 14 | 15 | 16 | First things first we must import `aiida` and call `aiida.load_profile`. 17 | 18 | This loads the default Aiida profile. Each Aiida profile has a separate database for storing calculations and data, 19 | as well as separate daemons for submitting calculations. 20 | 21 | ```python 22 | import aiida 23 | 24 | aiida.load_profile() 25 | aiida.__version__ 26 | ``` 27 | 28 | Next we must import the plugin. 29 | 30 | Additionally we call `ensure_daemon_restarted()` to ensure that the Aiida daemon has loaded the latest version of the plugin. 31 | Failing to restart the daemon when aiida-dynamic-workflows is updated can give strange results, as the environment in the notebook and the environment on the daemon will differ. After a restart the daemon will continue processing any running calculations (so nothing will be lost). 
32 | 33 | ```python 34 | import aiida_dynamic_workflows as flows 35 | 36 | flows.control.ensure_daemon_restarted() 37 | flows.__version__ 38 | ``` 39 | 40 | # First define the execution environment 41 | 42 | 43 | We create an execution environment that uses the Conda environment `py39` on `my-cluster`, and will submit calculations to the `some-queue` queue. 44 | 45 | ```python 46 | cluster_env = flows.engine.execution_environment( 47 | "py39", # conda environment 48 | "my-cluster", # computer name 49 | queue=("some-queue", 24), # queue and num. cores per machine 50 | ) 51 | ``` 52 | 53 | We can also create an execution environment that uses the Conda environment on _this_ machine: 54 | 55 | ```python 56 | local_env = flows.engine.execution_environment("py39", "localhost") 57 | ``` 58 | 59 | Let's use the cluster execution environment going forward. 60 | 61 | ```python 62 | env = cluster_env 63 | ``` 64 | 65 | ## Then define some functions to run 66 | 67 | ```python 68 | @flows.step(returns="x_plus_y") 69 | def add(x: int, y: int): 70 | return x + y 71 | ``` 72 | 73 | ```python 74 | @flows.step(returns="z") 75 | def multiply(x: int, y: int) -> int: 76 | return x * y 77 | ``` 78 | 79 | ### Can be used as ordinary Python functions 80 | 81 | ```python 82 | add(1, 2) 83 | ``` 84 | 85 | ```python 86 | multiply(1, 2) 87 | ``` 88 | 89 | ```python 90 | multiply(add(3, 4), 5) 91 | ``` 92 | 93 | ### But they are really objects in the Aiida data store 94 | 95 | ```python 96 | add 97 | ``` 98 | 99 | ## We can submit them using Aiida 100 | 101 | 102 | We first build the calculation: 103 | 104 | ```python 105 | z = flows.engine.apply(add, x=1, y=2) 106 | z 107 | ``` 108 | 109 | We see that `engine.apply` produces a kind of specification for what to run. 110 | 111 | This is not yet enough to be able to run the thing: we need to specify _where_ to run it. 112 | 113 | We do this with the `on` method, which expects an execution environment: 114 | 115 | ```python 116 | z.on(env) 117 | ``` 118 | 119 | Note that the specification returned from the `on` method now contains a `queue_name`, and a `code` (which includes a reference to the cluster to run on). 120 | 121 | 122 | Finally we will actually run this specification. 123 | 124 | Even though the notebook is blocked, execution of `add` is actually happening _on the cluster_. 125 | 126 | ```python 127 | r = aiida.engine.run(z.on(env)) 128 | ``` 129 | 130 | If the execution of the cell above is hanging for too long, you may want to drop to the command line and inspect the running processes, e.g. using via `verdi process list`. 131 | The (verbose) daemon logs should be showing "copying file/folder" + Slurm-related stuff. 132 | 133 | If you're having trouble with remote execution, feel free to continue through the rest of the tutorial on your local computer by setting `env=local_env`. 134 | 135 | ```python 136 | %%time 137 | r["return_values"]["x_plus_y"].fetch_value() 138 | ``` 139 | 140 | This is good for debugging, but typically you don't want to block the notebook. 
141 | 142 | Instead of `run` you can use `submit` to get a daemon worker to do the waiting for you: 143 | 144 | ```python 145 | r_submitted = aiida.engine.submit(z.on(env)) 146 | ``` 147 | 148 | ```python 149 | r_submitted 150 | ``` 151 | 152 | ```python 153 | print(flows.report.progress(r_submitted)) 154 | flows.report.graph(r_submitted) 155 | ``` 156 | 157 | Only a _reference to a file on the cluster_ is returned: 158 | 159 | ```python 160 | remote_value = r_submitted.outputs.return_values.x_plus_y 161 | remote_value 162 | ``` 163 | 164 | ```python 165 | remote_value.pickle_path 166 | ``` 167 | 168 | ```python 169 | %%time 170 | remote_value.fetch_value() 171 | ``` 172 | 173 | ## We can pass the output `PyRemoteData` as an _input_ to the next calculation 174 | 175 | ```python 176 | r_pass_as_remote_value = aiida.engine.run( 177 | flows.engine 178 | .apply(multiply, x=remote_value, y=2) 179 | .on(env) 180 | ) 181 | ``` 182 | 183 | ```python 184 | actual_return_value = r_pass_as_remote_value["return_values"]["z"] 185 | ``` 186 | 187 | ```python 188 | %%time 189 | actual_return_value.fetch_value() 190 | ``` 191 | 192 | ## We can also do maps, which will make use of Slurm Job arrays 193 | 194 | ```python 195 | import numpy as np 196 | 197 | xs = np.arange(100).reshape(10, 10) 198 | ys = np.arange(100, 200).reshape(10, 5, 2) 199 | ``` 200 | 201 | ```python 202 | z = ( 203 | flows.engine 204 | .map_(add, "x[i, j], y[j, k, l] -> z[i, j, k, l]") 205 | .on(env, max_concurrent_machines=2) 206 | ) 207 | ``` 208 | 209 | ```python 210 | %%time 211 | r_map = aiida.engine.submit(z.finalize(x=xs, y=ys)) 212 | ``` 213 | 214 | ```python 215 | print(flows.report.progress(r_map)) 216 | flows.report.graph(r_map) 217 | ``` 218 | 219 | ```python 220 | remote_mapped_values = r_map.outputs.return_values.x_plus_y 221 | remote_mapped_values.shape 222 | ``` 223 | 224 | Each element in the `map` is in its own Slurm job (in a single job array), _and they all write to separate files_. 225 | 226 | ```python 227 | %%time 228 | a = remote_mapped_values.fetch_value() 229 | a 230 | ``` 231 | 232 | `.fetch_value()` uses the default Aiida transport, and so is quite inefficient for loading many files (as in this example) 233 | 234 | Passing `local_files=True` is useful when the Aiida working directory on `my-cluster` is actually mounted locally on the machine where this notebook is running. 235 | 236 | ```python 237 | remote_mapped_values.get_remote_path() 238 | ``` 239 | 240 | ```python 241 | %%time 242 | a = remote_mapped_values.fetch_value(local_files=True) 243 | a 244 | ``` 245 | 246 | The loading operation is, consequently, several times faster. 247 | 248 | 249 | ## We can use the output of _that_ map as the input to another, of course! 
250 | 251 | ```python 252 | chained_map = ( 253 | flows.engine 254 | .map_( 255 | multiply, 256 | "x[i, j, k, l] -> z[i, j, k, l]", # Now we only map over 'x'; 'y' is treated single value 257 | max_concurrent_machines=1, 258 | ).on(env) 259 | .finalize(x=remote_mapped_values, y=5) 260 | ) 261 | ``` 262 | 263 | ```python 264 | chained_map_job = aiida.engine.submit(chained_map) 265 | ``` 266 | 267 | ```python 268 | print(flows.report.progress(chained_map_job)) 269 | flows.report.graph(chained_map_job) 270 | ``` 271 | 272 | ```python 273 | rv = chained_map_job.outputs.return_values.z 274 | print(rv.shape) 275 | %time rv.fetch_value(local_files=True) 276 | ``` 277 | 278 | ## And then perform a reduction 279 | 280 | ```python 281 | @flows.step 282 | def reduce(xs: "FileBasedObjectArray"): 283 | return np.sum(xs.to_array()) 284 | ``` 285 | 286 | ```python 287 | r = aiida.engine.submit(flows.engine.apply(reduce, xs=rv).on(env)) 288 | ``` 289 | 290 | ```python 291 | print(flows.report.progress(r)) 292 | flows.report.graph(r) 293 | ``` 294 | 295 | ```python 296 | r.outputs.return_values._return_value.fetch_value() 297 | ``` 298 | 299 | # Defining the resource requirements for functions 300 | 301 | 302 | You can specify that functions need a certain amount of resources to run by passing a `resources` dictionary to `step`. 303 | 304 | 305 | Currently only `memory` and `cores` may be specified; these are passed to Slurm using the `--mem` and `--cpus-per-task` flags. 306 | 307 | For example, the following function declares that it requires 6 cores to run, and a total of `25GB` of memory (for the whole thing, not per core). 308 | 309 | 310 | A single instance of the function will run on this allocation, so we may use whatever method we wish to distribute work over the cores. In this example we are using `loky` to perform a simple map-reduce, but you could also use, e.g. an OpenMP-enabled BLAS to distribute a matrix computation over the cores. 311 | 312 | ```python 313 | import loky 314 | import time 315 | 316 | @flows.step(returns=("z", "elapsed_time"), resources=dict(memory="25GB", cores=6)) 317 | def f_on_several_cores(xs: list) -> list: 318 | 319 | def go(x): 320 | time.sleep(5) 321 | return x ** 2 322 | 323 | with loky.ProcessPoolExecutor(6) as ex: 324 | start = time.time() 325 | r = sum(ex.map(go, xs)) 326 | return r, f"execution time: {time.time() - start:.2f}s" 327 | ``` 328 | 329 | ```python 330 | r = aiida.engine.submit( 331 | flows.engine.apply( 332 | f_on_several_cores, xs=list(range(18)) 333 | ).on(cluster_env) 334 | ) 335 | ``` 336 | 337 | ```python 338 | print(flows.report.progress(r)) 339 | flows.report.graph(r) 340 | ``` 341 | 342 | ```python 343 | r.outputs.return_values.z.fetch_value() 344 | ``` 345 | 346 | ```python 347 | r.outputs.return_values.elapsed_time.fetch_value() 348 | ``` 349 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/report.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | from collections import Counter 6 | import textwrap 7 | from typing import Union 8 | 9 | from IPython.display import Image 10 | import aiida.cmdline.utils.common as cmd 11 | from aiida.cmdline.utils.query.formatting import format_relative_time 12 | import aiida.orm 13 | from aiida.tools.visualization import Graph 14 | import graphviz 15 | 16 | from . 
import query 17 | from .calculations import PyCalcJob, PyMapJob, num_mapjob_tasks 18 | from .data import PyRemoteArray, PyRemoteData 19 | from .utils import render_png 20 | from .workchains import RestartedPyCalcJob, RestartedPyMapJob 21 | from .workflow import PyWorkChain 22 | 23 | __all__ = [ 24 | "log", 25 | "graph", 26 | "progress", 27 | "running_workflows", 28 | "recent_workflows", 29 | ] 30 | 31 | 32 | ProcessType = Union[aiida.orm.ProcessNode, int, str] 33 | 34 | 35 | def log(proc: ProcessType) -> str: 36 | """Return the output of 'verdi process report' for the given process. 37 | 38 | Parameters 39 | ---------- 40 | proc 41 | The Aiida node for the process, or a numeric ID, or a UUID. 42 | """ 43 | proc = _ensure_process_node(proc) 44 | if isinstance(proc, aiida.orm.CalcJobNode): 45 | return cmd.get_calcjob_report(proc) 46 | elif isinstance(proc, aiida.orm.WorkChainNode): 47 | return cmd.get_workchain_report(proc, levelname="REPORT") 48 | elif isinstance(proc, (aiida.orm.CalcFunctionNode, aiida.orm.WorkFunctionNode)): 49 | return cmd.get_process_function_report(proc) 50 | else: 51 | raise TypeError(f"Cannot get report for processes of type '{type(proc)}'") 52 | 53 | 54 | def graph( 55 | proc: ProcessType, size=(20, 20), as_png=False 56 | ) -> Union[graphviz.Digraph, Image]: 57 | """Return a graph visualization of a calculation or workflow. 58 | 59 | Parameters 60 | ---------- 61 | proc 62 | The Aiida node for the process, or a numeric ID, or a UUID. 63 | """ 64 | proc = _ensure_process_node(proc) 65 | graph = Graph( 66 | graph_attr={"size": ",".join(map(str, size)), "rankdir": "LR"}, 67 | node_sublabel_fn=_node_sublabel, 68 | ) 69 | graph.recurse_descendants(proc, include_process_inputs=True) 70 | if as_png: 71 | return render_png(graph.graphviz) 72 | return graph.graphviz 73 | 74 | 75 | def progress(proc: ProcessType) -> str: 76 | """Return a progress report of the given calculation or workflow. 77 | 78 | Parameters 79 | ---------- 80 | proc 81 | The Aiida node for the process, or a numeric ID, or a UUID. 82 | """ 83 | proc = _ensure_process_node(proc) 84 | if isinstance(proc, aiida.orm.CalcJobNode): 85 | return _calcjob_progress(proc) 86 | elif isinstance(proc, aiida.orm.WorkChainNode): 87 | if issubclass(proc.process_class, PyWorkChain): 88 | return _workflow_progress(proc) 89 | elif issubclass(proc.process_class, (RestartedPyCalcJob, RestartedPyMapJob)): 90 | return _restarted_calcjob_progress(proc) 91 | elif isinstance(proc, (aiida.orm.CalcFunctionNode, aiida.orm.WorkFunctionNode)): 92 | return _function_progress(proc) 93 | else: 94 | raise TypeError( 95 | "Cannot get a progress report for processes of type '{type(proc)}'" 96 | ) 97 | 98 | 99 | def running_workflows() -> str: 100 | """Return a progress report of the running workflows.""" 101 | r = _flatten(query.running_workflows().iterall()) 102 | return "\n\n".join(map(_workflow_progress, r)) 103 | 104 | 105 | def recent_workflows(days: int = 0, hours: int = 0, minutes: int = 0) -> str: 106 | """Return a progress report of all workflows that were started recently. 107 | 108 | This also includes workflows that are already complete. 109 | 110 | Parameters 111 | ---------- 112 | days, hours, minutes 113 | Any workflows started more recently than this many days/minutes/hours 114 | will be included in the result of the query. 
115 | """ 116 | r = _flatten(query.recent_workflows(**locals()).iterall()) 117 | return "\n\n".join(map(_workflow_progress, r)) 118 | 119 | 120 | def _flatten(xs): 121 | for ys in xs: 122 | yield from ys 123 | 124 | 125 | def _workflow_progress(p: aiida.orm.WorkChainNode) -> str: 126 | assert issubclass(p.process_class, PyWorkChain) 127 | lines = [ 128 | # This is a _single_ output line 129 | f"{p.label or ''} (pk: {p.id}) " 130 | f"[{_process_status(p)}, created {format_relative_time(p.ctime)}]" 131 | ] 132 | for c in p.called: 133 | lines.append(textwrap.indent(progress(c), " ")) 134 | 135 | return "\n".join(lines) 136 | 137 | 138 | def _restarted_calcjob_progress(p: aiida.orm.WorkChainNode) -> str: 139 | assert issubclass(p.process_class, (RestartedPyCalcJob, RestartedPyMapJob)) 140 | lines = [ 141 | f"with_restarts({p.get_option('max_restarts')}) " 142 | f"(pk: {p.id}) [{_process_status(p)}]" 143 | ] 144 | for i, c in enumerate(p.called, 1): 145 | if c.label == p.label: 146 | # The launched process is the payload that we are running with restarts 147 | s = f"attempt {i}: {progress(c)}" 148 | else: 149 | # Some post-processing (for RestartedPyMapJob) 150 | s = progress(c) 151 | lines.append(textwrap.indent(s, " ")) 152 | 153 | return "\n".join(lines) 154 | 155 | 156 | def _calcjob_progress(p: aiida.orm.CalcJobNode) -> str: 157 | assert issubclass(p.process_class, PyCalcJob) 158 | s = p.get_state() or p.process_state 159 | 160 | # Show more detailed info while we're waiting for the Slurm job. 161 | if s == aiida.common.CalcJobState.WITHSCHEDULER: 162 | sections = [ 163 | f"created {format_relative_time(p.ctime)}", 164 | ] 165 | if p.get_scheduler_state(): 166 | sections.append(f"{p.get_scheduler_state().value} job {p.get_job_id()}") 167 | 168 | # Show total number of tasks and states of remaining tasks in mapjobs. 169 | job_states = _slurm_job_states(p) 170 | if job_states: 171 | if issubclass(p.process_class, PyMapJob): 172 | task_counts = Counter(job_states) 173 | task_states = ", ".join(f"{k}: {v}" for k, v in task_counts.items()) 174 | task_summary = f"{sum(task_counts.values())} / {num_mapjob_tasks(p)}" 175 | sections.extend( 176 | [ 177 | f"remaining tasks ({task_summary})", 178 | f"task states: {task_states}", 179 | ] 180 | ) 181 | else: 182 | sections.append(f"job state: {job_states[0]}") 183 | msg = ", ".join(sections) 184 | else: 185 | msg = _process_status(p) 186 | 187 | return f"{p.label} (pk: {p.id}) [{msg}]" 188 | 189 | 190 | def _process_status(p: aiida.orm.ProcessNode) -> str: 191 | 192 | generic_failure = ( 193 | f"failed, run 'aiida_dynamic_workflows.report.log({p.id})' " 194 | "for more information" 195 | ) 196 | 197 | if p.is_finished and not p.is_finished_ok: 198 | # 's.value' is "finished", even if the process finished with a non-zero exit 199 | # code. We prefer the more informative 'failed' + next steps. 200 | msg = generic_failure 201 | elif p.is_killed: 202 | # Process was killed: 'process_status' includes the reason why. 
203 | msg = f"killed, {p.process_status}" 204 | elif p.is_excepted: 205 | # Process failed, and the error occured in the Aiida layers 206 | msg = generic_failure 207 | elif p.is_created_from_cache: 208 | msg = ( 209 | f"{p.process_state.value} " 210 | f"(created from cache, uuid: {p.get_cache_source()})" 211 | ) 212 | elif p.is_finished_ok: 213 | msg = "success" 214 | else: 215 | try: 216 | # Calcjobs have 'get_state', which gives more fine-grained information 217 | msg = p.get_state().value 218 | except AttributeError: 219 | msg = p.process_state.value 220 | 221 | return msg 222 | 223 | 224 | def _function_progress( 225 | p: Union[aiida.orm.CalcFunctionNode, aiida.orm.WorkFunctionNode] 226 | ) -> str: 227 | return f"{p.label} (pk: {p.id}) [{p.process_state.value}]" 228 | 229 | 230 | def _slurm_job_states(process): 231 | info = process.get_last_job_info() 232 | if not info: 233 | return [] 234 | else: 235 | return [x[1] for x in info.raw_data] 236 | 237 | 238 | def _ensure_process_node( 239 | node_or_id: Union[aiida.orm.ProcessNode, int, str] 240 | ) -> aiida.orm.ProcessNode: 241 | if isinstance(node_or_id, aiida.orm.ProcessNode): 242 | return node_or_id 243 | else: 244 | return aiida.orm.load_node(node_or_id) 245 | 246 | 247 | def _node_sublabel(node): 248 | if isinstance(node, aiida.orm.CalcJobNode) and issubclass( 249 | node.process_class, PyCalcJob 250 | ): 251 | labels = [f"function: {node.inputs.func.name}"] 252 | if state := node.get_state(): 253 | labels.append(f"State: {state.value}") 254 | if (job_id := node.get_job_id()) and (state := node.get_scheduler_state()): 255 | labels.append(f"Job: {job_id} ({state.value})") 256 | if node.exit_status is not None: 257 | labels.append(f"Exit Code: {node.exit_status}") 258 | if node.exception: 259 | labels.append("excepted") 260 | return "\n".join(labels) 261 | elif isinstance(node, (PyRemoteData, PyRemoteArray)): 262 | try: 263 | create_link = node.get_incoming().one() 264 | except Exception: 265 | return aiida.tools.visualization.graph.default_node_sublabels(node) 266 | if create_link.link_label.startswith("return_values"): 267 | return create_link.link_label.split("__")[1] 268 | else: 269 | return create_link.link_label 270 | else: 271 | return aiida.tools.visualization.graph.default_node_sublabels(node) 272 | -------------------------------------------------------------------------------- /examples/02-workflows.md: -------------------------------------------------------------------------------- 1 | # Dynamic workflows 2 | 3 | This notebook shows how to compose several steps into a workflow and launch them all at once. 4 | 5 | Contrast this to [01-calculations.md](./01-calculations.md), where we waited until calculations were finished before passing their data to the next calculation. 6 | 7 | 8 | First we do the usual imports and define an execution environment 9 | 10 | ```python 11 | from dataclasses import dataclass 12 | import time 13 | 14 | import numpy as np 15 | import toolz 16 | ``` 17 | 18 | ```python 19 | import aiida 20 | aiida.load_profile() 21 | 22 | aiida.__version__ 23 | ``` 24 | 25 | ```python 26 | import aiida_dynamic_workflows as flows 27 | 28 | flows.control.ensure_daemon_restarted() 29 | flows.__version__ 30 | ``` 31 | 32 | ```python 33 | cluster_env = flows.engine.execution_environment( 34 | "py39", # conda environment 35 | "my-cluster", # computer name 36 | queue=("some-queue", 24), # queue and num. 
cores per machine 37 | ) 38 | ``` 39 | 40 | ## Step definitions 41 | 42 | 43 | Next we define a bunch of individual "steps" from Python functions. 44 | 45 | as we saw in [01-calculations.md](./01-calculations.md), this will save the pickled function in the Aiida database 46 | 47 | ```python 48 | from aiida_dynamic_workflows import step 49 | ``` 50 | 51 | ```python 52 | @dataclass(frozen=True) 53 | class Geometry: 54 | x : float 55 | y : float 56 | 57 | 58 | @dataclass(frozen=True) 59 | class Mesh: 60 | geometry : Geometry 61 | mesh_size : float 62 | 63 | @dataclass(frozen=True) 64 | class Materials: 65 | geometry: Geometry 66 | materials: list[str] 67 | 68 | @dataclass(frozen=True) 69 | class Electrostatics: 70 | mesh: Mesh 71 | materials: Materials 72 | voltages: list[float] 73 | ``` 74 | 75 | ```python 76 | @step(returns="geo") 77 | def make_geometry(x: float, y: float) -> Geometry: 78 | time.sleep(5) # do some work 79 | return Geometry(x, y) 80 | 81 | 82 | @step(returns=("mesh", "coarse_mesh")) 83 | def make_mesh( 84 | geo: Geometry, 85 | mesh_size: float, 86 | coarse_mesh_size: float, 87 | ) -> tuple[Mesh, Mesh]: 88 | time.sleep(5) # do some work 89 | return Mesh(geo, mesh_size), Mesh(geo, coarse_mesh_size) 90 | 91 | 92 | @step(returns="materials") 93 | def make_materials(geo: Geometry) -> Materials: 94 | time.sleep(5) # do some work 95 | return Materials(geo, ["a", "b", "c"]) 96 | 97 | 98 | @step(returns="electrostatics") 99 | def run_electrostatics( 100 | mesh: Mesh, materials: Materials, V_left: float, V_right: float 101 | ) -> Electrostatics: 102 | time.sleep(10) # do some work 103 | return Electrostatics(mesh, materials, [V_left, V_right]) 104 | 105 | @step(returns="charge") 106 | def get_charge(electrostatics: Electrostatics) -> float: 107 | # obviously not actually the charge; but we should return _some_ number that 108 | # is "derived" from the electrostatics. 109 | return sum(electrostatics.voltages) 110 | ``` 111 | 112 | This final step is a little special. 113 | 114 | As we shall see in a couple cell's time this step will be used on the output `get_charge`, which will be "mapped" over its inputs. 115 | 116 | As a consequence, `average_charge` will be passed a reference to an "array" of values, where each value in the array is actually stored in a separate file on disk, hence the strance type signature. 117 | 118 | ```python 119 | @step(returns="average_charge") 120 | def average_charge(charge: "FileBasedObjectArray") -> float: 121 | # .to_array() is a bit dumb; it loads in _all_ the data at once, but 122 | # this is the simplest way, and in this example the data is not so large. 
123 | return np.mean(charge.to_array()) 124 | ``` 125 | 126 | ## Composing workflows 127 | 128 | 129 | Here we compose up 2 "workflows": `model_flow` and `electrostatics_flow`: 130 | 131 | ```python 132 | from aiida_dynamic_workflows.workflow import first, concurrently, map_, new_workflow 133 | 134 | model_flow = ( 135 | new_workflow(name="model_flow") 136 | .then(make_geometry) 137 | .then( 138 | # These 2 steps will be done at the same time 139 | concurrently(make_mesh, make_materials) 140 | ) 141 | ) 142 | 143 | electrostatics_flow = ( 144 | new_workflow(name="electrostatics_flow") 145 | .then( 146 | map_( 147 | run_electrostatics, 148 | "V_left[a], V_right[b] -> electrostatics[a, b]", 149 | ) 150 | ).then( 151 | map_( 152 | get_charge, 153 | "electrostatics[i, j] -> charge[i, j]" 154 | ) 155 | ).then(average_charge) 156 | ) 157 | ``` 158 | 159 | We see that `electrostatics_flow` makes use of the `map_` function, which takes the step to execute, as well as a specification for how to map the inputs to the outputs. 160 | 161 | In the above example we see that `electrostatics_flow` expects `V_left` and `V_right` to be 1D arrays, and it will `run_electrostatics` for each pair of values in these two arrays (an "outer product"), producing a 2D array of values. 162 | 163 | The next step takes each of the elements in this 2D array and runs `get_charge` on them. 164 | 165 | The final step of `electrostatics_flow` (`average_charge`) takes the _whole 2D `charge` array_ and produces a single value. 166 | 167 | 168 | We can inspect what parameters and what outputs are produced by each flow: 169 | 170 | ```python 171 | model_flow.parameters, model_flow.all_outputs 172 | ``` 173 | 174 | ```python 175 | electrostatics_flow.parameters, electrostatics_flow.all_outputs 176 | ``` 177 | 178 | Note that the `mat_data` and `mesh` outputs from `model_flow` "line up" with parameters of the same name of `electrostatics_flow`. 179 | 180 | This enables us to `join` the two flows together: 181 | 182 | ```python 183 | total_flow = ( 184 | new_workflow(name="total_electrostatics") 185 | .join(model_flow) 186 | .join(electrostatics_flow) 187 | .returning("electrostatics", average_charge="avg_electrostatic_charge") 188 | ) 189 | ``` 190 | 191 | Invoking `returning` allows us to declare which of all the outputs should be considered "return values" of the workflow: 192 | 193 | ```python 194 | total_flow.returns 195 | ``` 196 | 197 | This is purely a convenience; all outputs produced by `total_flow` will be inspectable. 198 | 199 | 200 | We can finally visualize the workflow with `.visualize()`: 201 | 202 | ```python 203 | total_flow.visualize(as_png=True) 204 | ``` 205 | 206 | Ovals represent **data** and rectangles represent **calculations**. 207 | 208 | **grey** ovals represent _inputs_, while **white** ovals represent "intermediate" data. 209 | 210 | Any **red** rectangles indicate "map" calculations. **red** ovals represent data that is being mapped over / produced by a "map" step. 
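As an aside, the outer-product behaviour of the map specification used above can be sanity-checked directly with the `MapSpec` helper from `aiida_dynamic_workflows.common.mapspec`. This is only an illustrative sketch; the workflow itself does not require it:

```python
from aiida_dynamic_workflows.common.mapspec import MapSpec

spec = MapSpec.from_string("V_left[a], V_right[b] -> electrostatics[a, b]")

# 1D inputs of length 10 and 20 produce a 10 x 20 "electrostatics" array.
spec.shape({"V_left": (10,), "V_right": (20,)})
```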
211 | 212 | 213 | ## Running the workflow 214 | 215 | 216 | First, we create a dictionary of all the inputs required by `total_flow`: 217 | 218 | ```python 219 | total_flow.parameters 220 | ``` 221 | 222 | ```python 223 | inputs = dict( 224 | mesh_size=0.01, 225 | V_left=np.linspace(0, 2, 10), 226 | V_right=np.linspace(-0.5, 0.5, 20), 227 | x=0.1, 228 | y=0.2, 229 | coarse_mesh_size=0.05, 230 | ) 231 | ``` 232 | 233 | Then we combine the workflow and the inputs into a specification that Aiida can run: 234 | 235 | ```python 236 | ready = flows.workflow.build( 237 | total_flow.on(cluster_env), 238 | **inputs, 239 | ) 240 | ``` 241 | 242 | Note that, similarly to single calculations, the workflow has an `on` method that can be used to specify where the calculations in the workflow should be run. 243 | 244 | 245 | Finally, we submit the workflow to the Aiida daemon: 246 | 247 | ```python 248 | running_flow = aiida.engine.submit(ready) 249 | ``` 250 | 251 | ## Seeing what's happening 252 | 253 | 254 | We can print a progress report of what's going on: 255 | 256 | ```python 257 | print(flows.report.progress(running_flow)) 258 | ``` 259 | 260 | And visualize the workflow graph: 261 | 262 | ```python 263 | flows.report.graph(running_flow) 264 | ``` 265 | 266 | ### If you restart your notebook 267 | 268 | 269 | As soon as you `submit`, your workflow run is recorded in the Aiida database, so even if you restart your notebook you will not "lose" the running workflow. 270 | 271 | You can use `running_workflows()` to get a summary of the workflows that are currently running: 272 | 273 | ```python 274 | print(flows.report.running_workflows()) 275 | ``` 276 | 277 | You can also get a summary of all the workflows started recently, e.g.: 278 | 279 | ```python 280 | print(flows.report.recent_workflows(days=2)) # All workflows started in the last 2 days. 281 | ``` 282 | 283 | ## Viewing results 284 | 285 | 286 | Once the workflow has completed we can get the returned values by inspecting `outputs.return_values`: 287 | 288 | ```python 289 | running_flow.outputs.return_values 290 | ``` 291 | 292 | Note that to get an inspectable value back we use `fetch_value()`, which pulls the cloudpickle blob from the cluster filesystem and loads it: 293 | 294 | ```python 295 | %%time 296 | running_flow.outputs.return_values.avg_electrostatic_charge.fetch_value() 297 | ``` 298 | 299 | We can also inspect any intermediate results by loading the appropriate data: 300 | 301 | ```python 302 | %%time 303 | running_flow.called[-2].outputs.return_values.charge.fetch_value(local_files=True)[:2, :2] 304 | ``` 305 | 306 | ## Viewing anything else 307 | 308 | 309 | We can always load any object that is stored in the database by querying for its "primary key" or "UUID". 310 | 311 | For example, if we wanted the database node corresponding to the step `make_geometry` from the above run, we could: 312 | 313 | ```python 314 | ## NB: change the "5269" to the "primary key" of the "make_geometry" step 315 | ## You can get this information from the call-graph above. 316 | executed_geometry_step = aiida.orm.load_node(5269) 317 | ``` 318 | 319 | We can get, for example, the output from `sacct` for the completed job: 320 | 321 | ```python 322 | executed_geometry_step.get_detailed_job_info() 323 | ``` 324 | 325 | Or the contents of `stdout` and `stderr` from the job: 326 | 327 | ```python 328 | executed_geometry_step.get_scheduler_stdout() 329 | ``` 330 | 331 | In a pinch we can also get the directory on the cluster where the job ran.
332 | We can use this to manually inspect input/output files for sanity. 333 | 334 | ```python 335 | executed_geometry_step.get_remote_workdir() 336 | ``` 337 | 338 | ## Inspecting "sample plans" for results 339 | 340 | 341 | Often, given the result of a simulation we will want to be able to see the parameters that produced it. 342 | 343 | For example, the above workflow produces an intermediate result `charges`, and we might want to know what values of the inputs `x`, `y`, `V_left` etc. correspond to each values in the `charges` array. 344 | 345 | We can query this using `input_samples`: 346 | 347 | ```python 348 | import pandas as pd 349 | 350 | charges = running_flow.called[-2].outputs.return_values.charge 351 | 352 | df = pd.DataFrame(flows.input_samples(charges)) 353 | 354 | df 355 | ``` 356 | 357 | We see that we can feed the output into `pd.DataFrame` to get a dataframe of samples. 358 | 359 | Even though `charges` is a 2D array: 360 | 361 | ```python 362 | charges.shape 363 | ``` 364 | 365 | The samples are still presented as a (1D) dataframe. 366 | 367 | The rows of the dataframe are ordered in the same way as a _flattened_ `charges`. 368 | 369 | We can add another column to the dataframe so that the result is reported along with the inputs: 370 | 371 | ```python 372 | df_with_results = df.assign(charge=charges.fetch_value(local_files=True).reshape(-1)) 373 | df_with_results 374 | ``` 375 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/workchains.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | 5 | from collections import defaultdict 6 | from typing import Any, Dict, Optional 7 | 8 | from aiida.engine import WorkChain, append_, if_, while_ 9 | import aiida.orm 10 | import numpy as np 11 | import toolz 12 | 13 | from . import common 14 | from .calculations import ( 15 | PyCalcJob, 16 | PyMapJob, 17 | array_job_spec_from_booleans, 18 | expected_mask, 19 | merge_remote_arrays, 20 | ) 21 | 22 | 23 | # Subclass needed for "option" getters/setters, so that a WorkChain 24 | # can transparently wrap a CalcJob. 25 | class WorkChainNode(aiida.orm.WorkChainNode): 26 | """ORM class for nodes representing the execution of a WorkChain.""" 27 | 28 | def get_option(self, name: str) -> Optional[Any]: 29 | """Return the value of an option that was set for this CalcJobNode.""" 30 | return self.get_attribute(name, None) 31 | 32 | def set_option(self, name: str, value: Any) -> None: 33 | """Set an option to the given value.""" 34 | self.set_attribute(name, value) 35 | 36 | def get_options(self) -> Dict[str, Any]: 37 | """Return the dictionary of options set for this CalcJobNode.""" 38 | options = {} 39 | for name in self.process_class.spec_options.keys(): 40 | value = self.get_option(name) 41 | if value is not None: 42 | options[name] = value 43 | 44 | return options 45 | 46 | def set_options(self, options: Dict[str, Any]) -> None: 47 | """Set the options for this CalcJobNode.""" 48 | for name, value in options.items(): 49 | self.set_option(name, value) 50 | 51 | 52 | # Hack to make this new node type use the Aiida logger. 53 | # This is important so that WorkChains that use this node type also 54 | # use the Aiida logger. 55 | WorkChainNode._logger = aiida.orm.WorkChainNode._logger 56 | 57 | 58 | class RestartedPyMapJob(WorkChain): 59 | """Workchain that resubmits a PyMapJob until all the tasks are complete. 
60 | 61 | Tasks in the PyMapJob that succeeded on previous runs will not be resubmitted. 62 | """ 63 | 64 | _node_class = WorkChainNode 65 | 66 | @classmethod 67 | def define(cls, spec): # noqa: D102 68 | super().define(spec) 69 | spec.expose_inputs(PyMapJob) 70 | spec.expose_outputs(PyMapJob, include=["return_values", "exception"]) 71 | spec.input( 72 | "metadata.options.max_restarts", 73 | valid_type=int, 74 | default=5, 75 | help=( 76 | "Maximum number of iterations the work chain will " 77 | "restart the process to finish successfully." 78 | ), 79 | ) 80 | spec.exit_code( 81 | 410, 82 | "MAXIMUM_RESTARTS_EXCEEDED", 83 | message="The maximum number of restarts was exceeded.", 84 | ) 85 | 86 | spec.outline( 87 | cls.setup, 88 | while_(cls.should_run)(cls.run_mapjob, cls.inspect_result), 89 | if_(cls.was_restarted)(cls.merge_arrays, cls.extract_merged_arrays).else_( 90 | cls.pass_through_arrays 91 | ), 92 | cls.output, 93 | ) 94 | 95 | def setup(self): # noqa: D102 96 | self.report("Setting up") 97 | 98 | mapspec = common.MapSpec.from_string(self.inputs.metadata.options.mapspec) 99 | mapped_inputs = { 100 | k: v for k, v in self.inputs.kwargs.items() if k in mapspec.parameters 101 | } 102 | 103 | self.ctx.required_mask = expected_mask(mapspec, mapped_inputs) 104 | self.ctx.total_output_mask = np.full_like(self.ctx.required_mask, True) 105 | 106 | self.ctx.job_shape = self.ctx.required_mask.shape 107 | self.ctx.total_num_tasks = np.sum(~self.ctx.required_mask) 108 | 109 | self.ctx.iteration = 0 110 | self.ctx.launched_mapjobs = [] 111 | 112 | @property 113 | def n_tasks_remaining(self) -> int: 114 | """Return the number of tasks that remain to be run.""" 115 | return self.ctx.total_num_tasks - np.sum(~self.ctx.total_output_mask) 116 | 117 | @property 118 | def remaining_task_array(self) -> np.ndarray: 119 | """Return a boolean array indicating which tasks still need to be run.""" 120 | return np.logical_xor(self.ctx.required_mask, self.ctx.total_output_mask) 121 | 122 | @property 123 | def has_all_results(self) -> bool: 124 | """Return True iff all the necessary outputs are present.""" 125 | return np.all(self.ctx.total_output_mask == self.ctx.required_mask) 126 | 127 | def should_run(self): # noqa: D102 128 | return ( 129 | not self.has_all_results 130 | and self.ctx.iteration < self.inputs.metadata.options.max_restarts 131 | ) 132 | 133 | def run_mapjob(self): # noqa: D102 134 | # Run failed elements only, using custom 135 | # Slurm parameters: -A 1,3-10,20%24 136 | self.ctx.iteration += 1 137 | 138 | self.report(f"Running MapJob for {self.n_tasks_remaining} tasks") 139 | 140 | inputs = self.exposed_inputs(PyMapJob) 141 | 142 | # Modify "metadata.options.custom_scheduler_commands" so that the 143 | # correct tasks in the Slurm Job Array are run. 
144 | # NOTE: This assumes we are running on Slurm 145 | options = inputs["metadata"]["options"] 146 | csc = options.custom_scheduler_commands 147 | # Remove the existing Array Job specification 148 | commands = [x for x in csc.split("\n") if "--array" not in x] 149 | # Add an updated Array Job specification 150 | task_spec = array_job_spec_from_booleans(self.remaining_task_array.reshape(-1)) 151 | max_concurrent_jobs = ( 152 | options.cores_per_machine * options.max_concurrent_machines 153 | ) 154 | commands.append(f"#SBATCH --array={task_spec}%{max_concurrent_jobs}") 155 | inputs = toolz.assoc_in( 156 | inputs, 157 | ("metadata", "options", "custom_scheduler_commands"), 158 | "\n".join(commands), 159 | ) 160 | 161 | # "max_restarts" does not apply to PyMapJobs 162 | del inputs["metadata"]["options"]["max_restarts"] 163 | 164 | fut = self.submit(PyMapJob, **inputs) 165 | return self.to_context(launched_mapjobs=append_(fut)) 166 | 167 | def inspect_result(self): # noqa: D102 168 | self.report("Inspecting result") 169 | 170 | job = self.ctx.launched_mapjobs[-1] 171 | 172 | m = result_mask(job, self.ctx.job_shape) 173 | self.ctx.total_output_mask[~m] = False 174 | 175 | self.report( 176 | f"{np.sum(~m)} tasks succeeded, " 177 | f"{self.n_tasks_remaining} / {self.ctx.total_num_tasks} remaining" 178 | ) 179 | 180 | def was_restarted(self): # noqa: D102 181 | return self.ctx.iteration > 1 182 | 183 | def merge_arrays(self): # noqa: D102 184 | self.report(f"Gathering arrays from {self.ctx.iteration} mapjobs.") 185 | assert self.ctx.iteration > 1 186 | 187 | exception_arrays = [] 188 | return_value_arrays = defaultdict(list) 189 | for j in self.ctx.launched_mapjobs: 190 | if "exception" in j.outputs: 191 | exception_arrays.append(j.outputs.exception) 192 | if "return_values" in j.outputs: 193 | for k, v in j.outputs.return_values.items(): 194 | return_value_arrays[k].append(v) 195 | 196 | # 'merge_remote_array' must take **kwargs (this is a limitation of Aiida), so 197 | # we convert a list of inputs into a dictionary with keys 'x0', 'x1' etc. 198 | def list_to_dict(lst): 199 | return {f"x{i}": x for i, x in enumerate(lst)} 200 | 201 | context_update = dict() 202 | 203 | # TODO: switch 'runner.run_get_node' to 'submit' once WorkChain.submit 204 | # allows CalcFunctions (it should already; this appears to be a 205 | # bug in Aiida). 
206 | 207 | if exception_arrays: 208 | r = self.runner.run_get_node( 209 | merge_remote_arrays, 210 | **list_to_dict(exception_arrays), 211 | ) 212 | context_update["exception"] = r.node 213 | 214 | for k, arrays in return_value_arrays.items(): 215 | r = self.runner.run_get_node( 216 | merge_remote_arrays, 217 | **list_to_dict(arrays), 218 | ) 219 | context_update[f"return_values.{k}"] = r.node 220 | 221 | return self.to_context(**context_update) 222 | 223 | def extract_merged_arrays(self): # noqa: D102 224 | if "exception" in self.ctx: 225 | self.ctx.exception = self.ctx.exception.outputs.result 226 | if "return_values" in self.ctx: 227 | for k, v in self.ctx.return_values.items(): 228 | self.ctx.return_values[k] = v.outputs.result 229 | 230 | def pass_through_arrays(self): # noqa: D102 231 | self.report("Passing through results from single mapjob") 232 | assert self.ctx.iteration == 1 233 | (job,) = self.ctx.launched_mapjobs 234 | if "exception" in job.outputs: 235 | self.ctx.exception = job.outputs.exception 236 | if "return_values" in job.outputs: 237 | for k, v in job.outputs.return_values.items(): 238 | self.ctx[f"return_values.{k}"] = v 239 | 240 | def output(self): # noqa: D102 241 | self.report("Setting outputs") 242 | if "exception" in self.ctx: 243 | self.out("exception", self.ctx.exception) 244 | for k, v in self.ctx.items(): 245 | if k.startswith("return_values"): 246 | self.out(k, v) 247 | 248 | max_restarts = self.inputs.metadata.options.max_restarts 249 | if not self.has_all_results and self.ctx.iteration >= max_restarts: 250 | self.report(f"Restarted the maximum number of times {max_restarts}") 251 | return self.exit_codes.MAXIMUM_RESTARTS_EXCEEDED 252 | 253 | 254 | def result_mask(job, expected_shape) -> np.ndarray: 255 | """Return the result mask for a PyMapJob that potentially has multiple outputs.""" 256 | if "return_values" not in job.outputs: 257 | return np.full(expected_shape, True) 258 | rvs = job.outputs.return_values 259 | masks = [getattr(rvs, x).mask for x in rvs] 260 | if len(masks) == 1: 261 | return masks[0] 262 | else: 263 | # If for some reason one of the outputs is missing elements (i.e. the 264 | # mask value is True) then we need to re-run the corresponding task. 265 | return np.logical_or(*masks) 266 | 267 | 268 | class RestartedPyCalcJob(WorkChain): 269 | """Workchain that resubmits a PyCalcJob until it succeeds.""" 270 | 271 | _node_class = WorkChainNode 272 | 273 | @classmethod 274 | def define(cls, spec): # noqa: D102 275 | super().define(spec) 276 | spec.expose_inputs(PyCalcJob) 277 | spec.expose_outputs(PyCalcJob, include=["return_values", "exception"]) 278 | spec.input( 279 | "metadata.options.max_restarts", 280 | valid_type=int, 281 | default=5, 282 | help=( 283 | "Maximum number of iterations the work chain will " 284 | "restart the process to finish successfully." 
285 | ), 286 | ) 287 | spec.exit_code( 288 | 410, 289 | "MAXIMUM_RESTARTS_EXCEEDED", 290 | message="The maximum number of restarts was exceeded.", 291 | ) 292 | spec.exit_code( 293 | 411, 294 | "CHILD_PROCESS_EXCEPTED", 295 | message="The child process excepted.", 296 | ) 297 | spec.outline( 298 | cls.setup, 299 | while_(cls.should_run)(cls.run_calcjob, cls.inspect_result), 300 | cls.output, 301 | ) 302 | 303 | def setup(self): # noqa: D102 304 | self.ctx.iteration = 0 305 | self.ctx.function_name = self.inputs.func.name 306 | self.ctx.children = [] 307 | self.ctx.is_finished = False 308 | 309 | def should_run(self): # noqa: D102 310 | return ( 311 | not self.ctx.is_finished 312 | and self.ctx.iteration < self.inputs.metadata.options.max_restarts 313 | ) 314 | 315 | def run_calcjob(self): # noqa: D102 316 | self.ctx.iteration += 1 317 | inputs = self.exposed_inputs(PyCalcJob) 318 | del inputs["metadata"]["options"]["max_restarts"] 319 | node = self.submit(PyCalcJob, **inputs) 320 | 321 | self.report( 322 | f"Launching {self.ctx.function_name}<{node.pk}> " 323 | f"iteration #{self.ctx.iteration}" 324 | ) 325 | 326 | return self.to_context(children=append_(node)) 327 | 328 | def inspect_result(self): # noqa: D102 329 | node = self.ctx.children[-1] 330 | 331 | if node.is_excepted: 332 | self.report(f"{self.ctx.function_name}<{node.pk}> excepted; aborting") 333 | return self.exit_codes.CHILD_PROCESS_EXCEPTED 334 | 335 | self.ctx.is_finished = node.exit_status == 0 336 | 337 | def output(self): # noqa: D102 338 | node = self.ctx.children[-1] 339 | label = f"{self.ctx.function_name}<{node.pk}>" 340 | 341 | self.out_many(self.exposed_outputs(node, PyCalcJob)) 342 | 343 | max_restarts = self.inputs.metadata.options.max_restarts 344 | if not self.ctx.is_finished and self.ctx.iteration >= max_restarts: 345 | self.report( 346 | f"Reached the maximum number of iterations {max_restarts}: " 347 | f"last ran {label}" 348 | ) 349 | return self.exit_codes.MAXIMUM_RESTARTS_EXCEEDED 350 | else: 351 | self.report( 352 | f"Succeeded after {self.ctx.iteration} submissions: " 353 | f"last ran {label}" 354 | ) 355 | -------------------------------------------------------------------------------- /examples/03-failures.md: -------------------------------------------------------------------------------- 1 | # Handling failures 2 | 3 | 4 | This notebook demonstrates how workflows can be built to handle common failure modes: 5 | 6 | 1. Persistent errors (e.g. a few samples in the sample plan are ill-defined) 7 | 2. Transient errors (e.g. meshing failed due to some random failure) 8 | 9 | To explore this we will take the workflows developed in [02-workflows.md](./02-workflows.md) and make a few modifications. 10 | 11 | 12 | First we do the usual imports and define an execution environment 13 | 14 | ```python 15 | from dataclasses import dataclass 16 | import random 17 | import time 18 | 19 | import numpy as np 20 | import toolz 21 | ``` 22 | 23 | ```python 24 | import aiida 25 | aiida.load_profile() 26 | 27 | aiida.__version__ 28 | ``` 29 | 30 | ```python 31 | import aiida_dynamic_workflows as flows 32 | from aiida_dynamic_workflows import step 33 | 34 | flows.control.ensure_daemon_restarted() 35 | flows.__version__ 36 | ``` 37 | 38 | ```python 39 | cluster_env = flows.engine.execution_environment( 40 | "py39", # conda environment 41 | "my-cluster", # computer name 42 | queue=("some-queue", 24), # queue and num. 
cores per machine 43 | ) 44 | ``` 45 | 46 | ## Defining the steps and workflows 47 | 48 | 49 | This is copied verbatim from [02-workflows.md](./02-workflows.md). 50 | 51 | In principle we could put this in a separate module, but this won't quite work until cloudpickle gets [this new feature](https://github.com/cloudpipe/cloudpickle/pull/417). 52 | 53 | ```python 54 | @dataclass(frozen=True) 55 | class Geometry: 56 | x : float 57 | y : float 58 | 59 | @dataclass(frozen=True) 60 | class Mesh: 61 | geometry : Geometry 62 | mesh_size : float 63 | 64 | @dataclass(frozen=True) 65 | class Materials: 66 | geometry: Geometry 67 | materials: list[str] 68 | 69 | @dataclass(frozen=True) 70 | class Electrostatics: 71 | mesh: Mesh 72 | materials: Materials 73 | voltages: list[float] 74 | ``` 75 | 76 | ```python 77 | @step(returns="geo") 78 | def make_geometry(x: float, y: float) -> Geometry: 79 | time.sleep(5) # do some work 80 | return Geometry(x, y) 81 | 82 | 83 | @step(returns=("mesh", "coarse_mesh")) 84 | def make_mesh( 85 | geo: Geometry, 86 | mesh_size: float, 87 | coarse_mesh_size: float, 88 | ) -> tuple[Mesh, Mesh]: 89 | time.sleep(5) # do some work 90 | return Mesh(geo, mesh_size), Mesh(geo, coarse_mesh_size) 91 | 92 | 93 | @step(returns="materials") 94 | def make_materials(geo: Geometry) -> Materials: 95 | time.sleep(5) # do some work 96 | return Materials(geo, ["a", "b", "c"]) 97 | 98 | 99 | @step(returns="electrostatics") 100 | def run_electrostatics( 101 | mesh: Mesh, materials: Materials, V_left: float, V_right: float 102 | ) -> Electrostatics: 103 | time.sleep(10) # do some work 104 | return Electrostatics(mesh, materials, [V_left, V_right]) 105 | 106 | 107 | @step(returns="charge") 108 | def get_charge(electrostatics: Electrostatics) -> float: 109 | # obviously not actually the charge; but we should return _some_ number that 110 | # is "derived" from the electrostatics. 111 | return sum(electrostatics.voltages) 112 | 113 | 114 | @step(returns="average_charge") 115 | def average_charge(charge: "FileBasedObjectArray") -> float: 116 | # .to_array() is a bit dumb; it loads in _all_ the data at once, but 117 | # this is the simplest way, and in this example the data is not so large. 118 | return np.mean(charge.to_array()) 119 | ``` 120 | 121 | ```python 122 | from aiida_dynamic_workflows.workflow import first, concurrently, map_, new_workflow 123 | 124 | model_flow = ( 125 | new_workflow(name="model_flow") 126 | .then(make_geometry) 127 | .then( 128 | # These 2 steps will be done at the same time 129 | concurrently(make_mesh, make_materials) 130 | ) 131 | ) 132 | 133 | electrostatics_flow = ( 134 | new_workflow(name="electrostatics_flow") 135 | .then( 136 | map_( 137 | run_electrostatics, 138 | "V_left[a], V_right[b] -> electrostatics[a, b]", 139 | ) 140 | ).then( 141 | map_( 142 | get_charge, 143 | "electrostatics[i, j] -> charge[i, j]" 144 | ) 145 | ).then(average_charge) 146 | ) 147 | 148 | total_flow = ( 149 | new_workflow(name="total_electrostatics") 150 | .join(model_flow) 151 | .join(electrostatics_flow) 152 | .returning("electrostatics", average_charge="avg_electrostatic_charge") 153 | ) 154 | ``` 155 | 156 | ## Modifying steps 157 | 158 | 159 | Now we make new meshing and electrostatics steps with the following modifications: 160 | 161 | + If the `mesh_error` parameter is True, then the meshing step always raises a `ValueError`. 162 | + If `V_left` or `V_right` is outside the bounds set by `V_limits` then the electrostatics step raises a `ValueError`. 
163 | + The charge-extracting step will randomly fail with probability `failure_probability`. 164 | 165 | ```python 166 | # Inside the modified steps we should only reference the raw Python function, _not_ the 167 | # object in the Aiida database (which we will not be able to resolve, given that the code 168 | # will eventually be run in a job on the cluster). 169 | original_make_mesh = make_mesh.callable 170 | original_electrostatics = run_electrostatics.callable 171 | original_get_charge = get_charge.callable 172 | 173 | 174 | @flows.step(returns=("mesh", "coarse_mesh")) 175 | def modified_make_mesh(geo, mesh_size, coarse_mesh_size, mesh_error): 176 | if mesh_error: 177 | raise ValueError("Meshing step failed") 178 | else: 179 | return original_make_mesh(geo, mesh_size, coarse_mesh_size) 180 | 181 | 182 | @flows.step(returns="electrostatics") 183 | def modified_electrostatics(geo, mesh, materials, V_left, V_right, V_limits: tuple): 184 | a, b = V_limits 185 | if not (a < V_left < b and a < V_right < b): 186 | raise ValueError(f"Voltages ({V_left}, {V_right}) out of acceptable range {V_limits}") 187 | else: 188 | return original_electrostatics(mesh, materials, V_left, V_right) 189 | 190 | @flows.step(returns="charge") 191 | def modified_get_charge(electrostatics, failure_probability): 192 | import random 193 | if random.random() < failure_probability: 194 | raise ValueError("Randomly failed!") 195 | else: 196 | return original_get_charge(electrostatics) 197 | ``` 198 | 199 | ## Modifying workflows 200 | 201 | 202 | We now use the `replace_steps` method of the `total_flow` that we defined above. 203 | 204 | This allows us to easily replace the `make_mesh`, `run_electrostatics`, and `get_charge` steps with the modified versions that we defined above: 205 | 206 | ```python 207 | new_flow = ( 208 | total_flow 209 | .rename("total_flow_with_failures") 210 | .replace_steps({ 211 | make_mesh: modified_make_mesh, 212 | run_electrostatics: modified_electrostatics, 213 | get_charge: modified_get_charge, 214 | }) 215 | ) 216 | 217 | new_flow.visualize(as_png=True) 218 | ``` 219 | 220 | ## Running the workflow 221 | 222 | 223 | Let's first run the workflow with `mesh_error=True`, and see what happens: 224 | 225 | ```python 226 | inputs = dict( 227 | mesh_size=0.015, 228 | V_left=np.linspace(0, 1, 10), 229 | V_right=np.linspace(-0.5, 0.5, 20), 230 | x=0.15, 231 | y=0.25, 232 | coarse_mesh_size=0.05, 233 | # Extra parameters; needed for the modified steps 234 | V_limits=[-0.4, 0.4], 235 | failure_probability=0.2, 236 | mesh_error=True, 237 | ) 238 | ``` 239 | 240 | ```python 241 | running_workflow = aiida.engine.submit(flows.workflow.build( 242 | new_flow.on(cluster_env), 243 | **inputs, 244 | )) 245 | ``` 246 | 247 | 248 | ```python 249 | print(flows.report.progress(running_workflow)) 250 | flows.report.graph(running_workflow, as_png=True) 251 | ``` 252 | 253 | We see that the `make_geometry` and `make_materials` steps completed successfully (Exit Code 0), but `modified_make_mesh` failed with exit code 401. 254 | 255 | We can use `flows.report.log` to figure out what happened: 256 | 257 | ```python 258 | modified_mesh_calc = running_workflow.called[1] 259 | print(flows.report.log(modified_mesh_calc)) 260 | ``` 261 | 262 | We see that `User code raised an Exception`, and that `modified_make_mesh` returned an `exception` output.
263 | 264 | We can inspect the exception to see what happened: 265 | 266 | ```python 267 | modified_mesh_calc.outputs.exception.fetch_value() 268 | ``` 269 | 270 | We can get more insight into what happened by printing the log from the _workflow_: 271 | 272 | ```python 273 | print(flows.report.log(running_workflow)) 274 | ``` 275 | 276 | We see that the workflow detected the failure of `modified_make_mesh`. 277 | 278 | It tried to carry on anyway, but `modified_electrostatics` requires `mesh`, _so the step is skipped_. 279 | The remaining steps are also skipped for similar reasons. 280 | 281 | 282 | **The default workflow behaviour is to try to execute all steps, skipping steps for which there is not sufficient input.** 283 | 284 | 285 | ## Persistent errors in Map elements 286 | 287 | 288 | Now we will flip the `mesh_error` flag so that the mesh step completes successfully. 289 | 290 | Note, however, that some elements of the `modified_electrostatics` map will raise an exception 291 | because `V_left` or `V_right` are outside of the specified limits. 292 | 293 | We will see how the workflow handles such an error condition. 294 | 295 | 296 | We can easily make a small modification to the parameters before resubmitting by using `get_builder_restart()` on the previously executed workflow: 297 | 298 | ```python 299 | no_mesh_error = running_workflow.get_builder_restart() 300 | no_mesh_error.kwargs.mesh_error = aiida.orm.to_aiida_type(False) 301 | ``` 302 | 303 | Then submit the workflow with the modified parameters: 304 | 305 | ```python 306 | running_workflow2 = aiida.engine.submit(no_mesh_error) 307 | ``` 308 | 309 | ```python 310 | print(flows.report.progress(running_workflow2)) 311 | flows.report.graph(running_workflow2, as_png=True) 312 | ``` 313 | 314 | We see that the `modified_electrostatics` step returned exit code 401, indicating that our code raised a Python exception; we also see that the `exception` output was produced. 315 | 316 | 317 | However, we also see that an `electrostatics` output was produced, despite the non-zero exit code. 318 | 319 | 320 | Let's load in the exception to see what is going on: 321 | 322 | ```python 323 | electrostatics_step = running_workflow2.called[3] 324 | ``` 325 | 326 | ```python 327 | exceptions = electrostatics_step.outputs.exception.fetch_value() 328 | ``` 329 | 330 | ```python 331 | exceptions[:4, :4] 332 | ``` 333 | 334 | As `modified_electrostatics` was run as a `PyMapJob`, `exceptions` is a _masked array_, that contains the exception raised by the given element in the map (and masked for elements that did not raise an exception). 335 | 336 | 337 | Similarly, `electrostatics` will be a masked array, with the map elements that raised an exception _masked out_: 338 | 339 | ```python 340 | electrostatics_array = electrostatics_step.outputs.return_values.electrostatics 341 | electrostatics_array.mask[:4, :4] 342 | ``` 343 | 344 | Nevertheless, the workflow can continue, even with this "partial" output. 345 | 346 | 347 | The downstream `PyMapJob` that runs `get_charge` detects that the input(s) are "masked" and only runs `get_charge` for the data that actually exists. 348 | 349 | We can "see" this by inspecting the `--array` specification that was passed to Slurm by the job: 350 | 351 | ```python 352 | get_charge_step = running_workflow2.called[4] 353 | print(get_charge_step.attributes["custom_scheduler_commands"]) 354 | ``` 355 | 356 | We see that only array elements 2-17, 22-37 etc. 
are submitted, as the other elements of `electrostatics` are missing. 357 | 358 | 359 | ## Mitigating transient errors 360 | 361 | 362 | Our modified `get_charge` step randomly fails with a certain probability: a "transient" error. 363 | 364 | We can see this because the array of charges output by the step does not have the same mask as the electrostatics that were used as input: 365 | 366 | ```python 367 | charge_array = get_charge_step.outputs.return_values.charge 368 | 369 | np.sum(charge_array.mask != electrostatics_array.mask) 370 | ``` 371 | 372 | A simple way to mitigate transient errors is to specify that steps should be restarted. 373 | 374 | For MapJobs we can do this by passing `max_restarts` to `map_`: 375 | 376 | ```python 377 | electrostatics_flow_with_restarts = ( 378 | first( 379 | map_( 380 | run_electrostatics, 381 | "V_left[i], V_right[j] -> electrostatics[i, j]", 382 | ) 383 | ).then( 384 | map_( 385 | get_charge, 386 | "electrostatics[i, j] -> charge[i, j]", 387 | max_restarts=5, # <-- specify the max number of restarts here 388 | ) 389 | ).then(average_charge) 390 | ) 391 | 392 | ``` 393 | 394 | Alternatively, we can use the `with_restarts` method of an existing workflow to add restarts to the named steps: 395 | 396 | ```python 397 | total_flow_with_restarts = ( 398 | new_flow 399 | .rename(name="flow_with_restarts") 400 | .with_restarts({modified_get_charge: 5}) 401 | ) 402 | ``` 403 | 404 | ```python 405 | running_workflow_with_restarts = aiida.engine.submit(flows.workflow.build( 406 | total_flow_with_restarts.on(cluster_env), 407 | **toolz.assoc(inputs, "mesh_error", False), 408 | )) 409 | ``` 410 | 411 | ```python 412 | print(flows.report.progress(running_workflow_with_restarts)) 413 | flows.report.graph(running_workflow_with_restarts, as_png=True) 414 | ``` 415 | 416 | We see that after `modified_electrostatics` there is a "`RestartedPyMapJob`" that sequentially launches several `PyMapJob`s that each run `modified_get_charge`. 417 | 418 | We can see what is going on by printing the log of this `RestartedPyMapJob`: 419 | 420 | ```python 421 | restarted_mapjob = running_workflow_with_restarts.called[-2] 422 | print(flows.report.log(restarted_mapjob)) 423 | ``` 424 | 425 | The first time `modified_get_charge` is run, it is run over 64 tasks, the number of unmasked `electrostatics` in the input: 426 | 427 | ```python 428 | electrostatics_mapjob = running_workflow_with_restarts.called[-3] 429 | np.sum(~electrostatics_mapjob.outputs.return_values.electrostatics.mask) 430 | ``` 431 | 432 | This run results in a few failures, so the failed tasks are submitted again, and so on, until all 64 results have been obtained (or the maximum number of restarts has been exceeded). 433 | 434 | ```python 435 | for j in restarted_mapjob.called: 436 | if 'PyMapJob' not in j.process_type: 437 | continue 438 | print(j.get_option("custom_scheduler_commands")) 439 | ``` 440 | 441 | The `RestartedPyMapJob` then merges the outputs from the different runs together into a single array.
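Conceptually, the merge combines the partial (masked) result arrays from the successive runs: each restart fills in some of the elements that were still missing. The snippet below is a minimal numpy-only sketch of that idea (it is not the actual `merge_remote_arrays` implementation used by the workchain, and the numbers are made up for illustration):

```python
import numpy as np

# Pretend the map was run twice over 5 tasks; each run produced a partial
# result, masked wherever the task failed or was not (re)submitted.
run_1 = np.ma.array([1.0, 2.0, 0.0, 4.0, 0.0], mask=[0, 0, 1, 0, 1])
run_2 = np.ma.array([0.0, 0.0, 3.0, 0.0, 5.0], mask=[1, 1, 0, 1, 0])

# Merge: keep run_1's values where present, fill the gaps from run_2.
merged = run_1.copy()
merged[~run_2.mask] = run_2[~run_2.mask]

print(merged)       # [1.0 2.0 3.0 4.0 5.0]
print(merged.mask)  # all False: every task eventually succeeded
```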
442 | 443 | 444 | Finally, we see that even the `average_charge` step completed successfully, as it was written in such a way that it transparently handles masked arrays: 445 | 446 | ```python 447 | running_workflow_with_restarts.outputs.return_values.avg_electrostatic_charge.fetch_value() 448 | ``` 449 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """Aiida data plugins for running arbitrary Python functions.""" 5 | 6 | from concurrent.futures import ThreadPoolExecutor 7 | import functools 8 | import inspect 9 | import io 10 | from itertools import repeat 11 | import operator 12 | import os 13 | from pathlib import Path 14 | import tempfile 15 | from typing import Any, Callable, Dict, List, Optional, Tuple 16 | 17 | import aiida.orm 18 | import cloudpickle 19 | import numpy as np 20 | import toolz 21 | 22 | # To get Aiida's caching to be useful we need to have a stable way to hash Python 23 | # functions. The "default" is to hash the cloudpickle blob, but this is not 24 | # typically stable for functions defined in a Jupyter notebook. 25 | # TODO: insert something useful here. 26 | function_hasher = None 27 | 28 | 29 | class PyFunction(aiida.orm.Data): 30 | """Aiida representation of a Python function.""" 31 | 32 | def __init__(self, **kwargs): 33 | # TODO: basic typechecks on these 34 | func = kwargs.pop("func") 35 | assert callable(func) 36 | returns = kwargs.pop("returns") 37 | if isinstance(returns, str): 38 | returns = [returns] 39 | resources = kwargs.pop("resources", None) 40 | if resources is None: 41 | resources = dict() 42 | 43 | super().__init__(**kwargs) 44 | 45 | self.put_object_from_filelike( 46 | path="function.pickle", 47 | handle=io.BytesIO(cloudpickle.dumps(func)), 48 | ) 49 | self.set_attribute("resources", resources) 50 | self.set_attribute("returns", returns) 51 | self.set_attribute("parameters", _parameters(func)) 52 | 53 | # If 'function_hasher' is available then we store the 54 | # function hash directly, and _get_objects_to_hash will 55 | # _not_ use the pickle blob (which is not stable e.g. 56 | # for functions defined in a notebook). 57 | if callable(function_hasher): 58 | self.set_attribute("_function_hash", function_hasher(func)) 59 | 60 | try: 61 | source = inspect.getsource(func) 62 | except Exception: 63 | pass 64 | else: 65 | self.set_attribute("source", source) 66 | 67 | name = getattr(func, "__name__", None) 68 | if name: 69 | self.set_attribute("name", name) 70 | 71 | @property 72 | def resources(self) -> Dict[str, str]: 73 | """Resources required by this function.""" 74 | return self.get_attribute("resources") 75 | 76 | @property 77 | def source(self) -> str: 78 | """Source code of this function.""" 79 | return self.get_attribute("source") 80 | 81 | @property 82 | def name(self) -> str: 83 | """Name of this function.""" 84 | return self.get_attribute("name") 85 | 86 | @property 87 | def parameters(self) -> List[str]: 88 | """Parameters of this function.""" 89 | return self.get_attribute("parameters") 90 | 91 | @property 92 | def returns(self) -> Optional[List[str]]: 93 | """List of names returned by this function.""" 94 | return self.get_attribute("returns") 95 | 96 | # TODO: use better caching for this (maybe on the class level?) 
97 | @functools.cached_property 98 | def pickle(self) -> bytes: 99 | """Pickled function.""" 100 | return self.get_object_content("function.pickle", "rb") 101 | 102 | @functools.cached_property 103 | def callable(self) -> Callable: 104 | """Return the function stored in this object.""" 105 | return cloudpickle.loads(self.pickle) 106 | 107 | @property 108 | def __signature__(self): 109 | return inspect.signature(self.callable) 110 | 111 | def __call__(self, *args: Any, **kwargs: Any): 112 | """Call the function stored in this object.""" 113 | return self.callable(*args, **kwargs) 114 | 115 | def _get_objects_to_hash(self) -> List[Any]: 116 | objects = super()._get_objects_to_hash() 117 | 118 | # XXX: this depends on the specifics of the implementation 119 | # of super()._get_objects_to_hash(). The second-to-last 120 | # elements in 'objects' is the hash of the file repository. 121 | # For 'PyFunction' nodes this contains the cloudpickle blob, 122 | # which we _do not_ want hashed. 123 | if "_function_hash" in self.attributes: 124 | *a, _, x = objects 125 | return [*a, x] 126 | else: 127 | return objects 128 | 129 | 130 | def _parameters(f: Callable) -> List[str]: 131 | valid_kinds = [ 132 | getattr(inspect.Parameter, k) for k in ("POSITIONAL_OR_KEYWORD", "KEYWORD_ONLY") 133 | ] 134 | params = inspect.signature(f).parameters.values() 135 | if any(p.kind not in valid_kinds for p in params): 136 | raise TypeError("Invalid signature") 137 | return [p.name for p in params] 138 | 139 | 140 | class Nil(aiida.orm.Data): 141 | """Trivial representation of the None type in Aiida.""" 142 | 143 | 144 | # TODO: make this JSON serializable so it can go directly in the DB 145 | class PyOutline(aiida.orm.Data): 146 | """Naive Aiida representation of a workflow outline.""" 147 | 148 | def __init__(self, **kwargs): 149 | outline = kwargs.pop("outline") 150 | super().__init__(**kwargs) 151 | 152 | self.put_object_from_filelike( 153 | path="outline.pickle", 154 | handle=io.BytesIO(cloudpickle.dumps(outline)), 155 | ) 156 | 157 | @functools.cached_property 158 | def value(self): 159 | """Python object loaded from the stored pickle.""" 160 | return cloudpickle.loads(self.get_object_content("outline.pickle", "rb")) 161 | 162 | 163 | # TODO: Annotate these with the class name (useful for visualization) 164 | class PyData(aiida.orm.Data): 165 | """Naive Aiida representation of an arbitrary Python object.""" 166 | 167 | def __init__(self, **kwargs): 168 | pickle_path = kwargs.pop("pickle_path") 169 | 170 | super().__init__(**kwargs) 171 | self.put_object_from_file(filepath=pickle_path, path="object.pickle") 172 | 173 | # TODO: do caching more intelligently: we could attach a cache to the 174 | # _class_ instead so that if we create 2 PyData objects that 175 | # point to the _same_ database entry (pk) then we only have to 176 | # load the data once. 177 | # (does Aiida provide some tooling for this?) 
178 | @functools.cached_property 179 | def value(self): 180 | """Python object loaded from the stored pickle.""" 181 | return cloudpickle.loads(self.get_object_content("object.pickle", "rb")) 182 | 183 | 184 | class PyRemoteData(aiida.orm.RemoteData): 185 | """Naive Aiida representation of an arbitrary Python object on a remote computer.""" 186 | 187 | def __init__(self, **kwargs): 188 | pickle_path = str(kwargs.pop("pickle_path")) 189 | super().__init__(**kwargs) 190 | 191 | self.set_attribute("pickle_path", pickle_path) 192 | 193 | @property 194 | def pickle_path(self): 195 | """Return the remote path that contains the pickle.""" 196 | return os.path.join(self.get_remote_path(), self.get_attribute("pickle_path")) 197 | 198 | def fetch_value(self): 199 | """Load Python object from the remote pickle.""" 200 | with tempfile.NamedTemporaryFile(mode="rb") as f: 201 | self.getfile(self.get_attribute("pickle_path"), f.name) 202 | return cloudpickle.load(f) 203 | 204 | @classmethod 205 | def from_remote_data(cls, rd: aiida.orm.RemoteData, pickle_path: str): 206 | """Return a new PyRemoteData, given an existing RemoteData. 207 | 208 | Parameters 209 | ---------- 210 | rd 211 | RemoteData folder. 212 | pickle_path 213 | Relative path in the RemoteData that contains pickle data. 214 | """ 215 | return cls( 216 | remote_path=rd.get_remote_path(), 217 | pickle_path=pickle_path, 218 | computer=rd.computer, 219 | ) 220 | 221 | 222 | class PyRemoteArray(aiida.orm.RemoteData): 223 | """Naive Aiida representation of a remote array of arbitrary Python objects. 224 | 225 | Each object is stored in a separate file. 226 | """ 227 | 228 | def __init__(self, **kwargs): 229 | shape = kwargs.pop("shape") 230 | filename_template = kwargs.pop("filename_template") 231 | super().__init__(**kwargs) 232 | self.set_attribute("shape", tuple(shape)) 233 | self.set_attribute("filename_template", str(filename_template)) 234 | 235 | def _file(self, i: int) -> str: 236 | return self.get_attribute("filename_template").format(i) 237 | 238 | @property 239 | def pickle_path(self): 240 | """Return the remote path that contains the pickle files.""" 241 | return self.get_remote_path() 242 | 243 | def _fetch_buffer(self, local_files=False): 244 | """Return iterator over Python objects in this array.""" 245 | 246 | def _load(dir: Path, pickle_file: str): 247 | path = dir / pickle_file 248 | if not path.is_file(): 249 | return None 250 | else: 251 | with open(path, "rb") as f: 252 | return cloudpickle.load(f) 253 | 254 | def _iter_files(dir): 255 | with ThreadPoolExecutor() as ex: 256 | file_gen = map(self._file, range(self.size)) 257 | yield from ex.map(_load, repeat(dir), file_gen) 258 | 259 | if local_files: 260 | # If the array's directory does not exist then it's 261 | # not actually mounted locally. 262 | root_dir = Path(self.get_remote_path()) 263 | if not root_dir.is_dir(): 264 | raise FileNotFoundError(str(root_dir)) 265 | else: 266 | yield from _iter_files(root_dir) 267 | else: 268 | with tempfile.TemporaryDirectory() as temp_dir: 269 | dir = Path(os.path.join(temp_dir, "values")) 270 | # TODO: do this with chunks, rather than all files at once. 
271 | with self.get_authinfo().get_transport() as transport: 272 | transport.gettree(self.get_remote_path(), dir) 273 | yield from _iter_files(dir) 274 | 275 | def fetch_value(self, local_files=False) -> np.ma.core.MaskedArray: 276 | """Return a numpy array with dtype 'object' for this array.""" 277 | # Objects that have a bogus '__array__' implementation fool 278 | # 'buff[:] = xs', so we need to manually fill the array. 279 | buff = np.empty((self.size,), dtype=object) 280 | for i, x in enumerate(self._fetch_buffer(local_files)): 281 | buff[i] = x 282 | buff = buff.reshape(self.shape) 283 | return np.ma.array(buff, mask=self.mask) 284 | 285 | @property 286 | def shape(self) -> Tuple[int, ...]: 287 | """Shape of this remote array.""" 288 | return tuple(self.get_attribute("shape")) 289 | 290 | @property 291 | def is_masked(self) -> bool: 292 | """Return True if some elements of the array are 'masked' (missing).""" 293 | return np.any(self.mask) 294 | 295 | @property 296 | def mask(self) -> np.ndarray: 297 | """Return the mask for the missing elements of the array.""" 298 | existing_files = set( 299 | v["name"] for v in self.listdir_withattributes() if not v["isdir"] 300 | ) 301 | return np.array( 302 | [self._file(i) not in existing_files for i in range(self.size)], 303 | dtype=bool, 304 | ).reshape(self.shape) 305 | 306 | @property 307 | def size(self) -> int: 308 | """Size of this remote array (product of the shape).""" 309 | return toolz.reduce(operator.mul, self.shape, 1) 310 | 311 | 312 | class PyArray(PyData): 313 | """Wrapper around PyData for storing a single array.""" 314 | 315 | def __init__(self, **kwargs): 316 | array = np.asarray(kwargs.pop("array")) 317 | with tempfile.NamedTemporaryFile() as handle: 318 | cloudpickle.dump(array, handle) 319 | handle.flush() 320 | handle.seek(0) 321 | super().__init__(pickle_path=handle.name, **kwargs) 322 | self.set_attribute("shape", array.shape) 323 | self.set_attribute("dtype", str(array.dtype)) 324 | self._cached = None 325 | 326 | @property 327 | def shape(self) -> Tuple[int, ...]: 328 | """Shape of this remote array.""" 329 | return tuple(self.get_attribute("shape")) 330 | 331 | @property 332 | def dtype(self) -> Tuple[int, ...]: 333 | """Shape of this remote array.""" 334 | return np.dtype(self.get_attribute("dtype")) 335 | 336 | @property 337 | def size(self) -> int: 338 | """Size of this remote array (product of the shape).""" 339 | return toolz.reduce(operator.mul, self.shape, 1) 340 | 341 | def get_array(self) -> np.ndarray: 342 | """Return the array.""" 343 | return self.value 344 | 345 | 346 | class PyException(aiida.orm.Data): 347 | """Aiida representation of a Python exception.""" 348 | 349 | # - Exception type 350 | # - message 351 | # - traceback 352 | ... 353 | 354 | 355 | # Register automatic conversion from lists and numpy arrays 356 | # to the appropriate Aiida datatypes 357 | 358 | 359 | @aiida.orm.to_aiida_type.register(type(None)) 360 | def _(_: None): 361 | return Nil() 362 | 363 | 364 | # Aiida Lists can only handle built-in types, which is not general 365 | # enough for our purposes. We therefore convert Python lists into 366 | # 1D PyArray types with 'object' dtype. 367 | @aiida.orm.to_aiida_type.register(list) 368 | def _(xs: list): 369 | arr = np.empty((len(xs),), dtype=object) 370 | # Objects that have a bogus '__array__' implementation fool 371 | # 'arr[:] = xs', so we need to manually fill the array. 
372 | for i, x in enumerate(xs): 373 | arr[i] = x 374 | return PyArray(array=arr) 375 | 376 | 377 | @aiida.orm.to_aiida_type.register(np.ndarray) 378 | def _(x): 379 | return PyArray(array=x) 380 | 381 | 382 | def ensure_aiida_type(x: Any) -> aiida.orm.Data: 383 | """Return a new Aiida value containing 'x', if not already of an Aiida datatype. 384 | 385 | If 'x' is already an Aiida datatype, then return 'x'. 386 | """ 387 | if isinstance(x, aiida.orm.Data): 388 | return x 389 | else: 390 | r = aiida.orm.to_aiida_type(x) 391 | if not isinstance(r, aiida.orm.Data): 392 | raise RuntimeError( 393 | "Expected 'to_aiida_type' to return an Aiida data node, but " 394 | f"got an object of type '{type(r)}' instead (when passed " 395 | f"an object of type '{type(x)}')." 396 | ) 397 | return r 398 | 399 | 400 | # Register handlers for getting native Python objects from their 401 | # Aiida equivalents 402 | 403 | 404 | @functools.singledispatch 405 | def from_aiida_type(x): 406 | """Turn Aiida types into their corresponding native Python types.""" 407 | raise TypeError(f"Do not know how to convert {type(x)} to native Python type") 408 | 409 | 410 | @from_aiida_type.register(Nil) 411 | def _(_): 412 | return None 413 | 414 | 415 | @from_aiida_type.register(aiida.orm.BaseType) 416 | def _(x): 417 | return x.value 418 | 419 | 420 | @from_aiida_type.register(PyData) 421 | def _(x): 422 | return x.value 423 | 424 | 425 | @from_aiida_type.register(PyArray) 426 | def _(x): 427 | return x.get_array() 428 | 429 | 430 | # Register handlers for figuring out array shapes for different datatypes 431 | 432 | 433 | @functools.singledispatch 434 | def array_shape(x) -> Tuple[int, ...]: 435 | """Return the shape of 'x'.""" 436 | try: 437 | return tuple(map(int, x.shape)) 438 | except AttributeError: 439 | raise TypeError(f"No array shape defined for type {type(x)}") 440 | 441 | 442 | @array_shape.register(aiida.orm.List) 443 | def _(x): 444 | return (len(x),) 445 | 446 | 447 | # Register handlers for figuring out array masks for different datatypes 448 | 449 | 450 | @functools.singledispatch 451 | def array_mask(x) -> np.ndarray: 452 | """Return the mask applied to 'x'.""" 453 | try: 454 | return x.mask 455 | except AttributeError: 456 | raise TypeError(f"No array mask defined for type {type(x)}") 457 | 458 | 459 | @array_mask.register(aiida.orm.List) 460 | def _(x): 461 | return np.full((len(x),), False) 462 | 463 | 464 | @array_mask.register(PyArray) 465 | @array_mask.register(np.ndarray) 466 | def _(x): 467 | return np.full(x.shape, False) 468 | -------------------------------------------------------------------------------- /aiida_dynamic_workflows/engine.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | 5 | from __future__ import annotations 6 | 7 | from collections.abc import Mapping 8 | import copy 9 | from dataclasses import dataclass 10 | import os 11 | import sys 12 | from typing import Any, Callable, Dict, List, Optional, Tuple, Union 13 | 14 | import aiida.engine 15 | import aiida.orm 16 | import toolz 17 | 18 | from .calculations import PyCalcJob, PyMapJob, array_job_spec 19 | from .common import MapSpec 20 | from .data import PyFunction, ensure_aiida_type 21 | from .workchains import RestartedPyCalcJob, RestartedPyMapJob 22 | 23 | __all__ = ["apply", "map_"] 24 | 25 | 26 | @dataclass(frozen=True) 27 | class ExecutionEnvironment: 28 | """An execution environment in which to run a PyFunction as a PyCalcJob.""" 29 | 30 | code_label: str 31 | computer_label: str 32 | queue: Optional[Tuple[str, int]] = None 33 | 34 | @property 35 | def code(self): 36 | return aiida.orm.load_code("@".join((self.code_label, self.computer_label))) 37 | 38 | @property 39 | def computer(self): 40 | return aiida.orm.load_computer(self.computer_label) 41 | 42 | 43 | def code_from_conda_env(conda_env: str, computer_name: str) -> aiida.orm.Code: 44 | """Create AiiDA Code for python interpreter from conda environment.""" 45 | c = aiida.orm.load_computer(computer_name) 46 | with c.get_transport() as t: 47 | username = t.whoami() 48 | try: 49 | conda_dir = c.get_property("conda_dir").format(username=username) 50 | except AttributeError: 51 | raise RuntimeError(f"'conda_dir' is not set for {computer_name}.") 52 | 53 | conda_initscript = os.path.join(conda_dir, "etc", "profile.d", "conda.sh") 54 | python_path = os.path.join(conda_dir, "envs", conda_env, "bin", "python") 55 | 56 | prepend_text = "\n".join( 57 | [f"source {conda_initscript}", f"conda activate {conda_env}"] 58 | ) 59 | 60 | r, _stdout, stderr = t.exec_command_wait(prepend_text) 61 | 62 | if r != 0: 63 | raise RuntimeError( 64 | f"Failed to find Conda environment '{conda_env}' on '{computer_name}':" 65 | f"\n{stderr}" 66 | ) 67 | 68 | code = aiida.orm.Code((c, python_path), label=conda_env) 69 | code.set_prepend_text(prepend_text) 70 | code.store() 71 | return code 72 | 73 | 74 | def current_conda_environment() -> str: 75 | """Return current conda environment name.""" 76 | # from https://stackoverflow.com/a/57716519/3447047 77 | return sys.exec_prefix.split(os.sep)[-1] 78 | 79 | 80 | def execution_environment(conda_env: Optional[str], computer: str, queue=None): 81 | if conda_env is None: 82 | conda_env = current_conda_environment() 83 | code_id = "@".join([conda_env, computer]) 84 | try: 85 | aiida.orm.load_code(code_id) 86 | except aiida.common.NotExistent: 87 | code = code_from_conda_env(conda_env, computer) 88 | code.store() 89 | 90 | if queue and (queue[0] not in get_queues(computer)): 91 | raise ValueError(f"Queue '{queue[0]}' does not exist on '{computer}'") 92 | 93 | return ExecutionEnvironment(conda_env, computer, queue) 94 | 95 | 96 | def get_queues(computer_name) -> List[str]: 97 | """Return a list of valid queue names for the named computer.""" 98 | computer = aiida.orm.load_computer(computer_name) 99 | with computer.get_transport() as t: 100 | command = "sinfo --summarize" 101 | retval, stdout, stderr = t.exec_command_wait(command) 102 | if retval != 0: 103 | raise RuntimeError( 104 | f"'{command}' failed on on '{computer_name}' " 105 | f"with exit code {retval}: {stderr}" 106 | ) 107 | _, *lines = stdout.splitlines() 108 | return [line.split(" ")[0] for line in lines] 109 | 110 | 111 | def local_current_execution_environment() -> 
ExecutionEnvironment: 112 | return execution_environment(None, "localhost") 113 | 114 | 115 | class ProcessBuilder(aiida.engine.ProcessBuilder): 116 | """ProcessBuilder that is serializable.""" 117 | 118 | def on( 119 | self, env: ExecutionEnvironment, max_concurrent_machines: Optional[int] = None 120 | ) -> ProcessBuilder: 121 | """Return a new ProcessBuilder, setting it up for execution on 'env'.""" 122 | r = copy.deepcopy(self) 123 | 124 | r.code = env.code 125 | 126 | if env.queue is not None: 127 | queue_name, cores_per_machine = env.queue 128 | r.metadata.options.queue_name = queue_name 129 | 130 | if issubclass(r.process_class, (PyMapJob, RestartedPyMapJob)): 131 | # NOTE: We are using a feature of the scheduler (Slurm in our case) to 132 | # use array jobs. We could probably figure a way to do this with 133 | # the 'direct' scheduler (GNU parallel or sth), but that is out 134 | # of scope for now. 135 | if env.computer.scheduler_type != "dynamic_workflows.slurm": 136 | raise NotImplementedError( 137 | "Mapping is currently only supported in an environment that " 138 | f"supports Slurm array jobs, but {env.computer.label} is " 139 | f" configured to use '{env.computer.scheduler_type}'." 140 | ) 141 | 142 | if env.queue is None: 143 | raise ValueError( 144 | "A queue specification (e.g. ('my-queue', 24) ) is required" 145 | ) 146 | 147 | r.metadata.options.cores_per_machine = cores_per_machine 148 | 149 | if max_concurrent_machines is not None: 150 | r.metadata.options.max_concurrent_machines = max_concurrent_machines 151 | 152 | return r 153 | 154 | def finalize(self, **kwargs) -> ProcessBuilder: 155 | """Return a new ProcessBuilder, setting its 'kwargs' to those provided.""" 156 | r = copy.deepcopy(self) 157 | r.kwargs = toolz.valmap(ensure_aiida_type, kwargs) 158 | 159 | opts = r.metadata.options 160 | 161 | custom_scheduler_commands = ["#SBATCH --requeue"] 162 | 163 | if issubclass(r.process_class, (PyMapJob, RestartedPyMapJob)): 164 | mapspec = MapSpec.from_string(opts.mapspec) 165 | mapped_kwargs = { 166 | k: v for k, v in r.kwargs.items() if k in mapspec.parameters 167 | } 168 | 169 | cores_per_job = opts.resources.get( 170 | "num_cores_per_mpiproc", 1 171 | ) * opts.resources.get("num_mpiprocs_per_machine", 1) 172 | jobs_per_machine = opts.cores_per_machine // cores_per_job 173 | max_concurrent_jobs = jobs_per_machine * opts.max_concurrent_machines 174 | 175 | task_spec = array_job_spec(mapspec, mapped_kwargs) 176 | # NOTE: This assumes that we are running on Slurm. 177 | custom_scheduler_commands.append( 178 | f"#SBATCH --array={task_spec}%{max_concurrent_jobs}" 179 | ) 180 | 181 | opts.custom_scheduler_commands = "\n".join(custom_scheduler_commands) 182 | 183 | return r 184 | 185 | def with_restarts(self, max_restarts: int) -> ProcessBuilder: 186 | """Return a new builder for a RestartedPyCalcJob or RestartedPyMapJob.""" 187 | if issubclass(self.process_class, (PyMapJob, RestartedPyMapJob)): 188 | r = ProcessBuilder(RestartedPyMapJob) 189 | elif issubclass(self.process_class, (PyCalcJob, RestartedPyCalcJob)): 190 | r = ProcessBuilder(RestartedPyCalcJob) 191 | else: 192 | raise TypeError(f"Do not know how to add restarts to {self.process_class}") 193 | _copy_builder_contents(to=r, frm=self) 194 | r.metadata.options.max_restarts = max_restarts 195 | return r 196 | 197 | # XXX: This is a complete hack to be able to serialize "Outline". 198 | # We should think this through more carefully when we come to refactor. 
199 | 200 | def __getstate__(self): 201 | def serialized_aiida_nodes(x): 202 | if isinstance(x, aiida.orm.Data): 203 | if not x.is_stored: 204 | x.store() 205 | return _AiidaData(x.uuid) 206 | else: 207 | return x 208 | 209 | serialized_data = traverse_mapping(serialized_aiida_nodes, self._data) 210 | return self._process_class, serialized_data 211 | 212 | def __setstate__(self, state): 213 | process_class, serialized_data = state 214 | self.__init__(process_class) 215 | 216 | def deserialize_aiida_nodes(x): 217 | if isinstance(x, _AiidaData): 218 | return aiida.orm.load_node(x.uuid) 219 | else: 220 | return x 221 | 222 | deserialized_data = traverse_mapping(deserialize_aiida_nodes, serialized_data) 223 | 224 | for k, v in deserialized_data.items(): 225 | if isinstance(v, Mapping): 226 | getattr(self, k)._update(v) 227 | else: 228 | setattr(self, k, v) 229 | 230 | 231 | # XXX: This is part of the __getstate__/__setstate__ hack for our custom ProcessBuilder 232 | @dataclass(frozen=True) 233 | class _AiidaData: 234 | uuid: str 235 | 236 | 237 | def _copy_builder_contents( 238 | to: aiida.engine.ProcessBuilderNamespace, 239 | frm: aiida.engine.ProcessBuilderNamespace, 240 | ): 241 | """Recursively copy the contents of 'frm' into 'to'. 242 | 243 | This mutates 'to'. 244 | """ 245 | for k, v in frm.items(): 246 | if isinstance(v, aiida.engine.ProcessBuilderNamespace): 247 | _copy_builder_contents(to[k], v) 248 | else: 249 | setattr(to, k, v) 250 | 251 | 252 | def traverse_mapping(f: Callable[[Any], Any], d: Mapping): 253 | """Traverse a nested Mapping, applying 'f' to all non-mapping values.""" 254 | return { 255 | k: traverse_mapping(f, v) if isinstance(v, Mapping) else f(v) 256 | for k, v in d.items() 257 | } 258 | 259 | 260 | def apply(f: PyFunction, *, max_restarts: int = 1, **kwargs) -> ProcessBuilder: 261 | """Apply f to **kwargs as a PyCalcJob or RestartedPyCalcJob. 262 | 263 | Parameters 264 | ---------- 265 | f 266 | The function to apply 267 | max_restarts 268 | The number of times to run 'f'. If >1 then a builder 269 | for a RestartedPyCalcJob is returned, otherwise 270 | a builder for a PyCalcJob is returned. 271 | **kwargs 272 | Keyword arguments to pass to 'f'. Will be converted 273 | to Aiida types using "aiida.orm.to_aiida_type" if 274 | not already a subtype of "aiida.orm.Data". 275 | """ 276 | # TODO: check that 'f' applies cleanly to '**kwargs' 277 | if max_restarts > 1: 278 | builder = ProcessBuilder(RestartedPyCalcJob) 279 | builder.metadata.options.max_restarts = int(max_restarts) 280 | else: 281 | builder = ProcessBuilder(PyCalcJob) 282 | 283 | builder.func = f 284 | builder.metadata.label = f.name 285 | if kwargs: 286 | builder.kwargs = toolz.valmap(ensure_aiida_type, kwargs) 287 | if f.resources: 288 | _apply_pyfunction_resources(f.resources, builder.metadata.options) 289 | return builder 290 | 291 | 292 | def apply_some(f: PyFunction, *, max_restarts: int = 1, **kwargs) -> ProcessBuilder: 293 | """Apply f to **kwargs as a PyCalcJob or RestartedPyCalcJob. 294 | 295 | 'kwargs' may contain _more_ inputs than what 'f' requires: extra 296 | inputs are ignored. 297 | 298 | Parameters 299 | ---------- 300 | f 301 | The function to apply 302 | max_restarts 303 | The number of times to run 'f'. If >1 then a builder 304 | for a RestartedPyCalcJob is returned, otherwise 305 | a builder for a PyCalcJob is returned. 306 | **kwargs 307 | Keyword arguments to pass to 'f'. 
Will be converted
308 |         to Aiida types using "aiida.orm.to_aiida_type" if
309 |         not already a subtype of "aiida.orm.Data".
310 |     """
311 |     if max_restarts > 1:
312 |         builder = ProcessBuilder(RestartedPyCalcJob)
313 |         builder.metadata.options.max_restarts = int(max_restarts)
314 |     else:
315 |         builder = ProcessBuilder(PyCalcJob)
316 | 
317 |     builder.func = f
318 |     builder.metadata.label = f.name
319 |     relevant_kwargs = toolz.keyfilter(lambda k: k in f.parameters, kwargs)
320 |     if relevant_kwargs:
321 |         builder.kwargs = toolz.valmap(ensure_aiida_type, relevant_kwargs)
322 |     if f.resources:
323 |         _apply_pyfunction_resources(f.resources, builder.metadata.options)
324 |     return builder
325 | 
326 | 
327 | def map_(
328 |     f: PyFunction,
329 |     spec: Union[str, MapSpec],
330 |     *,
331 |     max_concurrent_machines: Optional[int] = None,
332 |     max_restarts: int = 1,
333 |     **kwargs,
334 | ) -> aiida.engine.ProcessBuilder:
335 |     """Map 'f' over (a subset of) its inputs as a PyMapJob.
336 | 
337 |     Parameters
338 |     ----------
339 |     f
340 |         Function to map over
341 |     spec
342 |         Specification for which parameters to map over, and how to map them.
343 |     max_concurrent_machines
344 |         The maximum number of machines to use concurrently.
345 |     max_restarts
346 |         The maximum number of times to restart the PyMapJob before returning
347 |         a partial (masked) result and a non-zero exit code.
348 |     **kwargs
349 |         Keyword arguments to 'f'. Any arguments that are to be mapped over
350 |         must be Aiida lists.
351 | 
352 |     Examples
353 |     --------
354 |     >>> from aiida.orm import List
355 |     >>> import aiida_dynamic_workflows as flow
356 |     >>>
357 |     >>> f = flow.step(lambda x, y: x + y, returns="sum")
358 |     >>>
359 |     >>> # We can map over _all_ inputs
360 |     >>> sums = flow.engine.map_(
361 |     ...     f, "x[i], y[i] -> sum[i]", x=List([1, 2, 3]), y=List([4, 5, 6])
362 |     ... )
363 |     >>> # or we can map over a _subset_ of inputs
364 |     >>> only_one = flow.engine.map_(f, "x[i] -> sum[i]", x=List([1, 2, 3]), y=5)
365 |     >>> # or we can do an "outer product":
366 |     >>> outer = flow.engine.map_(
367 |     ...     f, "x[i], y[j] -> sum[i, j]", x=List([1, 2, 3]), y=List([4, 5, 6])
368 |     ... )
369 |     """
370 |     if max_restarts > 1:
371 |         builder = ProcessBuilder(RestartedPyMapJob)
372 |         builder.metadata.options.max_restarts = int(max_restarts)
373 |     else:
374 |         builder = ProcessBuilder(PyMapJob)
375 | 
376 |     builder.func = f
377 |     builder.metadata.label = f.name
378 | 
379 |     if isinstance(spec, str):
380 |         spec = MapSpec.from_string(spec)
381 |     elif not isinstance(spec, MapSpec):
382 |         raise TypeError(f"Expected single string or MapSpec, got {spec}")
383 |     if unknown_params := set(x.name for x in spec.inputs) - set(f.parameters):
384 |         raise ValueError(
385 |             f"{f} cannot be mapped over parameters that "
386 |             f"it does not take: {unknown_params}"
387 |         )
388 |     builder.metadata.options.mapspec = spec.to_string()
389 | 
390 |     if max_concurrent_machines is not None:
391 |         builder.metadata.options.max_concurrent_machines = max_concurrent_machines
392 | 
393 |     if f.resources:
394 |         _apply_pyfunction_resources(f.resources, builder.metadata.options)
395 | 
396 |     if not kwargs:
397 |         return builder
398 | 
399 |     return builder.finalize(**kwargs)
400 | 
401 | 
402 | def _apply_pyfunction_resources(
403 |     resources: Dict, options: aiida.engine.ProcessBuilderNamespace
404 | ) -> None:
405 |     """Apply the resource specification in 'resources' to the CalcJob options 'options'.
406 | 
407 |     This mutates 'options'.
408 |     """
409 |     memory = resources.get("memory")
410 |     if memory is not None:
411 |         # The Aiida Slurm plugin erroneously uses the multiplier "1024" when converting
412 |         # to megabytes and passing to "--mem", so we must use it here also.
413 |         multiplier = {"kB": 1, "MB": 1024, "GB": 1000 * 1024}
414 |         amount, unit = memory[:-2], memory[-2:]
415 |         options.max_memory_kb = int(amount) * multiplier[unit]
416 | 
417 |     cores = resources.get("cores")
418 |     if cores is not None:
419 |         # Re-assign the whole 'resources' input dict to avoid problems with
420 |         # serialization (also, mutating it seems to change the 'resources' for
421 |         # all other Builders, which is not good!).
422 |         options.resources = toolz.assoc(
423 |             options.resources, "num_cores_per_mpiproc", int(cores)
424 |         )
425 | 
426 | 
427 | def all_equal(seq):
428 |     """Return True iff all elements of 'seq' are equal.
429 | 
430 |     Returns 'True' if the sequence contains 0 or 1 elements.
431 |     """
432 |     seq = list(seq)
433 |     if len(seq) in (0, 1):
434 |         return True
435 |     fst, *rest = seq
436 |     return all(r == fst for r in rest)
437 | 
--------------------------------------------------------------------------------
/aiida_dynamic_workflows/workflow.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT License.
3 | 
4 | 
5 | from __future__ import annotations
6 | 
7 | import abc
8 | import copy
9 | from dataclasses import dataclass, replace
10 | from typing import Callable, Dict, Iterator, List, Optional, Set, Tuple, Union
11 | 
12 | import aiida.engine
13 | import graphviz
14 | import toolz
15 | 
16 | from . import common, engine
17 | from .calculations import PyCalcJob, PyMapJob
18 | from .data import PyFunction, PyOutline, ensure_aiida_type
19 | from .utils import render_png
20 | 
21 | # TODO: this will all need to be refactored when we grok
22 | # Aiida's 'Process' and 'Port' concepts.
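# Editor's summary (not part of the original module): the step hierarchy
# defined below composes as follows.
#
#   Step
#   ├── Single
#   │   ├── Process          # wraps an AiiDA ProcessBuilder
#   │   └── Action           # operates on the running workchain itself
#   │       ├── OutputAction # exposes values from the workchain context
#   │       └── PyAction     # runs a PyFunction against the workchain
#   ├── Concurrent(steps)    # children are submitted side by side
#   └── Sequential(steps)    # children run one after another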
23 | 24 | 25 | class Step(metaclass=abc.ABCMeta): 26 | """Abstract base class for steps.""" 27 | 28 | pass 29 | 30 | 31 | class Single(Step): 32 | """A single workflow step.""" 33 | 34 | pass 35 | 36 | 37 | class Action(Single): 38 | """Step that will be run with the current workchain passed as argument.""" 39 | 40 | def do(self, workchain): 41 | """Do the action on the workchain.""" 42 | pass 43 | 44 | 45 | @dataclass(frozen=True) 46 | class Concurrent(Step): 47 | """Step consisting of several concurrent steps.""" 48 | 49 | steps: List[Step] 50 | 51 | 52 | @dataclass(frozen=True) 53 | class Sequential(Step): 54 | """Step consisting of several sequential steps.""" 55 | 56 | steps: List[Step] 57 | 58 | 59 | @dataclass(frozen=True) 60 | class Process(Single): 61 | """Step consisting of a single Aiida Process.""" 62 | 63 | builder: aiida.engine.ProcessBuilder 64 | parameters: Tuple[str] 65 | returns: Tuple[str] 66 | 67 | def __str__(self): 68 | kind = self.builder.process_class 69 | if issubclass(kind, PyCalcJob): 70 | func = self.builder.func 71 | return f"{kind.__name__}[{func.name}(pk: {func.pk})]" 72 | else: 73 | return kind.__name__ 74 | 75 | 76 | @dataclass(frozen=True) 77 | class OutputAction(Action): 78 | """Action step that outputs values from the workflow context.""" 79 | 80 | outputs: Dict[str, str] 81 | 82 | def do(self, workchain): 83 | """Return the named outputs from this workflow.""" 84 | for from_name, to_name in self.outputs.items(): 85 | if from_name in workchain.ctx: 86 | workchain.out(f"return_values.{to_name}", workchain.ctx[from_name]) 87 | else: 88 | workchain.report( 89 | f"Failed to set output '{to_name}': '{from_name}' " 90 | "does not exist on the workchain context (did " 91 | "the step that produces this output fail?)" 92 | ) 93 | 94 | 95 | class PyAction(Action): 96 | """Action step defined by a PyFunction.""" 97 | 98 | action: PyFunction 99 | 100 | def do(self, workchain): 101 | """Do the action on the workchain.""" 102 | self.action(workchain) 103 | 104 | 105 | def single_steps(step: Step) -> Iterator[Single]: 106 | """Yield all Single steps in a given step.""" 107 | if isinstance(step, Single): 108 | yield step 109 | elif isinstance(step, (Concurrent, Sequential)): 110 | yield from toolz.mapcat(single_steps, step.steps) 111 | else: 112 | assert False, f"Unknown step type {type(step)}" 113 | 114 | 115 | def single_processes(step: Step) -> Iterator[Process]: 116 | """Yield all Process steps in a given step.""" 117 | return filter(lambda s: isinstance(s, Process), single_steps(step)) 118 | 119 | 120 | def _check_valid_pyfunction(f: PyFunction): 121 | """Check that the provided PyFunction may be used as part of a workflow.""" 122 | if not isinstance(f, PyFunction): 123 | raise TypeError() 124 | if any(r.startswith("_") for r in f.returns): 125 | raise ValueError( 126 | "Cannot use functions with return names containing underscores " 127 | "in workflows." 128 | ) 129 | if set(f.parameters).intersection(f.returns): 130 | raise ValueError( 131 | "Function has outputs that are named identically to its input(s)." 
132 | ) 133 | 134 | 135 | def _check_pyfunctions_compatible(a: PyFunction, b: PyFunction): 136 | """Check that Pyfunction 'b' has enough inputs/outputs to be compatible with 'a'.""" 137 | _check_valid_pyfunction(a) 138 | _check_valid_pyfunction(b) 139 | if missing_parameters := set(a.parameters) - set(b.parameters): 140 | raise ValueError(f"'{b.name}' is missing parameters: {missing_parameters}") 141 | if missing_returns := set(a.returns) - set(b.returns): 142 | raise ValueError(f"'{b.name}' is missing return values: {missing_returns}") 143 | 144 | 145 | def from_pyfunction(f: PyFunction) -> Step: 146 | """Construct a Step corresponding to applying a PyFunction.""" 147 | _check_valid_pyfunction(f) 148 | return Process( 149 | builder=engine.apply(f), 150 | parameters=f.parameters, 151 | returns=f.returns, 152 | ) 153 | 154 | 155 | def map_(f: PyFunction, *args, **kwargs) -> Step: 156 | """Construct a Step corresponding to mapping a PyFunction. 157 | 158 | Parameters 159 | ---------- 160 | *args, **kwargs 161 | Positional/keyword arguments to pass to 'aiida_dynamic_workflows.engine.map_'. 162 | 163 | See Also 164 | -------- 165 | aiida_dynamic_workflows.engine.map_ 166 | """ 167 | _check_valid_pyfunction(f) 168 | return Process( 169 | builder=engine.map_(f, *args, **kwargs), 170 | parameters=f.parameters, 171 | returns=f.returns, 172 | ) 173 | 174 | 175 | def concurrently(*fs: Union[PyFunction, Step]) -> Step: 176 | """Construct a Step for several tasks executing concurrently.""" 177 | if len(fs) < 2: 178 | raise ValueError("Expected at least 2 steps") 179 | 180 | for i, f in enumerate(fs): 181 | for g in fs[i + 1 :]: 182 | if set(f.returns).intersection(g.returns): 183 | raise ValueError("Steps return values that are named the same") 184 | 185 | returns = [set(f.returns) for f in fs] 186 | 187 | parameters = [set(f.parameters) for f in fs] 188 | if any(a.intersection(b) for a in parameters for b in returns): 189 | raise ValueError("Steps cannot be run concurrently") 190 | 191 | def ensure_single(f): 192 | if isinstance(f, PyFunction): 193 | return from_pyfunction(f) 194 | elif isinstance(f, Single): 195 | return f 196 | else: 197 | raise TypeError(f"Expected PyFunction or Single, got {type(f)}") 198 | 199 | return Concurrent([ensure_single(f) for f in fs]) 200 | 201 | 202 | def new_workflow(name: str) -> Outline: 203 | """Return an Outline with no steps , and the given name.""" 204 | return Outline(steps=(), label=name) 205 | 206 | 207 | def first(s: Union[PyFunction, Step]) -> Outline: 208 | """Return an Outline consisting of a single Step.""" 209 | return Outline(steps=(ensure_step(s),)) 210 | 211 | 212 | def ensure_step(s: Union[Step, PyFunction]) -> Step: 213 | """Return a Step, given a Step or a PyFunction.""" 214 | if isinstance(s, Step): 215 | return s 216 | elif isinstance(s, PyFunction): 217 | return from_pyfunction(s) 218 | elif isinstance(s, Outline): 219 | return Sequential(s.steps) 220 | else: 221 | raise TypeError(f"Expected PyFunction, Step, or Outline, got {type(s)}") 222 | 223 | 224 | def output(*names: str, **mappings: str) -> OutputAction: 225 | """Return an OutputAction that can be used in an outline.""" 226 | outputs = {name: name for name in names} 227 | outputs.update({from_: to_ for from_, to_ in mappings.items()}) 228 | 229 | return OutputAction(outputs) 230 | 231 | 232 | @dataclass(frozen=True) 233 | class Outline: 234 | """Outline of the steps to be executed. 235 | 236 | Each step kicks off either a _single_ process, or several processes 237 | concurrently. 
238 | """ 239 | 240 | steps: Tuple[Step] 241 | #: Sequence of steps constituting the workflow 242 | label: Optional[str] = None 243 | #: Optional label identifying the workflow 244 | 245 | def rename(self, name: str) -> Outline: 246 | """Return a new outline with a new name.""" 247 | return replace(self, label=name) 248 | 249 | def then(self, step: Union[PyFunction, Step, Outline]) -> Outline: 250 | """Add the provided Step to the outline. 251 | 252 | If a PyFunction is provided it is added as a single step. 253 | """ 254 | return replace(self, steps=self.steps + (ensure_step(step),)) 255 | 256 | def join(self, other: Outline) -> Outline: 257 | """Return a new outline consisting of this and 'other' joined together.""" 258 | return replace(self, steps=self.steps + other.steps) 259 | 260 | def returning(self, *names, **mappings) -> Outline: 261 | """Return the named values from this workflow.""" 262 | possible_names = self.parameters.union(self.all_outputs) 263 | existing_names = self.returns 264 | requested_names = set(names).union(mappings.keys()) 265 | 266 | if invalid_names := requested_names - possible_names: 267 | raise ValueError( 268 | f"Cannot return any of {invalid_names}; " 269 | "they do not appear in this outline." 270 | ) 271 | 272 | if already_returned := requested_names.intersection(existing_names): 273 | raise ValueError( 274 | "The following names are already returned " 275 | f"by this outline: {already_returned}." 276 | ) 277 | 278 | return replace(self, steps=self.steps + (output(*names, **mappings),)) 279 | 280 | @property 281 | def _single_processes(self) -> Iterator[Process]: 282 | for step in self.steps: 283 | yield from single_processes(step) 284 | 285 | @property 286 | def _single_steps(self) -> Iterator[Single]: 287 | for step in self.steps: 288 | yield from single_steps(step) 289 | 290 | @property 291 | def parameters(self) -> Set[str]: 292 | """Parameters of the Outline.""" 293 | raw_parameters = toolz.reduce( 294 | set.union, 295 | (s.parameters for s in self._single_processes), 296 | set(), 297 | ) 298 | return raw_parameters - self.all_outputs 299 | 300 | @property 301 | def returns(self) -> Set[str]: 302 | """Values returned by this Outline.""" 303 | ret = set() 304 | for step in self._single_steps: 305 | if isinstance(step, OutputAction): 306 | ret.update(step.outputs.values()) 307 | return ret 308 | 309 | @property 310 | def all_outputs(self) -> Set[str]: 311 | """All outputs of this outline.""" 312 | return toolz.reduce( 313 | set.union, 314 | (s.returns for s in self._single_processes), 315 | set(), 316 | ) 317 | 318 | def visualize(self, as_png=False) -> Union[graphviz.Digraph]: 319 | """Return a Graphviz visualization of this outline.""" 320 | g = graphviz.Digraph(graph_attr=dict(rankdir="LR")) 321 | 322 | mapped_inputs = set() 323 | 324 | for proc in self._single_processes: 325 | proc_id = str(id(proc)) 326 | is_mapjob = issubclass(proc.builder.process_class, PyMapJob) 327 | 328 | opts = dict(shape="rectangle") 329 | output_opts = dict() 330 | if is_mapjob: 331 | for d in (opts, output_opts): 332 | d["style"] = "filled" 333 | d["fillcolor"] = "#ffaaaaaa" 334 | 335 | g.node(proc_id, label=proc.builder.func.name, **opts) 336 | 337 | if is_mapjob: 338 | spec = common.MapSpec.from_string(proc.builder.metadata.options.mapspec) 339 | for p in spec.parameters: 340 | mapped_inputs.add(p) 341 | g.node(p, **output_opts) 342 | 343 | for r in proc.returns: 344 | g.node(r, **output_opts) 345 | g.edge(proc_id, r) 346 | 347 | for p in self.parameters - mapped_inputs: 
348 |             g.node(p, style="filled", fillcolor="#aaaaaa")
349 | 
350 |         for proc in self._single_processes:
351 |             proc_id = str(id(proc))
352 |             for p in proc.parameters:
353 |                 g.edge(p, proc_id)
354 |         if as_png:
355 |             return render_png(g)
356 |         return g
357 | 
358 |     def traverse(self, f: Callable[[Single], Single]) -> Outline:
359 |         """Return a copy of this Outline, with 'f' applied to all Single steps."""
360 | 
361 |         def transform(x: Step) -> Step:
362 |             if isinstance(x, Single):
363 |                 return f(x)
364 |             elif isinstance(x, (Concurrent, Sequential)):
365 |                 return type(x)(steps=tuple(map(transform, x.steps)))
366 |             else:
367 |                 raise TypeError(f"Unknown step type {type(x)}")
368 | 
369 |         return replace(self, steps=tuple(map(transform, self.steps)))
370 | 
371 |     def with_restarts(self, step_restarts: Dict[PyFunction, int]) -> Outline:
372 |         """Return a copy of this Outline with restarts added to all specified steps.
373 | 
374 |         Examples
375 |         --------
376 |         >>> # Set up the original flow
377 |         >>> import aiida_dynamic_workflows as flows
378 |         >>> a = flows.step(lambda x, y: x + y, returning="z")
379 |         >>> b = flows.step(lambda z: 2 * z)
380 |         >>> flow = flows.workflow.first(a).then(b)
381 |         >>> # Apply restarts: a restarted up to 2 times, b up to 3.
382 |         >>> new_flow = flow.with_restarts({a: 2, b: 3})
383 |         """
384 | 
385 |         def mapper(step):
386 |             try:
387 |                 max_restarts = step_restarts[step.builder.func]
388 |             except (AttributeError, KeyError):
389 |                 return step
390 |             else:
391 |                 return replace(step, builder=step.builder.with_restarts(max_restarts))
392 | 
393 |         return self.traverse(mapper)
394 | 
395 |     def replace_steps(self, step_map: Dict[PyFunction, PyFunction]) -> Outline:
396 |         """Return a copy of this Outline, replacing the step functions specified.
397 | 
398 |         Any steps that are PyCalcJobs or PyMapJobs executing a PyFunction specified
399 |         in 'step_map' will have the function executed replaced by the corresponding
400 |         value in 'step_map'.
401 | 
402 |         See Also
403 |         --------
404 |         traverse
405 | 
406 |         Examples
407 |         --------
408 |         >>> # Set up the original flow
409 |         >>> import aiida_dynamic_workflows as flows
410 |         >>> a = flows.step(lambda x, y: x + y, returning="z")
411 |         >>> b = flows.step(lambda z: 2 * z)
412 |         >>> flow = flows.workflow.first(a).then(b)
413 |         >>> # Create the new steps
414 |         >>> new_a = flows.step(lambda x, y: x * y, returning="z")
415 |         >>> new_b = flows.step(lambda z: 5 * z)
416 |         >>> # Replace the old steps with new ones!
417 |         >>> new_flow = flow.replace_steps({a: new_a, b: new_b})
418 |         """
419 |         for a, b in step_map.items():
420 |             _check_pyfunctions_compatible(a, b)
421 | 
422 |         def mapper(step):
423 |             try:
424 |                 new_func = step_map[step.builder.func]
425 |             except (AttributeError, KeyError):
426 |                 return step
427 |             else:
428 |                 b = copy.deepcopy(step.builder)
429 |                 b.func = new_func
430 |                 return Process(
431 |                     builder=b, parameters=new_func.parameters, returns=new_func.returns
432 |                 )
433 | 
434 |         return self.traverse(mapper)
435 | 
436 |     def on(
437 |         self,
438 |         env: engine.ExecutionEnvironment,
439 |         max_concurrent_machines: Optional[int] = None,
440 |     ) -> Outline:
441 |         """Return a new Outline with the execution environment set for all steps."""
442 | 
443 |         def transform(s: Single):
444 |             if not isinstance(s, Process):
445 |                 return s
446 |             return replace(s, builder=s.builder.on(env, max_concurrent_machines))
447 | 
448 |         return self.traverse(transform)
449 | 
450 | 
451 | # TODO: See if we can come up with a cleaner separation of "logical data flow"
452 | # and "error handling flow".
453 | 
454 | # TODO: see if we can do this more "directly" with the Aiida/Plumpy
455 | # "process" interface. As-is we are running our own "virtual machine"
456 | # on top of Aiida's!
457 | class PyWorkChain(aiida.engine.WorkChain):
458 |     """WorkChain for executing Outlines."""
459 | 
460 |     @classmethod
461 |     def define(cls, spec):  # noqa: D102
462 |         super().define(spec)
463 |         spec.input("outline", valid_type=PyOutline)
464 |         spec.input_namespace("kwargs", dynamic=True)
465 |         spec.output_namespace("return_values", dynamic=True)
466 |         spec.outline(
467 |             cls.setup,
468 |             aiida.engine.while_(cls.is_not_done)(cls.do_step, cls.check_output),
469 |             cls.finalize,
470 |         )
471 | 
472 |         spec.exit_code(401, "INVALID_STEP", message="Invalid step definition")
473 |         spec.exit_code(
474 |             450, "STEP_RETURNED_ERROR_CODE", message="A step returned an error code"
475 |         )
476 | 
477 |     @classmethod
478 |     def get_builder(cls):  # noqa: D102
479 |         return engine.ProcessBuilder(cls)
480 | 
481 |     # TODO: have the outline persisted into "self.ctx"; this way
482 |     # we don't need to reload it from the DB on every step.
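    # Editor's note: a brief usage sketch, assuming an existing Outline named
    # 'my_outline' and a loaded AiiDA profile (both hypothetical here). The
    # 'build' helper at the end of this module produces a builder for this
    # WorkChain:
    #
    #     builder = build(my_outline, x=1, y=2)
    #     node = aiida.engine.submit(builder)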
483 | 484 | def setup(self): # noqa: D102 485 | """Set up the state for the workchain.""" 486 | outline = self.inputs.outline.value 487 | self.ctx._this_step = 0 488 | self.ctx._num_steps = len(outline.steps) 489 | self.ctx._had_errors = False 490 | 491 | if "kwargs" in self.inputs: 492 | self.ctx.update(self.inputs.kwargs) 493 | 494 | def finalize(self): 495 | """Finalize the workchain.""" 496 | if self.ctx._had_errors: 497 | return self.exit_codes.STEP_RETURNED_ERROR_CODE 498 | 499 | def is_not_done(self) -> bool: 500 | """Return True when there are no more steps in the workchain.""" 501 | return self.ctx._this_step < self.ctx._num_steps 502 | 503 | def do_step(self): 504 | """Execute the current step in the workchain.""" 505 | this_step = self.ctx._this_step 506 | self.report(f"doing step {this_step} of {self.ctx._num_steps}") 507 | step = self.inputs.outline.value.steps[this_step] 508 | 509 | if isinstance(step, (Single, Sequential)): 510 | concurrent_steps = [step] 511 | elif isinstance(step, Concurrent): 512 | concurrent_steps = list(step.steps) 513 | else: 514 | self.report(f"Unknown step type {type(step)}") 515 | return self.exit_codes.INVALID_STEP 516 | 517 | for s in concurrent_steps: 518 | self._base_step(s) 519 | 520 | self.ctx._this_step += 1 521 | 522 | def _base_step(self, s: Step): 523 | if isinstance(s, Process): 524 | try: 525 | inputs = get_keys(self.ctx, s.parameters) 526 | except KeyError as err: 527 | self.report(f"Skipping step {s} due to missing inputs: {err.args}") 528 | self.ctx._had_errors = True 529 | return 530 | 531 | finalized_builder = s.builder.finalize(**inputs) 532 | 533 | fut = self.submit(finalized_builder) 534 | self.report(f"Submitted {s} (pk: {fut.pk})") 535 | self.to_context(_futures=aiida.engine.append_(fut)) 536 | elif isinstance(s, Sequential): 537 | ol = Outline(steps=tuple(s.steps)) 538 | try: 539 | inputs = get_keys(self.ctx, ol.parameters) 540 | except KeyError as err: 541 | self.report(f"Skipping step {s} due to missing inputs: {err.args}") 542 | self.ctx._had_errors = True 543 | return 544 | 545 | builder = PyWorkChain.get_builder() 546 | builder.outline = PyOutline(outline=ol) 547 | builder.kwargs = inputs 548 | fut = self.submit(builder) 549 | self.report(f"Submitted sub-workchain: {fut.pk}") 550 | self.to_context(_futures=aiida.engine.append_(fut)) 551 | elif isinstance(s, Action): 552 | return s.do(self) 553 | 554 | def check_output(self): 555 | """Check the output of the current step in the workchain.""" 556 | if "_futures" not in self.ctx: 557 | return 558 | 559 | for step in self.ctx._futures: 560 | if step.exit_status != 0: 561 | self.report(f"Step {step} reported a problem: {step.exit_message}") 562 | self.ctx._had_errors = True 563 | for name, value in return_values(step): 564 | self.ctx[name] = value 565 | 566 | del self.ctx["_futures"] 567 | 568 | 569 | def get_keys(dictionary, keys): 570 | """Select all keys in 'keys' from 'dictionary'.""" 571 | missing = [] 572 | r = dict() 573 | for k in keys: 574 | if k in dictionary: 575 | r[k] = dictionary[k] 576 | else: 577 | missing.append(k) 578 | if missing: 579 | raise KeyError(*missing) 580 | return r 581 | 582 | 583 | # XXX: This is all very tightly coupled to the definitions of "PyCalcJob" 584 | # and "PyMapJob". 585 | def return_values(calc: aiida.orm.ProcessNode): 586 | """Yield (name, node) tuples of return values of the given ProcessNode. 587 | 588 | This assumes an output port namespace called "return_values". 
589 | """ 590 | try: 591 | return calc.outputs.return_values.items() 592 | except AttributeError: 593 | return () 594 | 595 | 596 | def build(outline: Outline, **kwargs) -> PyWorkChain: 597 | """Return a ProcessBuilder for launching the given Outline.""" 598 | # TODO: validate that all ProcessBuilders in 'outline' are fully specified 599 | _check_outline(outline) 600 | builder = PyWorkChain.get_builder() 601 | builder.outline = PyOutline(outline=outline) 602 | if outline.label: 603 | builder.metadata.label = outline.label 604 | if missing := set(outline.parameters) - set(kwargs): 605 | raise ValueError(f"Missing parameters: {missing}") 606 | if superfluous := set(kwargs) - set(outline.parameters): 607 | raise ValueError(f"Too many parameters: {superfluous}") 608 | builder.kwargs = toolz.valmap(ensure_aiida_type, kwargs) 609 | return builder 610 | 611 | 612 | def _check_outline(outline: Outline): 613 | for proc in outline._single_processes: 614 | if proc.builder.code is None: 615 | raise ValueError( 616 | f"Execution environment not specified for {proc.builder.func.name}. " 617 | "Did you remember to call 'on(env)' on the workflow?" 618 | ) 619 | --------------------------------------------------------------------------------