├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── element_idx.svg ├── environment.yml ├── matflow ├── __init__.py ├── _version.py ├── api.py ├── cli.py ├── config.py ├── errors.py ├── extensions.py ├── hicklable.py ├── models │ ├── __init__.py │ ├── command.py │ ├── construction.py │ ├── element.py │ ├── parameters.py │ ├── software.py │ ├── task.py │ └── workflow.py ├── profile.py ├── scripting.py ├── utils.py └── validation.py ├── requirements.txt ├── setup.py ├── tests ├── __init__.py ├── test_element_idx.py ├── test_hicklable.py ├── test_task.py └── test_workflow.py └── workflow_viz.svg /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # pipenv 86 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 87 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 88 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not 89 | # install all needed dependencies. 90 | #Pipfile.lock 91 | 92 | # celery beat schedule file 93 | celerybeat-schedule 94 | 95 | # SageMath parsed files 96 | *.sage.py 97 | 98 | # Environments 99 | .env 100 | .venv 101 | env/ 102 | venv/ 103 | ENV/ 104 | env.bak/ 105 | venv.bak/ 106 | 107 | # Spyder project settings 108 | .spyderproject 109 | .spyproject 110 | 111 | # Rope project settings 112 | .ropeproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | .dmypy.json 120 | dmypy.json 121 | 122 | # Pyre type checker 123 | .pyre/ 124 | 125 | # VS Code 126 | /.vscode 127 | *.code-workspace 128 | 129 | # Intellij IDEs 130 | /.idea 131 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## [0.2.27] - 2024.06.26 4 | 5 | ### Fixed 6 | 7 | - Fix dependencies 8 | 9 | ## [0.2.26] - 2022.03.18 10 | 11 | ### Fixed 12 | 13 | - Use `traceback` module to print full exception from an output map that generates an exception. 
14 | - Fix bug in `scripting.get_snippet_signature` that produced code with a syntax error. 15 | 16 | ## [0.2.25] - 2021.12.20 17 | 18 | ### Fixed 19 | 20 | - Fix bug where preparation and processing run options were ignored on workflow load. 21 | - Fix bug where archive options were ignored on workflow load. 22 | 23 | ## [0.2.24] - 2021.10.06 24 | 25 | ### Fixed 26 | 27 | - Fix bug introduced in 0.2.23, where default preparation/processing run options were ignored. 28 | 29 | ## [0.2.23] - 2021.10.06 30 | 31 | ### Fixed 32 | 33 | - Fix inability to override default (preparation/processing) run options with an empty dict. 34 | 35 | ## [0.2.22] - 2021.08.14 36 | 37 | ### Added 38 | 39 | - Add support for multiple archives. Fix [#72](https://github.com/LightForm-group/matflow/issues/72). 40 | 41 | ### Fixed 42 | 43 | - Fix error message if an input mapper function has an unknown argument. 44 | - Catch and print error message from output map function failure. 45 | - Fix incorrect import key when importing from a non-trivial context that is not defined in the schema. 46 | 47 | ## [0.2.21] - 2021.06.06 48 | 49 | ### Added 50 | 51 | - Allow passing a subset of the task input parameters to the output mapper function. Resolve [#102](https://github.com/LightForm-group/matflow/issues/102). 52 | - Allow passing all iterations of an input parameter to a function mapper. Resolve [#104](https://github.com/LightForm-group/matflow/issues/104). 53 | - Allow running an on-demand archive on an existing/completed workflow: `matflow archive path/to/workflow/directory ARCHIVE_NAME`. Resolve [#68](https://github.com/LightForm-group/matflow/issues/68). 54 | - Allow specifying `default_metadata` in the `config.yml` file. Keys are merged with `metadata` specified in the workflow spec file. Resolve [#98](https://github.com/LightForm-group/matflow/issues/98). 55 | 56 | ### Fixed 57 | 58 | - Save element resource usage (e.g. run time). Fix [#97](https://github.com/LightForm-group/matflow/issues/97). 59 | - Fix bug when determining the "producing task" in an iteration pathway. Fix [#105](https://github.com/LightForm-group/matflow/issues/105). 60 | - Fix bug when a file input parameter is specified with a `$HOME` tilde: `~/path/to/file`. 61 | 62 | ## [0.2.20] - 2021.05.12 63 | 64 | ### Added 65 | 66 | - Add `Task.cleanup` attribute that can be used to optionally specify a list of glob patterns, representing file names to remove at the end of `Workflow.process_task_element`. Useful for removing very large simulation outputs that are not required after MatFlow has extracted the requested data. 67 | - Add methods to `Element` object: `get_file_lines` and `print_file_lines`, which take a file name and a slice of lines to get or print. 68 | 69 | ### Changed 70 | 71 | - Change working directory to element directory for invoking input/output/function mapper functions. This is required in some cases where a tool or script does not accept a file path as an argument. 72 | - Allow specifying the `task_idx` directly when importing parameters. This overrides any specified `context`. 73 | 74 | ### Fixed 75 | 76 | - Catch `ImportError` and `SyntaxError` when trying to load extensions. 77 | - Import from the highest task index when importing a parameter that has been through a parameter-modifying task - fix [#103](https://github.com/LightForm-group/matflow/issues/103). This can be overridden by specifying a `task_idx` directly. 
78 | 79 | ## [0.2.19] - 2021.04.12 (April 2021 - Fix 1) 80 | 81 | ### Fixed 82 | 83 | - Fix type problem when input schema keys are specified "inline" in the task schema (e.g. as `CRC_file_path[file=True,save=False]`), in which the keys remain as type `str`, when they should be `bool`. 84 | - Fix problem when an imported parameter is used in a task that is iterated. 85 | 86 | ## [0.2.18] - 2021.04.10 (April 2021) 87 | 88 | ### Fixed 89 | 90 | - Fix misleading error message when a task parameter specified as a file path does not actually exist as a file. 91 | - Fix bug where circular dependence among all possible dependency pathways was not caught by MatFlow. Fix [#88](https://github.com/LightForm-group/matflow/issues/88). 92 | - Fix issue with accessing parameter data with dot-notation via their "safe names". Fix [#87](https://github.com/LightForm-group/matflow/issues/87). 93 | 94 | ### Added 95 | 96 | - Add new parameter key `ignore_dependency_from`, which is a list of task names. This allows us to exclude tasks when considering the dependencies of this parameter. Fix [#89](https://github.com/LightForm-group/matflow/issues/89). 97 | - Allow embedding file-path inputs (inputs that are text files) into the HDF5 file. Fix [#86](https://github.com/LightForm-group/matflow/issues/86). 98 | - Add `Task.unique_name` property, which appends the non-trivial `Task.context` to `Task.name`. 99 | - Tasks can be accessed from the task list via dot-notation. Fix [#90](https://github.com/LightForm-group/matflow/issues/90). 100 | - Add `Task.elements_idx` property to retrieve the correct `elements_idx` dict for that task. 101 | - Add new exception type: `ParameterImportError`. 102 | - Add ability to import parameters from existing workflows. Fix [#30](https://github.com/LightForm-group/matflow/issues/30). 103 | 104 | ### Changed 105 | 106 | - Non-trivial task contexts are now part of the task directory name to help distinguish task directories where multiple contexts are used. Fix [#50](https://github.com/LightForm-group/matflow/issues/50). 107 | - Add `context` argument to `Workflow.get_input_tasks` and `Workflow.get_output_tasks`. 108 | 109 | ## [0.2.17] - 2021.02.15 110 | 111 | ### Fixed 112 | 113 | - Fix issue [#82](https://github.com/LightForm-group/matflow/issues/82) where the default group is not defined in the `Workflow.element_idx` for tasks where no local inputs are defined. 114 | 115 | ### Added 116 | 117 | - Add support for flexible positioning of parameter-modifying tasks ([#81](https://github.com/LightForm-group/matflow/issues/81)). 118 | 119 | ## [0.2.16] - 2021.02.05 120 | 121 | ### Fixed 122 | 123 | - Bump hpcflow to v0.1.13 to fix #80 and then to v0.1.14 to fix a database locking issue and a bug with choosing the correct working directories. 124 | 125 | ## [0.2.15] - 2021.01.18 126 | 127 | ### Changed 128 | 129 | - Change an Exception to a warning in `Workflow.get_element_data` to allow manually deleting element data without corrupting the workflow. 130 | 131 | ## [0.2.14] - 2021.01.17 132 | 133 | ### Added 134 | 135 | - Add method `Task.get_elements_from_iteration(iteration_idx)`. 136 | 137 | ## [0.2.13] - 2020.12.17 138 | 139 | ### Fixed 140 | 141 | - Fix bug when populating `Workflow.elements_idx` for more than two iterations. 142 | 143 | ## [0.2.12] - 2020.12.16 144 | 145 | ### Added 146 | 147 | - Add `Workflow.figures` attribute for storing associated figure definitions. 148 | - Add `Workflow.metadata` attribute for storing arbitrary metadata (will later be used for Zenodo archiving). 
149 | - Add various `Workflow` static methods to help with retrieving information in the viewer without loading the whole workflow via `hickle`. 150 | - Add `get_task_schemas` to API to load the available task schemas without generating a workflow. 151 | - Add `refresh` bool parameter to `Config.set_config`, to force a reload of the configuration. 152 | - Support inputs as dependencies as well as outputs. 153 | - Support "parameter modifying" tasks (a task which outputs a parameter that is also an input to that task). 154 | - Add `iterate_run_options` to Workflow. 155 | - Add new methods for finding dependent and dependency tasks/parameters, and upstream/downstream parameter values associated with a given element. 156 | - Add input option: `include_all_iterations`. If True, inputs from all iterations are passed to input map functions. 157 | 158 | ### Fixed 159 | 160 | - Only save input/output map files if they exist! 161 | - Fix bug so that groups are propagated correctly. 162 | - Fix various code formatting issues. 163 | - Fix failure to raise on invalid schemas. 164 | - Fix bug when the same file is to be saved from multiple output maps. 165 | 166 | ### Changed 167 | - Redo task sorting algorithm such that minimal ordering changes are made. 168 | - Set `stats` bool to False by default. 169 | - Bump hpcflow version to v0.1.12. 170 | 171 | ## [0.2.11] - 2020.09.29 172 | 173 | ### Fixed 174 | 175 | - Resolve `~` in task schema and software file paths specified in the configuration file. 176 | 177 | ## [0.2.10] - 2020.09.29 178 | 179 | ### Fixed 180 | 181 | - Fix error when a function mapper function does not return anything. 182 | 183 | ## [0.2.9] - 2020.09.17 184 | 185 | ### Added 186 | 187 | - Add scripting module for generating Python source scripts. 188 | - Default run options can be specified in the MatFlow configuration file for task, preparation and processing jobs using both "sticky" and "non-sticky" keys: `default_run_options`, `default_sticky_run_options`, `default_preparation_run_options`, `default_sticky_preparation_run_options`, `default_processing_run_options` and `default_sticky_processing_run_options`. The "sticky" defaults are always applied (but workflow-specified run options take precedence), whereas the "non-sticky" defaults are only applied if a task has no workflow-specified run options (see the example configuration sketch at the end of this change log). 189 | 190 | ## [0.2.8] - 2020.09.01 191 | 192 | ### Changed 193 | - Add `version_info` to `Software.__repr__` method. 194 | - Validate source maps after the missing-schema check. 195 | 196 | ### Fixed 197 | - Remove vestigial and buggy line in `construction.get_element_idx` which would lead to enormous memory usage for large sequences. 198 | 199 | ## [0.2.7] - 2020.08.18 200 | 201 | ### Added 202 | - Default values can be specified for output map options within the schema. 203 | - Default values can be specified for task input parameters within the schema. 204 | - Depending on the inputs defined, different commands can be run via "command pathway" definitions in the schema implementations. 205 | 206 | ### Changed 207 | 208 | - Uses `hickle` version 4. 209 | - Group structure in workflow HDF5 file has changed (backwards-incompatible); element data is more conveniently organised for inspecting the HDF5 file manually. 210 | 211 | ### Fixed 212 | 213 | - Fix problem when a task input key includes slashes. 214 | 215 | ## [0.2.6] - 2020.07.08 216 | 217 | ### Added 218 | 219 | - Add alternate scratch feature to allow a given task to be executed within a separate temporary directory. 
220 | 221 | ### Fixed 222 | 223 | - Fix bug if specifying `merge_priority` on the default group. 224 | 225 | ### Changed 226 | 227 | - Bump hpcflow to v0.1.10 228 | 229 | ## [0.2.5] - 2020.06.27 230 | 231 | ### Fixed 232 | 233 | - Fix copying of profile file to the workflow directory when the profile file path is not in the current working directory. 234 | 235 | ## [0.2.4] - 2020.06.26 236 | 237 | ### Changed 238 | 239 | - Fix dependency `hickle` version for now, until we can assess requirements for jumping to version 4. 240 | 241 | ## [0.2.3] - 2020.06.26 242 | 243 | ### Changed 244 | 245 | - Files generated by input maps are only saved into the workflow file if explicitly requested with `save: true`. 246 | 247 | ### Fixed 248 | 249 | - Fix bug in `SourcesPreparation.get_formatted_commands` that appears if there are no commands. 250 | 251 | ## [0.2.2] - 2020.06.09 252 | 253 | ### Changed 254 | 255 | - Improved Dropbox authorization flow. 256 | - Bump hpcflow to v0.1.9 257 | 258 | ## [0.2.1] - 2020.06.09 259 | 260 | ### Fixed 261 | 262 | - Fix bug in reading `default_preparation_run_options` and `default_processing_run_options` dicts from the config file. 263 | 264 | ## [0.2.0] - 2020.06.09 265 | 266 | ### Added 267 | 268 | - Add a `Workflow.history` attribute that tracks when the workflow was modified. It also stores pertinent software versions. 269 | - Add a CLI command `matflow validate` that runs through the task schema and extension validation. 270 | - Add a CLI command `matflow kill`, which kills all executing and pending tasks. 271 | - Added configuration option `prepare_process_scheduler_options` to specify scheduler options for the prepare and process tasks. 272 | - matflow profile is stored as a `dict` in addition to a string representation of the profile file (both in the `Workflow.profile` attribute). 273 | 274 | ### Changed 275 | 276 | - Module and function `jsonable.py` and `to_jsonable` renamed to `hicklable.py` and `to_hicklable`. 277 | - Workflow and Task attributes in the workflow HDF5 file are now represented without leading underscores. 278 | - Tasks with only a single element use the task directory directly instead of using an element sub-directory. 279 | - Loading extensions and configuration files has been moved from the root `__init__` to separate modules. 280 | - `make_workflow`, `submit_workflow`, `load_workflow`, `append_schema_source`, `prepend_schema_source` and `validate` can now be imported from the root level: `from matflow import make_workflow` etc. 281 | - There are no longer unsightly global variables for `TASK_INPUT_MAP` etc. This functionality has been subsumed into the global `Config` class. This is tidier and provides a better place for some validation. 282 | - Software key `sources` has been replaced by `environment`. 283 | - hpcflow configuration directory is generated within the matflow configuration directory. 284 | - Jobscript names refer to the task to which they prepare/execute/process 285 | - hpcflow profile is passed as a `dict` to hpcflow. For information, the hpcflow profile is still dumped to a file. 286 | 287 | ## [0.1.3] - 2020.05.27 288 | 289 | - New release for Zenodo archive. 290 | 291 | ## [0.1.2] - 2020.05.12 292 | 293 | - Latest dev branch merged... 294 | 295 | ## [0.1.1] - 2020.05.07 296 | 297 | ### Fixed 298 | 299 | - Added missing dependency. 300 | 301 | ## [0.1.0] - 2020.05.07 302 | 303 | Initial release. 
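To make the "sticky"/"non-sticky" run-options defaults described in the 0.2.9 and 0.2.23 entries above concrete, here is a minimal `config.yml` sketch. The top-level key names are the real configuration keys; the scheduler option names under them (`num_cores`, `time_limit`) are illustrative assumptions rather than a documented schema:

```yaml
# Sketch of ~/.matflow/config.yml (scheduler option names are illustrative)
default_sticky_run_options:
  num_cores: 1             # always merged; workflow-specified options take precedence
default_run_options:
  time_limit: "01:00:00"   # applied only when a task specifies no run options at all
default_sticky_preparation_run_options:
  num_cores: 1
default_processing_run_options:
  time_limit: "00:10:00"
```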
304 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. 
License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. 
Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. 
The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. 
This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 
368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 374 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![DOI](https://zenodo.org/badge/219949875.svg)](https://zenodo.org/badge/latestdoi/219949875) [![PyPI version](https://badge.fury.io/py/matflow.svg)](https://badge.fury.io/py/matflow) 2 | 3 | ## **This code has been superseded by a new version that can be found here: https://github.com/hpcflow/matflow-new.** 4 | 5 | # MatFlow 6 | 7 | MatFlow is a framework for running reproducible workflows in materials science, developed in the EPSRC programme grant [LightForm](http://lightform.org.uk), a research programme on light alloy formability. It is a Python program that interacts with software (open-source and proprietary) used in materials science via extensions (see supported extensions below). It is particularly suited for hybrid workflows 8 | (involving experimental data and computational work), such as HPC model calibration. Outputs, together with details of the workflow, are automatically stored in an open-source file format for post-processing, which MatFlow can automatically upload to data repositories like [Zenodo](https://zenodo.org/). 9 | 10 | See [this repository](https://github.com/LightForm-group/UoM-CSF-matflow) for information regarding a MatFlow installation. 11 | 12 | ## Extensions 13 | 14 | MatFlow uses extension packages to interact with arbitrary software. Here is a list of current MatFlow extensions. 15 | 16 | ### Released/in-progress extensions 17 | | Software | Description | Status | Version | 18 | | ------ | ------------- | ------- | ------- | 19 | | [DAMASK](https://damask.mpie.de/) | Düsseldorf Advanced Material Simulation Kit (crystal plasticity) | [Released](https://github.com/LightForm-group/matflow-damask) | [![PyPI version](https://img.shields.io/pypi/v/matflow-damask)](https://pypi.org/project/matflow-damask) | 20 | | [MTEX](https://mtex-toolbox.github.io/) | Matlab toolbox for analyzing and modeling crystallographic textures | [Released](https://github.com/LightForm-group/matflow-mtex) | [![PyPI version](https://img.shields.io/pypi/v/matflow-mtex)](https://pypi.org/project/matflow-mtex) | 21 | | [formable](https://github.com/LightForm-group/formable) | Formability analyses in Python | [Released](https://github.com/LightForm-group/matflow-formable) | [![PyPI version](https://img.shields.io/pypi/v/matflow-formable)](https://pypi.org/project/matflow-formable) | 22 | | [DefDAP](https://github.com/MechMicroMan/DefDAP) | A Python library for correlating EBSD and HRDIC data. 
| [Released](https://github.com/LightForm-group/matflow-defdap) | [![PyPI version](https://img.shields.io/pypi/v/matflow-defdap)](https://pypi.org/project/matflow-defdap) | 23 | | [Abaqus](https://www.3ds.com/products-services/simulia/products/abaqus/) | Finite element analysis | In-progress | [![PyPI version](https://img.shields.io/pypi/v/matflow-abaqus)](https://pypi.org/project/matflow-abaqus) | 24 | | [Neper](http://www.neper.info) | Polycrystal generation and meshing | [Released/In-progress](https://github.com/LightForm-group/matflow-neper) | [![PyPI version](https://img.shields.io/pypi/v/matflow-neper)](https://pypi.org/project/matflow-neper) | 25 | 26 | 27 | ### Example inputs/outputs 28 | | Label | Attributes | Output from tasks | Input to tasks | 29 | | ----------------------- | ------------------------------------------------------------ | ----------------------------------------- | ------------------------------------------------------------ | 30 | | ODF | crystal_symmetry
specimen_symmetry
euler_angles
euler_angle_labels
weights
orientation_coordinate_system | get_model_texture
estimate_ODF
| sample_texture | 31 | | microstructure_seeds | position
**orientations**
grid_size
phase_label | generate_microstructure_seeds | generate_volume_element | 32 | | orientations | euler_angles
euler_angle_labels
orientation_coordinate_system | sample_texture | generate_volume_element | 33 | | volume_element | grid
size
origin
**orientations**
grain_orientation_idx
grain_phase_label_idx
phase_labels
voxel_grain_idx
voxel_homogenization_idx | generate_volume_element | visualise_volume_element
simulate_volume_element_loading | 34 | | load_case | total_time
num_increments
def_grad_aim
def_grad_rate
stress
rotation | generate_load_case | simulate_volume_element_loading | 35 | | volume_element_response | ... | simulate_volume_element_loading | | 36 | 37 | ## Specifying default run options 38 | 39 | Default run options (i.e. options passed to the scheduler) can be specified in a few ways. Firstly, within the workflow file, `run_options` specified at the top level will be used for any tasks that do not have a `run_options` key specified. If a task *does* have a `run_options` key specified, the global `run_options` will not be used at all for that task. 40 | 41 | Additionally, you can specify default run options in the MatFlow configuration file (`config.yml`, by default generated in `~/.matflow`) with the options `default_run_options` and `default_sticky_run_options`. The "sticky" defaults are merged with any run options specified in the workflow file (with workflow-specified options taking precedence), whereas the "non-sticky" defaults are only used if no run options are supplied for a task. If no run options are supplied for a task, then both the "sticky" and "non-sticky" defaults will be used (with the "non-sticky" defaults taking precedence over the "sticky" defaults). Similar keys exist for task preparation and processing run options: `default_preparation_run_options`, `default_sticky_preparation_run_options` and `default_processing_run_options`, `default_sticky_processing_run_options`. 42 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: matflow_env 2 | dependencies: 3 | - python 4 | - pip 5 | - pylint 6 | - ipykernel 7 | - rope 8 | - autopep8 9 | - twine 10 | -------------------------------------------------------------------------------- /matflow/__init__.py: -------------------------------------------------------------------------------- 1 | """`matflow.__init__.py`""" 2 | 3 | from matflow._version import __version__ 4 | from matflow.api import ( 5 | make_workflow, 6 | submit_workflow, 7 | load_workflow, 8 | append_schema_source, 9 | prepend_schema_source, 10 | validate, 11 | get_task_schemas, 12 | ) 13 | -------------------------------------------------------------------------------- /matflow/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.27" 2 | -------------------------------------------------------------------------------- /matflow/api.py: -------------------------------------------------------------------------------- 1 | """`matflow.api.py` 2 | 3 | This module contains the application programming interface (API) to `matflow`, 4 | and includes functions that are called by the command line interface (CLI; in 5 | `matflow.cli.py`). 6 | 7 | """ 8 | 9 | import copy 10 | from pathlib import Path 11 | 12 | import pyperclip 13 | from hpcflow import kill as hpcflow_kill 14 | from hpcflow import cloud_connect as hpcflow_cloud_connect 15 | 16 | from matflow.config import Config 17 | from matflow.extensions import load_extensions 18 | from matflow.profile import parse_workflow_profile 19 | from matflow.models.workflow import Workflow 20 | 21 | 22 | def make_workflow(profile_path, directory=None, write_dirs=True): 23 | """Generate a new Workflow from a profile file. 24 | 25 | Parameters 26 | ---------- 27 | profile_path : str or Path 28 | Path to the profile file. 29 | directory : str or Path, optional 30 | The directory in which the Workflow will be generated. 
By default, this 31 | is the working (i.e. invoking) directory. 32 | 33 | Returns 34 | ------- 35 | workflow : Workflow 36 | 37 | """ 38 | 39 | load_extensions() 40 | 41 | profile_path = Path(profile_path) 42 | workflow_dict = parse_workflow_profile(profile_path) 43 | 44 | with profile_path.open('r') as handle: 45 | profile_str = handle.read() 46 | 47 | profile = {'file': profile_str, 'parsed': copy.deepcopy(workflow_dict)} 48 | 49 | iterate_run_opts = { 50 | **Config.get('default_sticky_iterate_run_options'), 51 | **Config.get('default_iterate_run_options'), 52 | } 53 | workflow_dict.update({'iterate_run_options': iterate_run_opts}) 54 | 55 | workflow = Workflow(**workflow_dict, stage_directory=directory, profile=profile) 56 | workflow.set_ids() 57 | 58 | if write_dirs: 59 | workflow.write_HDF5_file() 60 | workflow.write_directories() 61 | workflow.prepare_iteration(iteration_idx=0) 62 | workflow.dump_hpcflow_workflow_file('hpcflow_workflow.yml') 63 | 64 | # Copy profile to workflow directory: 65 | workflow.path.joinpath(profile_path.name).write_bytes(profile_path.read_bytes()) 66 | 67 | # Copy workflow human_id to clipboard, if supported: 68 | try: 69 | pyperclip.copy(workflow.human_id) 70 | except Exception: # clipboard support is optional; avoid a bare `except` 71 | pass 72 | 73 | return workflow 74 | 75 | 76 | def submit_workflow(workflow_path, directory=None): 77 | """Generate and submit a new workflow from a profile file. 78 | 79 | Parameters 80 | ---------- 81 | workflow_path : str or Path 82 | Path to either a profile file or a workflow project directory that contains a 83 | previously generated workflow HDF5 file. 84 | directory : str or Path, optional 85 | Applicable if `workflow_path` points to a profile file. The directory in which the 86 | Workflow will be generated. By default, this is the working (i.e. invoking) 87 | directory. 
88 | 89 | Returns 90 | ------- 91 | None 92 | 93 | """ 94 | 95 | if Path(workflow_path).is_file(): 96 | workflow = make_workflow(workflow_path, directory=directory, write_dirs=True) 97 | else: 98 | load_extensions() 99 | workflow = load_workflow(workflow_path) 100 | 101 | workflow.submit() 102 | 103 | 104 | def load_workflow(directory, full_path=False): 105 | Config.set_config() 106 | path = Path(directory or '').resolve() 107 | workflow = Workflow.load_HDF5_file(path, full_path) 108 | 109 | return workflow 110 | 111 | 112 | def prepare_task(task_idx, iteration_idx, directory, is_array=False): 113 | """Prepare a task (iteration) for execution by setting inputs and running input 114 | maps.""" 115 | 116 | load_extensions() 117 | workflow = load_workflow(directory) 118 | workflow.prepare_task(task_idx, iteration_idx, is_array=is_array) 119 | 120 | 121 | def prepare_task_element(task_idx, element_idx, directory, is_array=False): 122 | """Prepare a task element for execution by setting inputs and running input maps.""" 123 | load_extensions() 124 | workflow = load_workflow(directory) 125 | workflow.prepare_task_element(task_idx, element_idx, is_array=is_array) 126 | 127 | 128 | def process_task(task_idx, iteration_idx, directory, is_array=False): 129 | """Process a completed task (iteration) by running the output map.""" 130 | load_extensions() 131 | workflow = load_workflow(directory) 132 | workflow.process_task(task_idx, iteration_idx, is_array=is_array) 133 | 134 | 135 | def process_task_element(task_idx, element_idx, directory, is_array=False): 136 | """Process a completed task element by running output maps and saving outputs.""" 137 | load_extensions() 138 | workflow = load_workflow(directory) 139 | workflow.process_task_element(task_idx, element_idx, is_array=is_array) 140 | 141 | 142 | def run_python_task(task_idx, element_idx, directory): 143 | """Run a (commandless) Python task.""" 144 | load_extensions() 145 | workflow = load_workflow(directory) 146 | workflow.run_python_task(task_idx, element_idx) 147 | 148 | 149 | def prepare_sources(task_idx, iteration_idx, directory): 150 | """Prepare source files.""" 151 | load_extensions() 152 | workflow = load_workflow(directory) 153 | workflow.prepare_sources(task_idx, iteration_idx) 154 | 155 | 156 | def append_schema_source(schema_source_path): 157 | """Add a task schema source file to the end of the schema source list.""" 158 | Config.append_schema_source(schema_source_path) 159 | 160 | 161 | def prepend_schema_source(schema_source_path): 162 | """Add a task schema source file to the front of the schema source list.""" 163 | Config.prepend_schema_source(schema_source_path) 164 | 165 | 166 | def validate(): 167 | load_extensions() 168 | 169 | 170 | def kill(directory): 171 | Config.set_config() 172 | hpcflow_kill(dir_path=directory, config_dir=Config.get('hpcflow_config_dir')) 173 | 174 | 175 | def cloud_connect(provider): 176 | Config.set_config() 177 | hpcflow_cloud_connect(provider, config_dir=Config.get('hpcflow_config_dir')) 178 | 179 | 180 | def write_element_directories(iteration_idx, directory): 181 | """Generate element directories for a given iteration.""" 
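    # Note: the iteration count is taken from the `iterate` specification when
    # one was given, otherwise from the workflow's own `num_iterations`; the
    # range check below makes a call with an out-of-range `iteration_idx` a
    # safe no-op.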
182 | load_extensions() 183 | workflow = load_workflow(directory) 184 | if workflow.iterate: 185 | num_iters = workflow.iterate['num_iterations'] 186 | else: 187 | num_iters = workflow.num_iterations 188 | if iteration_idx < num_iters: 189 | workflow.write_element_directories(iteration_idx) 190 | workflow.prepare_iteration(iteration_idx) 191 | 192 | 193 | def archive(directory, archive): 194 | """Perform an on-demand archive of an existing workflow.""" 195 | workflow = load_workflow(directory) 196 | workflow.do_archive(archive) 197 | 198 | 199 | def get_task_schemas(): 200 | Config.set_config() 201 | return Config.get('task_schemas') 202 | -------------------------------------------------------------------------------- /matflow/cli.py: -------------------------------------------------------------------------------- 1 | """`matflow.cli.py` 2 | 3 | Module that exposes a command line interface for `matflow`. 4 | 5 | """ 6 | import click 7 | 8 | from matflow import __version__ 9 | from matflow import api 10 | 11 | 12 | @click.group() 13 | @click.version_option(version=__version__) 14 | def cli(): 15 | pass 16 | 17 | 18 | @cli.command() 19 | @click.option('--directory', '-d') 20 | @click.argument('profile', type=click.Path(exists=True)) 21 | def make(profile, directory=None): 22 | """Generate a new Workflow.""" 23 | print('matflow.cli.make', flush=True) 24 | api.make_workflow(profile_path=profile, directory=directory) 25 | 26 | 27 | @cli.command() 28 | @click.option('--directory', '-d') 29 | @click.argument('workflow_path', type=click.Path(exists=True)) 30 | def go(workflow_path, directory=None): 31 | """Generate and submit a new Workflow.""" 32 | print('matflow.cli.go', flush=True) 33 | api.submit_workflow(workflow_path, directory=directory) 34 | 35 | 36 | @cli.command() 37 | @click.option('--task-idx', '-t', type=click.INT, required=True) 38 | @click.option('--iteration-idx', '-i', type=click.INT, required=True) 39 | @click.option('--directory', '-d', type=click.Path(exists=True)) 40 | @click.option('--array', is_flag=True) 41 | def prepare_task(task_idx, iteration_idx, directory=None, array=False): 42 | print('matflow.cli.prepare_task', flush=True) 43 | api.prepare_task(task_idx, iteration_idx, directory, is_array=array) 44 | 45 | 46 | @cli.command() 47 | @click.option('--task-idx', '-t', type=click.INT, required=True) 48 | @click.option('--element-idx', '-e', type=click.INT, required=True) 49 | @click.option('--directory', '-d', type=click.Path(exists=True)) 50 | @click.option('--array', is_flag=True) 51 | def prepare_task_element(task_idx, element_idx, directory=None, array=False): 52 | print('matflow.cli.prepare_task_element', flush=True) 53 | api.prepare_task_element(task_idx, element_idx, directory, is_array=array) 54 | 55 | 56 | @cli.command() 57 | @click.option('--task-idx', '-t', type=click.INT, required=True) 58 | @click.option('--iteration-idx', '-i', type=click.INT, required=True) 59 | @click.option('--directory', '-d', type=click.Path(exists=True)) 60 | @click.option('--array', is_flag=True) 61 | def process_task(task_idx, iteration_idx, directory=None, array=False): 62 | print('matflow.cli.process_task', flush=True) 63 | api.process_task(task_idx, iteration_idx, directory, is_array=array) 64 | 65 | 66 | @cli.command() 67 | @click.option('--task-idx', '-t', type=click.INT, required=True) 68 | @click.option('--element-idx', '-e', type=click.INT, required=True) 69 | @click.option('--directory', '-d', type=click.Path(exists=True)) 70 | @click.option('--array', is_flag=True) 71 | def 
process_task_element(task_idx, element_idx, directory=None, array=False): 72 | print('matflow.cli.process_task_element', flush=True) 73 | api.process_task_element(task_idx, element_idx, directory, is_array=array) 74 | 75 | 76 | @cli.command() 77 | @click.option('--task-idx', '-t', type=click.INT, required=True) 78 | @click.option('--element-idx', '-e', type=click.INT, required=True) 79 | @click.option('--directory', '-d', type=click.Path(exists=True)) 80 | def run_python_task(task_idx, element_idx, directory=None): 81 | print('matflow.cli.run_python_task', flush=True) 82 | api.run_python_task(task_idx, element_idx, directory) 83 | 84 | 85 | @cli.command() 86 | @click.option('--task-idx', '-t', type=click.INT, required=True) 87 | @click.option('--iteration-idx', '-i', type=click.INT, required=True) 88 | @click.option('--directory', '-d', type=click.Path(exists=True)) 89 | def prepare_sources(task_idx, iteration_idx, directory=None): 90 | print('matflow.cli.prepare_sources', flush=True) 91 | api.prepare_sources(task_idx, iteration_idx, directory) 92 | 93 | 94 | @cli.command() 95 | @click.argument('schema_source_path', type=click.Path(exists=True)) 96 | def append_schema_source(schema_source_path): 97 | api.append_schema_source(schema_source_path) 98 | 99 | 100 | @cli.command() 101 | @click.argument('schema_source_path', type=click.Path(exists=True)) 102 | def prepend_schema_source(schema_source_path): 103 | api.prepend_schema_source(schema_source_path) 104 | 105 | 106 | @cli.command() 107 | def validate(): 108 | """Load and validate task schemas against available extensions.""" 109 | api.validate() 110 | 111 | 112 | @cli.command() 113 | @click.option('--provider', '-p', required=True) 114 | def cloud_connect(provider): 115 | api.cloud_connect(provider) 116 | 117 | 118 | @cli.command() 119 | @click.argument('directory', type=click.Path(exists=True)) 120 | def kill(directory): 121 | """Kill all pending and executing tasks.""" 122 | api.kill(directory) 123 | 124 | 125 | @cli.command() 126 | @click.option('--iteration-idx', '-i', type=click.INT, required=True) 127 | @click.option('--directory', '-d', type=click.Path(exists=True)) 128 | def write_element_directories(iteration_idx, directory=None): 129 | api.write_element_directories(iteration_idx, directory) 130 | 131 | 132 | @cli.command() 133 | @click.argument('directory', type=click.Path(exists=True)) 134 | @click.argument('archive') 135 | def archive(directory, archive): 136 | api.archive(directory, archive) 137 | 138 | 139 | if __name__ == '__main__': 140 | cli() 141 | -------------------------------------------------------------------------------- /matflow/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from warnings import warn 4 | 5 | from ruamel.yaml import YAML, safe_load 6 | 7 | 8 | from matflow.errors import ConfigurationError, MatflowExtensionError 9 | from matflow.models.task import TaskSchema 10 | from matflow.models.software import SoftwareInstance 11 | 12 | 13 | class Config(object): 14 | 15 | __ALLOWED_CONFIG = [ 16 | 'task_schema_sources', 17 | 'software_sources', 18 | 'default_run_options', 19 | 'default_preparation_run_options', 20 | 'default_processing_run_options', 21 | 'default_iterate_run_options', 22 | 'default_sticky_run_options', 23 | 'default_sticky_preparation_run_options', 24 | 'default_sticky_processing_run_options', 25 | 'default_sticky_iterate_run_options', 26 | 'parallel_modes', 27 | 'archive_locations', 28 | 
'default_metadata', 29 | ] 30 | 31 | __conf = {} 32 | 33 | _is_set = False 34 | _is_extension_locked = True 35 | 36 | @staticmethod 37 | def append_schema_source(schema_source_path, config_dir=None): 38 | yaml = YAML(typ='rt') 39 | config_dat, config_file = Config.get_config_file(Config.resolve_config_dir(config_dir)) # resolve `None` to the default config dir 40 | config_dat['task_schema_sources'].append(str(schema_source_path)) 41 | yaml.dump(config_dat, config_file) 42 | 43 | @staticmethod 44 | def prepend_schema_source(schema_source_path, config_dir=None): 45 | yaml = YAML(typ='rt') 46 | config_dat, config_file = Config.get_config_file(Config.resolve_config_dir(config_dir)) # resolve `None` to the default config dir 47 | config_dat['task_schema_sources'] = ( 48 | [str(schema_source_path)] + config_dat['task_schema_sources'] # prepend as a single-item list; `str + list` raises TypeError 49 | ) 50 | yaml.dump(config_dat, config_file) 51 | 52 | @staticmethod 53 | def resolve_config_dir(config_dir=None): 54 | 55 | if not config_dir: 56 | config_dir = Path(os.getenv('MATFLOW_CONFIG_DIR', '~/.matflow')).expanduser() 57 | else: 58 | config_dir = Path(config_dir) 59 | 60 | if Config._is_set: 61 | if config_dir != Config.get('config_dir'): 62 | warn(f'Config is already set, but `config_dir` changed from ' 63 | f'"{Config.get("config_dir")}" to "{config_dir}".') 64 | 65 | if not config_dir.is_dir(): 66 | print('Configuration directory does not exist. Generating.') 67 | config_dir.mkdir() 68 | 69 | return config_dir 70 | 71 | @staticmethod 72 | def get_config_file(config_dir): 73 | 74 | yaml = YAML() 75 | config_file = config_dir.joinpath('config.yml') 76 | def_schema_file = config_dir.joinpath('task_schemas.yml') 77 | def_software_file = config_dir.joinpath('software.yml') 78 | if not config_file.is_file(): 79 | print('No config.yml found. Generating a config.yml file.') 80 | def_config = { 81 | 'task_schema_sources': [str(def_schema_file)], 82 | 'software_sources': [str(def_software_file)], 83 | 'parallel_modes': { 84 | 'MPI': {'command': 'mpirun -np <>'}, 85 | 'OpenMP': {'env': 'export OMP_NUM_THREADS=<>'}, 86 | } 87 | } 88 | yaml.dump(def_config, config_file) 89 | 90 | if not def_schema_file.is_file(): 91 | print('Generating a default task schema file.') 92 | yaml.dump([], def_schema_file) 93 | 94 | if not def_software_file.is_file(): 95 | print('Generating a default software file.') 96 | yaml.dump({}, def_software_file) 97 | 98 | print(f'Loading matflow config from {config_file}') 99 | with config_file.open() as handle: 100 | config_dat = safe_load(handle) 101 | bad_keys = list(set(config_dat.keys()) - set(Config.__ALLOWED_CONFIG)) 102 | if bad_keys: 103 | bad_keys_fmt = ', '.join([f'"{i}"' for i in bad_keys]) 104 | raise ConfigurationError(f'Unknown configuration options: {bad_keys_fmt}.') 105 | 106 | if 'task_schema_sources' not in config_dat: 107 | msg = (f'Missing `task_schema_sources` from configuration file: ' 108 | f'{config_file}.') 109 | raise ConfigurationError(msg) 110 | 111 | if 'software_sources' not in config_dat: 112 | msg = f'Missing `software_sources` from configuration file: {config_file}.' 113 | raise ConfigurationError(msg) 114 | 115 | return config_dat, config_file 116 | 117 | @staticmethod 118 | def set_config(config_dir=None, raise_on_set=False, refresh=False): 119 | """Load configuration from a YAML file.""" 120 | 121 | config_dir = Config.resolve_config_dir(config_dir) 122 | 123 | if Config._is_set: 124 | if raise_on_set: 125 | raise ConfigurationError('Configuration is already set.') 126 | elif not refresh: 127 | return 128 | 129 | config_dat, _ = Config.get_config_file(config_dir) 130 | schema_sources = [Path(i).expanduser() for i in 
config_dat['task_schema_sources']] 131 | software_sources = [Path(i).expanduser() for i in config_dat['software_sources']] 132 | 133 | # Validate parallel_modes: 134 | ALLOWED_PARA_MODES = ['MPI', 'OpenMP'] 135 | ALLOWED_PARA_MODES_FMT = ', '.join([f'{i!r}' for i in ALLOWED_PARA_MODES]) 136 | ALLOWED_PARA_CONFIGS = ['env', 'command'] 137 | ALLOWED_PARA_CONFIGS_FMT = ', '.join([f'{i!r}' for i in ALLOWED_PARA_CONFIGS]) 138 | para_modes = {} 139 | for name, mode_config in config_dat.get('parallel_modes', {}).items(): 140 | if name.lower() not in [i.lower() for i in ALLOWED_PARA_MODES]: 141 | msg = (f'Parallel mode "{name}" not known. Allowed parallel modes are ' 142 | f'{ALLOWED_PARA_MODES_FMT}.') 143 | raise ConfigurationError(msg) 144 | if not mode_config: 145 | msg = (f'Specify at least one of {ALLOWED_PARA_CONFIGS_FMT} for parallel ' 146 | f'mode configuration: "{name}".') 147 | raise ConfigurationError(msg) 148 | bad_keys = set(mode_config.keys()) - set(ALLOWED_PARA_CONFIGS) 149 | if bad_keys: 150 | bad_keys_fmt = ', '.join([f'{i!r}' for i in bad_keys]) 151 | msg = (f'Unknown parallel mode configuration keys: {bad_keys_fmt} for ' 152 | f'mode "{name}".') 153 | raise ConfigurationError(msg) 154 | 155 | if 'env' in mode_config: 156 | # Split into list of lines: 157 | mode_config['env'] = mode_config['env'].splitlines() 158 | 159 | # Update to be lowercase: 160 | para_modes.update({name.lower(): mode_config}) 161 | 162 | # Load task_schemas list from all specified task schema files: 163 | task_schema_dicts = {} 164 | yaml = YAML(typ='safe') 165 | for task_schema_file in schema_sources[::-1]: 166 | if not task_schema_file.is_file(): 167 | msg = f'Task schema source is not a file: "{task_schema_file}".' 168 | raise ConfigurationError(msg) 169 | for i in yaml.load(task_schema_file): 170 | if 'name' not in i: 171 | raise ValueError('Task schema definition is missing a "name" key.') 172 | # Overwrite any task schema with the same name (hence we order files in 173 | # reverse so e.g. the first task schema file takes precedence): 174 | task_schema_dicts.update({i['name']: i}) 175 | 176 | # Convert to lists: 177 | task_schema_dicts = [v for k, v in task_schema_dicts.items()] 178 | 179 | # Load and validate self-consistency of task schemas: 180 | print(f'Loading task schemas from {len(schema_sources)} file(s)...', end='') 181 | try: 182 | task_schemas = TaskSchema.load_from_hierarchy(task_schema_dicts) 183 | except Exception as err: 184 | print('Failed.') 185 | raise err 186 | print('OK!') 187 | 188 | print(f'Loading software definitions from {len(software_sources)} ' 189 | f'file(s)...', end='') 190 | software = {} 191 | for software_file in software_sources: 192 | if not software_file.is_file(): 193 | msg = f'Software source is not a file: "{software_file}".' 
194 | raise ConfigurationError(msg) 195 | try: 196 | soft_loaded = SoftwareInstance.load_multiple(yaml.load(software_file)) 197 | except Exception as err: 198 | print(f'\nFailed to load software definitions from: "{software_file}".') 199 | raise err 200 | 201 | # Combine software instances from multiple software source files: 202 | for soft_name, instances in soft_loaded.items(): 203 | if soft_name in software: 204 | software[soft_name].extend(instances) 205 | else: 206 | software.update({soft_name: instances}) 207 | print('OK!') 208 | 209 | archive_locs = config_dat.get('archive_locations', {}) 210 | for arch_name, arch in archive_locs.items(): 211 | ALLOWED_ARCH_KEYS = ['path', 'cloud_provider'] 212 | if 'path' not in arch: 213 | msg = f'Missing `path` for archive location "{arch_name}".' 214 | raise ConfigurationError(msg) 215 | bad_keys = set(arch.keys()) - set(ALLOWED_ARCH_KEYS) 216 | if bad_keys: 217 | bad_keys_fmt = ', '.join([f'{i!r}' for i in bad_keys]) 218 | msg = (f'Unknown archive location keys for archive "{arch_name}": ' 219 | f'{bad_keys_fmt}') 220 | raise ConfigurationError(msg) 221 | 222 | ALLOWED_CLOUD_PROVIDERS = ['dropbox'] 223 | cloud_provider = arch.get('cloud_provider') 224 | if cloud_provider and cloud_provider not in ALLOWED_CLOUD_PROVIDERS: 225 | msg = (f'Unsupported cloud provider for archive "{arch_name}": ' 226 | f'"{cloud_provider}". Supported cloud providers are: ' 227 | f'{ALLOWED_CLOUD_PROVIDERS}.') 228 | raise ConfigurationError(msg) 229 | 230 | Config.__conf['config_dir'] = config_dir 231 | 232 | for i in [ 233 | 'default_run_options', 234 | 'default_preparation_run_options', 235 | 'default_processing_run_options', 236 | 'default_iterate_run_options', 237 | 'default_sticky_run_options', 238 | 'default_sticky_preparation_run_options', 239 | 'default_sticky_processing_run_options', 240 | 'default_sticky_iterate_run_options', 241 | 'default_metadata', 242 | ]: 243 | Config.__conf[i] = config_dat.get(i, {}) 244 | 245 | hpcflow_config_dir = config_dir.joinpath('.hpcflow') 246 | Config.__conf['hpcflow_config_dir'] = hpcflow_config_dir 247 | Config.__conf['software'] = software 248 | Config.__conf['task_schemas'] = task_schemas 249 | Config.__conf['parallel_modes'] = para_modes 250 | Config.__conf['archive_locations'] = archive_locs 251 | 252 | Config.__conf['input_maps'] = {} 253 | Config.__conf['output_maps'] = {} 254 | Config.__conf['func_maps'] = {} 255 | Config.__conf['CLI_arg_maps'] = {} 256 | Config.__conf['sources_maps'] = {} 257 | Config.__conf['output_file_maps'] = {} 258 | Config.__conf['software_versions'] = {} 259 | Config.__conf['extension_info'] = {} 260 | Config.__conf['schema_validity'] = {} 261 | 262 | Config._is_set = True 263 | 264 | @staticmethod 265 | def get(name): 266 | if not Config._is_set: 267 | raise ConfigurationError('Configuration is not yet set.') 268 | return Config.__conf[name] 269 | 270 | @staticmethod 271 | def lock_extensions(): 272 | Config._is_extension_locked = True 273 | 274 | @staticmethod 275 | def unlock_extensions(): 276 | Config._is_extension_locked = False 277 | 278 | @staticmethod 279 | def _get_software_safe(software_name): 280 | return SoftwareInstance.get_software_safe(software_name) 281 | 282 | @staticmethod 283 | def _get_key_safe(key): 284 | return key[0], key[1], Config._get_software_safe(key[2]) 285 | 286 | @staticmethod 287 | def _validate_extension_setter(): 288 | if not Config._is_set: 289 | warn(f'Configuration is not yet set. 
Matflow extension functions will not '
290 |              'be mapped to task schemas unless matflow is loaded.')
291 |             return False
292 |         if Config._is_extension_locked:
293 |             msg = 'Configuration is locked against modifying extension data.'
294 |             raise ConfigurationError(msg)
295 |         return True
296 | 
297 |     @staticmethod
298 |     def set_input_map(key, input_file, func):
299 |         if Config._validate_extension_setter():
300 |             key = Config._get_key_safe(key)
301 |             if key not in Config.__conf['input_maps']:
302 |                 Config.__conf['input_maps'].update({key: {}})
303 |             if input_file in Config.__conf['input_maps'][key]:
304 |                 msg = f'Input file name "{input_file}" already exists in the input map.'
305 |                 raise MatflowExtensionError(msg)
306 |             Config.__conf['input_maps'][key][input_file] = func
307 | 
308 |     @staticmethod
309 |     def set_output_map(key, output_name, func):
310 |         if Config._validate_extension_setter():
311 |             key = Config._get_key_safe(key)
312 |             if key not in Config.__conf['output_maps']:
313 |                 Config.__conf['output_maps'].update({key: {}})
314 |             if output_name in Config.__conf['output_maps'][key]:
315 |                 msg = f'Output name "{output_name}" already exists in the output map.'
316 |                 raise MatflowExtensionError(msg)
317 |             Config.__conf['output_maps'][key][output_name] = func
318 | 
319 |     @staticmethod
320 |     def set_func_map(key, func):
321 |         if Config._validate_extension_setter():
322 |             key = Config._get_key_safe(key)
323 |             if key in Config.__conf['func_maps']:
324 |                 msg = f'A function for key {key} already exists in the function map.'
325 |                 raise MatflowExtensionError(msg)
326 |             Config.__conf['func_maps'][key] = func
327 | 
328 |     @staticmethod
329 |     def set_CLI_arg_map(key, input_name, func):
330 |         if Config._validate_extension_setter():
331 |             key = Config._get_key_safe(key)
332 |             if key not in Config.__conf['CLI_arg_maps']:
333 |                 Config.__conf['CLI_arg_maps'].update({key: {}})
334 |             if input_name in Config.__conf['CLI_arg_maps'][key]:
335 |                 msg = (f'Input name "{input_name}" already exists in the CLI formatter '
336 |                        f'map.')
337 |                 raise MatflowExtensionError(msg)
338 |             Config.__conf['CLI_arg_maps'][key][input_name] = func
339 | 
340 |     @staticmethod
341 |     def set_source_map(key, func, **sources_dict):
342 |         if Config._validate_extension_setter():
343 |             key = Config._get_key_safe(key)
344 |             if key in Config.__conf['sources_maps']:
345 |                 msg = f'A sources map for key {key} already exists.'
346 |                 raise MatflowExtensionError(msg)
347 |             Config.__conf['sources_maps'].update({
348 |                 key: {'func': func, 'sources': sources_dict}
349 |             })
350 | 
351 |     @staticmethod
352 |     def set_software_version_func(software, func):
353 |         if Config._validate_extension_setter():
354 |             software = Config._get_software_safe(software)
355 |             if software in Config.__conf['software_versions']:
356 |                 msg = (f'Software "{software}" has already registered a '
357 |                        f'`software_versions` function.')
358 |                 raise MatflowExtensionError(msg)
359 |             Config.__conf['software_versions'][software] = func
360 | 
361 |     @staticmethod
362 |     def set_output_file_map(key, file_reference, file_name):
363 |         if Config._validate_extension_setter():
364 |             key = Config._get_key_safe(key)
365 |             if key not in Config.__conf['output_file_maps']:
366 |                 Config.__conf['output_file_maps'].update({key: {}})
367 |             file_ref_full = '__file__' + file_reference
368 |             if file_ref_full in Config.__conf['output_file_maps'][key]:
369 |                 msg = f'File reference "{file_reference}" already exists in the output files map.'
370 | raise MatflowExtensionError(msg) 371 | Config.__conf['output_file_maps'][key].update({file_ref_full: file_name}) 372 | 373 | @staticmethod 374 | def set_extension_info(name, info): 375 | if Config._validate_extension_setter(): 376 | if name in Config.__conf['extension_info']: 377 | msg = f'Extension with name "{name}" already loaded.' 378 | raise MatflowExtensionError(msg) 379 | Config.__conf['extension_info'][name] = info 380 | 381 | @staticmethod 382 | def set_schema_validities(validities): 383 | if Config._validate_extension_setter(): 384 | Config.__conf['schema_validity'].update(validities) 385 | 386 | @staticmethod 387 | def unload_extension(software_name): 388 | 389 | name = Config._get_software_safe(software_name) 390 | 391 | in_map = [k for k in Config.__conf['input_maps'] if k[2] == name] 392 | for k in in_map: 393 | del Config.__conf['input_maps'][k] 394 | 395 | out_map = [k for k in Config.__conf['output_maps'] if k[2] == name] 396 | for k in out_map: 397 | del Config.__conf['output_maps'][k] 398 | 399 | func_map = [k for k in Config.__conf['func_maps'] if k[2] == name] 400 | for k in func_map: 401 | del Config.__conf['func_maps'][k] 402 | 403 | CLI_map = [k for k in Config.__conf['CLI_arg_maps'] if k[2] == name] 404 | for k in CLI_map: 405 | del Config.__conf['CLI_arg_maps'][k] 406 | 407 | out_file_map = [k for k in Config.__conf['output_file_maps'] if k[2] == name] 408 | for k in out_file_map: 409 | del Config.__conf['output_file_maps'][k] 410 | 411 | soft_vers = [k for k in Config.__conf['software_versions'] if k == name] 412 | for k in soft_vers: 413 | del Config.__conf['software_versions'][k] 414 | 415 | ext_info = [k for k in Config.__conf['extension_info'] if k == name] 416 | for k in ext_info: 417 | del Config.__conf['extension_info'][k] 418 | 419 | schema_valid = [k for k in Config.__conf['schema_validity'] if k[2] == name] 420 | for k in schema_valid: 421 | del Config.__conf['schema_validity'][k] 422 | 423 | source_map = [k for k in Config.__conf['sources_maps'] if k[2] == name] 424 | for k in source_map: 425 | del Config.__conf['sources_maps'][k] 426 | -------------------------------------------------------------------------------- /matflow/errors.py: -------------------------------------------------------------------------------- 1 | class IncompatibleWorkflow(Exception): 2 | pass 3 | 4 | 5 | class IncompatibleTaskNesting(IncompatibleWorkflow): 6 | pass 7 | 8 | 9 | class MissingMergePriority(IncompatibleTaskNesting): 10 | pass 11 | 12 | 13 | class IncompatibleSequence(Exception): 14 | """For task sequence definitions that are not logically consistent.""" 15 | 16 | 17 | class SequenceError(Exception): 18 | """For malformed sequence definitions.""" 19 | 20 | 21 | class TaskError(Exception): 22 | """For malformed task definitions.""" 23 | 24 | 25 | class TaskSchemaError(Exception): 26 | """For nonsensical task schema definitions.""" 27 | 28 | 29 | class TaskParameterError(Exception): 30 | """For incorrectly parametrised tasks.""" 31 | 32 | 33 | class ProfileError(Exception): 34 | """For malformed profile file data.""" 35 | 36 | 37 | class MissingSoftware(Exception): 38 | """For specified software that cannot be satisfied.""" 39 | 40 | 41 | class WorkflowPersistenceError(Exception): 42 | """For problems related to saving and loading the persistent HDF5 files.""" 43 | 44 | 45 | class UnsatisfiedGroupParameter(Exception): 46 | """For when an input has a group, but that group does not exist in the Workflow.""" 47 | 48 | 49 | class MatflowExtensionError(Exception): 50 
| """For problems when loading extensions."""
51 | 
52 | 
53 | class MissingSchemaError(Exception):
54 |     """For when a suitable schema does not exist."""
55 | 
56 | 
57 | class UnsatisfiedSchemaError(Exception):
58 |     """For when a suitable extension function cannot be found for a task schema."""
59 | 
60 | 
61 | class TaskElementExecutionError(Exception):
62 |     """For when the execution of a task element fails."""
63 | 
64 | 
65 | class ConfigurationError(Exception):
66 |     """For malformed configuration files."""
67 | 
68 | 
69 | class SoftwareInstanceError(Exception):
70 |     """For malformed SoftwareInstance definitions."""
71 |     pass
72 | 
73 | 
74 | class MissingSoftwareSourcesError(Exception):
75 |     """For when a software instance requires source variables, but none are forthcoming."""
76 | 
77 | 
78 | class UnexpectedSourceMapReturnError(Exception):
79 |     """For when a source map function does not return the expected dict."""
80 | 
81 | 
82 | class CommandError(Exception):
83 |     """For problems with command groups and commands."""
84 | 
85 | 
86 | class WorkflowIterationError(Exception):
87 |     """For issues with resolving requested iterations."""
88 | 
89 | 
90 | class ParameterImportError(Exception):
91 |     """For issues with importing parameters from pre-existing workflows."""
92 | 
--------------------------------------------------------------------------------
/matflow/extensions.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import pkg_resources
3 | import warnings
4 | 
5 | from matflow.config import Config
6 | from matflow.validation import validate_task_schemas
7 | 
8 | 
9 | def load_extensions():
10 | 
11 |     Config.set_config(raise_on_set=True)
12 |     Config.unlock_extensions()
13 | 
14 |     extensions_entries = list(pkg_resources.iter_entry_points('matflow.extension'))  # list() so the emptiness check below is meaningful
15 |     if extensions_entries:
16 |         print('Loading extensions...')
17 |         for entry_point in extensions_entries:
18 | 
19 |             print(f'  "{entry_point.name}"...', end='', flush=True)
20 | 
21 |             try:
22 |                 loaded = entry_point.load()
23 |             except (ImportError, SyntaxError) as ex:
24 |                 print(f'Failed: {ex!r}', flush=True)
25 |                 continue
26 | 
27 |             unload = False
28 | 
29 |             if not hasattr(loaded, 'SOFTWARE'):
30 |                 print('Failed.', flush=True)
31 |                 warnings.warn(f'Matflow extension "{entry_point.module_name}" has no '
32 |                               f'`SOFTWARE` attribute. This extension will not be loaded.')
33 |                 unload = True
34 | 
35 |             if not hasattr(loaded, '__version__'):
36 |                 print('Failed.', flush=True)
37 |                 warnings.warn(f'Matflow extension "{entry_point.module_name}" has no '
38 |                               f'`__version__` attribute. This extension will not be '
39 |                               f'loaded.')
40 |                 unload = True
41 | 
42 |             software_safe = Config._get_software_safe(loaded.SOFTWARE)
43 | 
44 |             if (
45 |                 not unload and
46 |                 Config.get('software_versions').get(software_safe) is None
47 |             ):
48 | 
49 |                 # Every defined SoftwareInstance must have a specified version_info:
50 |                 version_defined = True
51 |                 soft_instances = Config.get('software').get(software_safe)
52 |                 if not soft_instances:
53 |                     version_defined = False
54 |                 else:
55 |                     for i in soft_instances:
56 |                         if i.version_info is None:
57 |                             version_defined = False
58 |                             break
59 | 
60 |                 if not version_defined:
61 |                     print('Failed.', flush=True)
62 |                     msg = (f'Matflow extension "{entry_point.module_name}" does not '
63 |                            f'register a function for getting software versions and one '
64 |                            f'or more of its software instance definitions do not '
65 |                            f'specify `version_info`. 
This extension will not be loaded.') 66 | warnings.warn(msg) 67 | unload = True 68 | 69 | if unload: 70 | Config.unload_extension(software_safe) 71 | continue 72 | 73 | Config.set_extension_info( 74 | entry_point.name, 75 | {'module_name': entry_point.module_name, 'version': loaded.__version__}, 76 | ) 77 | print(f'(software: "{software_safe}") from ' 78 | f'{entry_point.module_name} (version {loaded.__version__})', flush=True) 79 | 80 | # Validate task schemas against loaded extensions: 81 | print('Validating task schemas against loaded extensions...', end='') 82 | try: 83 | Config.set_schema_validities( 84 | validate_task_schemas( 85 | Config.get('task_schemas'), 86 | Config.get('input_maps'), 87 | Config.get('output_maps'), 88 | Config.get('func_maps'), 89 | ) 90 | ) 91 | except Exception as err: 92 | print('Failed.', flush=True) 93 | raise err 94 | 95 | schema_validity = Config.get('schema_validity') 96 | schema_invalids = [(k, v[1]) for k, v in schema_validity.items() if not v[0]] 97 | num_valid = sum([i[0] for i in schema_validity.values()]) 98 | num_total = len(schema_validity) 99 | print(f'OK! {num_valid}/{num_total} schemas are valid.', flush=True) 100 | if schema_invalids: 101 | sch_invalids_fmt = '\n '.join([f'{i[0]}: {i[1]}' for i in schema_invalids]) 102 | msg = f'The following schemas are invalid:\n {sch_invalids_fmt}\n' 103 | print(msg, flush=True) 104 | 105 | else: 106 | print('No extensions found.') 107 | 108 | Config.lock_extensions() 109 | 110 | 111 | def input_mapper(input_file, task, method, software): 112 | """Function decorator for adding input maps from extensions.""" 113 | def _input_mapper(func): 114 | @functools.wraps(func) 115 | def func_wrap(*args, **kwargs): 116 | return func(*args, **kwargs) 117 | key = (task, method, software) 118 | Config.set_input_map(key, input_file, func_wrap) 119 | return func_wrap 120 | return _input_mapper 121 | 122 | 123 | def output_mapper(output_name, task, method, software): 124 | """Function decorator for adding output maps from extensions.""" 125 | def _output_mapper(func): 126 | @functools.wraps(func) 127 | def func_wrap(*args, **kwargs): 128 | return func(*args, **kwargs) 129 | key = (task, method, software) 130 | Config.set_output_map(key, output_name, func_wrap) 131 | return func_wrap 132 | return _output_mapper 133 | 134 | 135 | def func_mapper(task, method, software): 136 | """Function decorator for adding function maps from extensions.""" 137 | def _func_mapper(func): 138 | @functools.wraps(func) 139 | def func_wrap(*args, **kwargs): 140 | return func(*args, **kwargs) 141 | key = (task, method, software) 142 | Config.set_func_map(key, func_wrap) 143 | return func_wrap 144 | return _func_mapper 145 | 146 | 147 | def cli_format_mapper(input_name, task, method, software): 148 | """Function decorator for adding CLI arg formatter functions from extensions.""" 149 | def _cli_format_mapper(func): 150 | @functools.wraps(func) 151 | def func_wrap(*args, **kwargs): 152 | return func(*args, **kwargs) 153 | key = (task, method, software) 154 | Config.set_CLI_arg_map(key, input_name, func_wrap) 155 | return func_wrap 156 | return _cli_format_mapper 157 | 158 | 159 | def software_versions(software): 160 | """Function decorator to register an extension function as the function that returns 161 | a dict of pertinent software versions for that extension.""" 162 | def _software_versions(func): 163 | @functools.wraps(func) 164 | def func_wrap(*args, **kwargs): 165 | return func(*args, **kwargs) 166 | 
Config.set_software_version_func(software, func_wrap) 167 | return func_wrap 168 | return _software_versions 169 | 170 | 171 | def sources_mapper(task, method, software, **sources_dict): 172 | """Function decorator to register an extension function that generate task source 173 | files.""" 174 | def _sources_mapper(func): 175 | @functools.wraps(func) 176 | def func_wrap(*args, **kwargs): 177 | return func(*args, **kwargs) 178 | key = (task, method, software) 179 | Config.set_source_map(key, func_wrap, **sources_dict) 180 | return func_wrap 181 | return _sources_mapper 182 | 183 | 184 | def register_output_file(file_reference, file_name, task, method, software): 185 | key = (task, method, software) 186 | Config.set_output_file_map(key, file_reference, file_name) 187 | -------------------------------------------------------------------------------- /matflow/hicklable.py: -------------------------------------------------------------------------------- 1 | """`matflow.hicklable.py`""" 2 | 3 | import numpy as np 4 | 5 | HICKLABLE_PRIMITIVES = ( 6 | int, 7 | float, 8 | str, 9 | np.ndarray, 10 | np.int32, 11 | np.int64, 12 | type(None), 13 | ) 14 | 15 | 16 | def to_hicklable(obj): 17 | """Get an object representation that can be saved to an HDF5 file using `hickle`. 18 | 19 | Parameters 20 | ---------- 21 | obj : object 22 | Object whose hicklable representation is to be returned. 23 | 24 | """ 25 | 26 | if isinstance(obj, (list, tuple, set)): 27 | obj_valid = [] 28 | for item in obj: 29 | obj_valid.append(to_hicklable(item)) 30 | if isinstance(obj, tuple): 31 | obj_valid = tuple(obj_valid) 32 | elif isinstance(obj, set): 33 | obj_valid = set(obj_valid) 34 | 35 | elif isinstance(obj, dict): 36 | obj_valid = {} 37 | for dct_key, dct_val in obj.items(): 38 | obj_valid.update({dct_key: to_hicklable(dct_val)}) 39 | 40 | elif isinstance(obj, HICKLABLE_PRIMITIVES): 41 | obj_valid = obj 42 | 43 | else: 44 | all_attrs = {} 45 | if hasattr(obj, '__dict__'): 46 | all_attrs.update(getattr(obj, '__dict__')) 47 | if hasattr(obj, '__slots__'): 48 | all_attrs.update({k: getattr(obj, k) for k in getattr(obj, '__slots__') 49 | if k != '__dict__'}) 50 | if not hasattr(obj, '__dict__') and not hasattr(obj, '__slots__'): 51 | raise ValueError(f'Object not understood: {obj}.') 52 | 53 | obj_valid = {} 54 | for attr, value in all_attrs.items(): 55 | obj_valid.update({attr: to_hicklable(value)}) 56 | 57 | return obj_valid 58 | -------------------------------------------------------------------------------- /matflow/models/__init__.py: -------------------------------------------------------------------------------- 1 | """`matflow.models.__init__.py`""" 2 | 3 | from matflow.models.command import Command, CommandGroup 4 | from matflow.models.task import Task, TaskSchema 5 | -------------------------------------------------------------------------------- /matflow/models/command.py: -------------------------------------------------------------------------------- 1 | """`matflow.models.command.py` 2 | 3 | Module containing functionality for executing commands. 
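As a rough, hypothetical illustration (in practice construction is driven by
task schema YAML rather than written by hand), a command group pairs command
dicts with command-file name templates, whose '<>' token is substituted with
an incrementing integer:

    CommandGroup(
        commands=[{'command': 'my_solver',
                   'options': [['--input', '<<inp_file>>']]}],
        command_files={'inp_file': 'input_<>.txt'},
    )

Here the hypothetical '<<inp_file>>' option token resolves to 'input_0.txt',
since no command writes to that file via stdout/stderr.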
4 | 
5 | """
6 | 
7 | import copy
8 | 
9 | import numpy as np
10 | 
11 | from matflow.errors import CommandError
12 | from matflow.utils import dump_to_yaml_string
13 | from matflow.hicklable import to_hicklable
14 | 
15 | 
16 | def list_formatter(lst):
17 |     return ' '.join([f'{i}' for i in lst])
18 | 
19 | 
20 | DEFAULT_FORMATTERS = {
21 |     str: lambda x: x,
22 |     int: lambda number: str(number),
23 |     float: lambda number: f'{number:.6f}',
24 |     list: list_formatter,
25 |     set: list_formatter,
26 |     tuple: list_formatter,
27 | }
28 | 
29 | 
30 | class CommandGroup(object):
31 |     """Class to represent a group of commands."""
32 | 
33 |     def __init__(self, commands, command_files=None, command_pathways=None):
34 |         """
35 |         Parameters
36 |         ----------
37 |         commands : list of dict (each dict is passed as keyword arguments to `Command`)
38 |         command_files : dict, optional
39 |         command_pathways : list of dict, optional
40 | 
41 |         """
42 | 
43 |         self.commands = [Command(**i) for i in commands]
44 |         self.command_files = command_files or {}
45 |         self.command_pathways = command_pathways or []
46 | 
47 |         self._validate_command_pathways()
48 |         self.resolve_command_pathways()
49 | 
50 |     @property
51 |     def all_commands(self):
52 |         return self.commands
53 | 
54 |     def __repr__(self):
55 |         out = f'{self.__class__.__name__}(commands=['
56 |         out += ', '.join([f'{i!r}' for i in self.all_commands]) + ']'
57 |         out += ')'
58 |         return out
59 | 
60 |     def __str__(self):
61 |         return dump_to_yaml_string(self.as_dict())
62 | 
63 |     def as_dict(self):
64 |         return to_hicklable(self)
65 | 
66 |     def check_pathway_conditions(self, inputs_list):
67 |         """Check the command pathway conditions are compatible with a list of schema
68 |         inputs.
69 | 
70 |         Parameters
71 |         ----------
72 |         inputs_list : list of str
73 | 
74 |         """
75 | 
76 |         for cmd_pth_idx, cmd_pth in enumerate(self.command_pathways):
77 |             condition = cmd_pth.get('condition')
78 |             if condition:
79 |                 bad_keys = set(condition) - set(inputs_list)
80 |                 if bad_keys:
81 |                     bad_keys_fmt = ', '.join(['"{}"'.format(i) for i in bad_keys])
82 |                     msg = (f'Unknown command pathway condition inputs for command '
83 |                            f'pathway index {cmd_pth_idx}: {bad_keys_fmt}.')
84 |                     raise CommandError(msg)
85 | 
86 |     def _validate_command_pathways(self):
87 | 
88 |         if not self.command_pathways:
89 |             self.command_pathways = [
90 |                 {'commands_idx': list(range(len(self.all_commands)))}
91 |             ]
92 | 
93 |         req_keys = ['commands_idx']
94 |         allowed_keys = req_keys + ['condition', 'commands']
95 | 
96 |         # Each `condition` must be a list of schema input labels; this is checked in `check_pathway_conditions` once the schema is known.
97 |         no_condition_count = 0
98 |         for cmd_pth_idx, cmd_pth in enumerate(self.command_pathways):
99 | 
100 |             bad_keys = set(cmd_pth) - set(allowed_keys)
101 |             miss_keys = set(req_keys) - set(cmd_pth)
102 | 
103 |             if bad_keys:
104 |                 bad_keys_fmt = ', '.join(['"{}"'.format(i) for i in bad_keys])
105 |                 msg = (f'Unknown command pathway keys for command pathway index '
106 |                        f'{cmd_pth_idx}: {bad_keys_fmt}.')
107 |                 raise CommandError(msg)
108 | 
109 |             if miss_keys:
110 |                 miss_keys_fmt = ', '.join(['"{}"'.format(i) for i in miss_keys])
111 |                 msg = (f'Missing required command pathway keys for command pathway '
112 |                        f'index {cmd_pth_idx}: {miss_keys_fmt}.')
113 |                 raise CommandError(msg)
114 | 
115 |             if 'condition' not in cmd_pth:
116 |                 no_condition_count += 1
117 | 
118 |             cmds_idx = cmd_pth['commands_idx']
119 |             if (
120 |                 not isinstance(cmds_idx, list) or
121 |                 not all([i in range(len(self.all_commands)) for i in cmds_idx])
122 |             ):
123 |                 msg = (f'`commands_idx` must be a list of integer 
indices into '
124 |                        f'`all_commands`.')
125 |                 raise CommandError(msg)
126 | 
127 |         if no_condition_count > 1:
128 |             msg = (f'Only one command pathway may be specified without a `condition` key '
129 |                    f'(the default command pathway).')
130 |             raise CommandError(msg)
131 | 
132 |     def resolve_command_pathways(self):
133 |         """Add a `commands` list to each command pathway, according to its
134 |         `commands_idx`."""
135 | 
136 |         for cmd_pth_idx, cmd_pth in enumerate(self.command_pathways):
137 |             commands = [copy.deepcopy(self.all_commands[i])
138 |                         for i in cmd_pth['commands_idx']]
139 |             cmd_pth.update({'commands': commands})
140 |             self.resolve_command_files(cmd_pth_idx)
141 | 
142 |     def resolve_command_files(self, cmd_pathway_idx):
143 | 
144 |         # Validate command_files dict first:
145 |         for cmd_fn_label, cmd_fn in self.command_files.items():
146 |             if not isinstance(cmd_fn, str) or '<>' not in cmd_fn:
147 |                 msg = ('`command_files` must be a dict that maps a command file label to '
148 |                        'a file name template that must include the substring "<>", '
149 |                        'which is substituted by increasing integers.')
150 |                 raise CommandError(msg)
151 | 
152 |         file_names = self.get_command_file_names(cmd_pathway_idx)
153 | 
154 |         for cmd_idx, command in enumerate(self.get_commands(cmd_pathway_idx)):
155 | 
156 |             for opt_idx, opt in enumerate(command.options):
157 |                 for opt_token_idx, opt_token in enumerate(opt):
158 |                     options_files = file_names['all_commands'][cmd_idx]['options']
159 |                     for cmd_fn_label, cmd_fn in options_files.items():
160 |                         if f'<<{cmd_fn_label}>>' in opt_token:
161 |                             new_fmt_opt = opt_token.replace(f'<<{cmd_fn_label}>>', cmd_fn)
162 |                             command.options[opt_idx][opt_token_idx] = new_fmt_opt
163 | 
164 |             for param_idx, param in enumerate(command.parameters):
165 |                 params_files = file_names['all_commands'][cmd_idx]['parameters']
166 |                 for cmd_fn_label, cmd_fn in params_files.items():
167 |                     if f'<<{cmd_fn_label}>>' in param:
168 |                         new_param = param.replace(f'<<{cmd_fn_label}>>', cmd_fn)
169 |                         command.parameters[param_idx] = new_param
170 | 
171 |             if command.stdin:
172 |                 stdin_files = file_names['all_commands'][cmd_idx]['stdin']
173 |                 for cmd_fn_label, cmd_fn in stdin_files.items():
174 |                     if f'<<{cmd_fn_label}>>' in command.stdin:
175 |                         new_stdin = command.stdin.replace(f'<<{cmd_fn_label}>>', cmd_fn)
176 |                         command.stdin = new_stdin
177 | 
178 |             if command.stdout:
179 |                 new_stdout = command.stdout
180 |                 stdout_files = file_names['all_commands'][cmd_idx]['stdout']
181 |                 for cmd_fn_label, cmd_fn in stdout_files.items():
182 |                     if f'<<{cmd_fn_label}>>' in command.stdout:
183 |                         new_stdout = command.stdout.replace(f'<<{cmd_fn_label}>>', cmd_fn)
184 |                 command.stdout = new_stdout
185 | 
186 |             if command.stderr:
187 |                 stderr_files = file_names['all_commands'][cmd_idx]['stderr']
188 |                 for cmd_fn_label, cmd_fn in stderr_files.items():
189 |                     if f'<<{cmd_fn_label}>>' in command.stderr:
190 |                         new_stderr = command.stderr.replace(f'<<{cmd_fn_label}>>', cmd_fn)
191 |                         command.stderr = new_stderr
192 | 
193 |     def get_commands(self, cmd_pathway_idx):
194 |         return self.command_pathways[cmd_pathway_idx]['commands']
195 | 
196 |     def select_command_pathway(self, inputs):
197 |         """Get the correct command pathway index, given a set of input names and values.
198 | 
199 |         Parameters
200 |         ----------
201 |         inputs : dict of (str: list)
202 |             Dict whose keys are input names and whose values are lists of input values
203 |             (i.e. one element for each task sequence item).
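            An input is counted as "defined" here if any of its sequence values
            is not `None`. Pathways are tested most-specific first (longest
            `condition` list), and the first pathway whose condition inputs are
            all defined is selected, so a pathway without a `condition` acts as
            the default.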
204 | 
205 |         Returns
206 |         -------
207 |         cmd_pathway_idx : int
208 | 
209 |         """
210 | 
211 |         # Consider an input defined if any of its values (in the sequence) are not `None`:
212 |         inputs_defined = [k for k, v in inputs.items() if any([i is not None for i in v])]
213 | 
214 |         # Sort pathways by most-specific first:
215 |         order_idx = np.argsort([len(i.get('condition', []))
216 |                                 for i in self.command_pathways])[::-1]
217 | 
218 |         cmd_pathway_idx = None
219 |         for cmd_pth_idx in order_idx:
220 |             cmd_pth = self.command_pathways[cmd_pth_idx]
221 |             condition = cmd_pth.get('condition', [])
222 |             if not (set(condition) - set(inputs_defined)):
223 |                 # All inputs named in condition are defined:
224 |                 cmd_pathway_idx = cmd_pth_idx
225 |                 break
226 | 
227 |         if cmd_pathway_idx is None:
228 |             raise CommandError('Could not find a suitable command pathway.')
229 | 
230 |         return cmd_pathway_idx
231 | 
232 |     def get_command_file_names(self, cmd_pathway_idx):
233 | 
234 |         out = {
235 |             'input_map': {},
236 |             'output_map': {},
237 |             'all_commands': [],
238 |         }
239 | 
240 |         file_name_increments = {k: 0 for k in self.command_files.keys()}
241 | 
242 |         # Input map should use the first increment:
243 |         for cmd_fn_label in self.command_files.keys():
244 |             new_fn = self.command_files[cmd_fn_label].replace(
245 |                 '<>',
246 |                 str(file_name_increments[cmd_fn_label]),
247 |             )
248 |             out['input_map'].update({cmd_fn_label: new_fn})
249 | 
250 |         for command in self.get_commands(cmd_pathway_idx):
251 | 
252 |             file_names_i = {
253 |                 'stdin': {},
254 |                 'options': {},
255 |                 'parameters': {},
256 |                 'stdout': {},
257 |                 'stderr': {},
258 |             }
259 | 
260 |             cmd_fn_is_incremented = {k: False for k in self.command_files.keys()}
261 |             for cmd_fn_label in self.command_files.keys():
262 | 
263 |                 for opt in command.options_raw:
264 |                     fmt_opt = list(opt)
265 |                     for opt_token in fmt_opt:
266 |                         if f'<<{cmd_fn_label}>>' in opt_token:
267 |                             new_fn = self.command_files[cmd_fn_label].replace(
268 |                                 '<>',
269 |                                 str(file_name_increments[cmd_fn_label]),
270 |                             )
271 |                             file_names_i['options'].update({cmd_fn_label: new_fn})
272 | 
273 |                 for param in command.parameters_raw:
274 |                     if f'<<{cmd_fn_label}>>' in param:
275 |                         new_fn = self.command_files[cmd_fn_label].replace(
276 |                             '<>',
277 |                             str(file_name_increments[cmd_fn_label]),
278 |                         )
279 |                         file_names_i['parameters'].update({cmd_fn_label: new_fn})
280 | 
281 |                 if command.stdin_raw:
282 |                     if f'<<{cmd_fn_label}>>' in command.stdin_raw:
283 |                         new_fn = self.command_files[cmd_fn_label].replace(
284 |                             '<>',
285 |                             str(file_name_increments[cmd_fn_label]),
286 |                         )
287 |                         file_names_i['stdin'].update({cmd_fn_label: new_fn})
288 | 
289 |                 if command.stdout_raw:
290 |                     if f'<<{cmd_fn_label}>>' in command.stdout_raw:
291 |                         file_name_increments[cmd_fn_label] += 1
292 |                         cmd_fn_is_incremented[cmd_fn_label] = True
293 |                         new_fn = self.command_files[cmd_fn_label].replace(
294 |                             '<>',
295 |                             str(file_name_increments[cmd_fn_label]),
296 |                         )
297 |                         file_names_i['stdout'].update({cmd_fn_label: new_fn})
298 | 
299 |                 if command.stderr_raw:
300 |                     if f'<<{cmd_fn_label}>>' in command.stderr_raw:
301 |                         if not cmd_fn_is_incremented[cmd_fn_label]:
302 |                             file_name_increments[cmd_fn_label] += 1
303 |                         new_fn = self.command_files[cmd_fn_label].replace(
304 |                             '<>',
305 |                             str(file_name_increments[cmd_fn_label]),
306 |                         )
307 | 
308 |                         if not cmd_fn_is_incremented[cmd_fn_label]:
309 |                             cmd_fn_is_incremented[cmd_fn_label] = True
310 |                         file_names_i['stderr'].update({cmd_fn_label: new_fn})
311 | 
312 |             out['all_commands'].append(file_names_i)
313 | 
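        # At this point each command file label has one resolved name per
        # command. Writing to a command file via stdout/stderr bumps that
        # file's increment, so later commands (and the output map below) see
        # the newest file name, while the input map above always uses
        # increment 0.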
314 |         # Output map should use the final increment:
315 |         for cmd_fn_label in self.command_files.keys():
316 |             new_fn = self.command_files[cmd_fn_label].replace(
317 |                 '<>',
318 |                 str(file_name_increments[cmd_fn_label]),
319 |             )
320 |             out['output_map'].update({cmd_fn_label: new_fn})
321 | 
322 |         return out
323 | 
324 |     def get_formatted_commands(self, inputs_list, num_cores, cmd_pathway_idx):
325 |         """Format commands into strings with hpcflow variable substitutions where
326 |         required.
327 | 
328 |         Parameters
329 |         ----------
330 |         inputs_list : list of str
331 |             List of input names from which a subset of hpcflow variables may be defined.
332 |         num_cores : int
333 |             Number of CPU cores to use for this task. This is required to determine
334 |             whether a "parallel_mode" should be included in the formatted commands.
335 |         cmd_pathway_idx : int
336 |             Which command pathway should be returned.
337 | 
338 |         Returns
339 |         -------
340 |         tuple of (fmt_commands, var_names)
341 |             fmt_commands : list of dict
342 |                 Each list item is a dict describing an individual command
343 |                 to be run.
344 |             var_names : dict of (str, str)
345 |                 A dict that maps a parameter name to an hpcflow variable name.
346 | 
347 |         """
348 | 
349 |         fmt_commands = []
350 | 
351 |         var_names = {}
352 |         for command in self.get_commands(cmd_pathway_idx):
353 | 
354 |             fmt_opts = []
355 |             for opt in command.options:
356 |                 fmt_opt = list(opt)
357 |                 for opt_token_idx, opt_token in enumerate(fmt_opt):
358 |                     if opt_token in inputs_list:
359 |                         # Replace with an `hpcflow` variable:
360 |                         var_name = 'matflow_input_{}'.format(opt_token)
361 |                         fmt_opt[opt_token_idx] = '<<{}>>'.format(var_name)
362 |                         if opt_token not in var_names:
363 |                             var_names.update({opt_token: var_name})
364 | 
365 |                 fmt_opt_joined = ' '.join(fmt_opt)
366 |                 fmt_opts.append(fmt_opt_joined)
367 | 
368 |             fmt_params = []
369 |             for param in command.parameters:
370 | 
371 |                 fmt_param = param
372 |                 if param in inputs_list:
373 |                     # Replace with an `hpcflow` variable:
374 |                     var_name = 'matflow_input_{}'.format(param)
375 |                     fmt_param = '<<{}>>'.format(var_name)
376 | 
377 |                     if param not in var_names:
378 |                         var_names.update({param: var_name})
379 | 
380 |                 fmt_params.append(fmt_param)
381 | 
382 |             cmd_fmt = ' '.join([command.command] + fmt_opts + fmt_params)
383 | 
384 |             if command.stdin:
385 |                 cmd_fmt += ' < {}'.format(command.stdin)
386 | 
387 |             if command.stdout:
388 |                 cmd_fmt += ' >> {}'.format(command.stdout)
389 | 
390 |             if command.stderr:
391 |                 if command.stderr == command.stdout:
392 |                     cmd_fmt += ' 2>&1'
393 |                 else:
394 |                     cmd_fmt += ' 2>> {}'.format(command.stderr)
395 | 
396 |             cmd_dict = {'line': cmd_fmt}
397 |             if command.parallel_mode and num_cores > 1:
398 |                 cmd_dict.update({'parallel_mode': command.parallel_mode})
399 | 
400 |             fmt_commands.append(cmd_dict)
401 | 
402 |         return fmt_commands, var_names
403 | 
404 | 
405 | class Command(object):
406 |     """Class to represent a command to be executed by a shell."""
407 | 
408 |     def __init__(self, command, options=None, parameters=None, stdin=None, stdout=None,
409 |                  stderr=None, parallel_mode=None):
410 | 
411 |         self.command = command
412 |         self.parallel_mode = parallel_mode
413 | 
414 |         # Raw versions may include command file name variables:
415 |         self.options_raw = options or []
416 |         self.parameters_raw = parameters or []
417 |         self.stdin_raw = stdin
418 |         self.stdout_raw = stdout
419 |         self.stderr_raw = stderr
420 | 
421 |         # Non-raw versions modified by the parent CommandGroup to include any resolved
422 |         # command file name:
423 |         self.options = 
copy.deepcopy(self.options_raw) 424 | self.parameters = copy.deepcopy(self.parameters_raw) 425 | self.stdin = self.stdin_raw 426 | self.stdout = self.stdout_raw 427 | self.stderr = self.stderr_raw 428 | 429 | def __repr__(self): 430 | out = f'{self.__class__.__name__}({self.command!r}' 431 | if self.options: 432 | out += f', options={self.options!r}' 433 | if self.parameters: 434 | out += f', parameters={self.parameters!r}' 435 | if self.stdin: 436 | out += f', stdin={self.stdin!r}' 437 | if self.stdout: 438 | out += f', stdout={self.stdout!r}' 439 | if self.stderr: 440 | out += f', stderr={self.stderr!r}' 441 | out += ')' 442 | return out 443 | 444 | def __str__(self): 445 | 446 | cmd_fmt = ' '.join( 447 | [self.command] + 448 | [' '.join(i) for i in self.options] + 449 | self.parameters 450 | ) 451 | 452 | if self.stdin: 453 | cmd_fmt += ' < {}'.format(self.stdin) 454 | if self.stdout: 455 | cmd_fmt += ' > {}'.format(self.stdout) 456 | if self.stderr: 457 | if self.stderr == self.stdout: 458 | cmd_fmt += ' 2>&1' 459 | else: 460 | cmd_fmt += ' 2> {}'.format(self.stderr) 461 | 462 | return cmd_fmt 463 | -------------------------------------------------------------------------------- /matflow/models/element.py: -------------------------------------------------------------------------------- 1 | """matflow.models.element.py""" 2 | 3 | import copy 4 | 5 | import hickle 6 | import h5py 7 | 8 | from matflow.models.parameters import Parameters, Files 9 | 10 | 11 | class Element(object): 12 | 13 | __slots__ = [ 14 | '_task', 15 | '_element_idx', 16 | '_inputs', 17 | '_outputs', 18 | '_files', 19 | '_resource_usage', 20 | ] 21 | 22 | def __init__(self, task, element_idx, inputs_data_idx=None, outputs_data_idx=None, 23 | files_data_idx=None, resource_usage=None): 24 | 25 | self._task = task 26 | self._element_idx = element_idx 27 | self._resource_usage = resource_usage 28 | 29 | self._inputs = Parameters(self, inputs_data_idx) 30 | self._outputs = Parameters(self, outputs_data_idx) 31 | self._files = Files(self, files_data_idx) 32 | 33 | def __repr__(self): 34 | out = ( 35 | f'{self.__class__.__name__}(' 36 | f'inputs={self.inputs!r}, ' 37 | f'outputs={self.outputs!r}, ' 38 | f'files={self.files!r}' 39 | f')' 40 | ) 41 | return out 42 | 43 | @property 44 | def task(self): 45 | return self._task 46 | 47 | @property 48 | def element_idx(self): 49 | return self._element_idx 50 | 51 | @property 52 | def resource_usage(self): 53 | return self._resource_usage 54 | 55 | def as_dict(self): 56 | """Return attributes dict with preceding underscores removed.""" 57 | self_dict = {k.lstrip('_'): getattr(self, k) for k in self.__slots__} 58 | self_dict.pop('task') 59 | self_dict['inputs_data_idx'] = self_dict.pop('inputs').as_dict() 60 | self_dict['outputs_data_idx'] = self_dict.pop('outputs').as_dict() 61 | self_dict['files_data_idx'] = self_dict.pop('files').as_dict() 62 | return self_dict 63 | 64 | def get_parameter_data_idx(self, parameter_name): 65 | try: 66 | out = self.outputs.get_data_idx(parameter_name) 67 | except KeyError: 68 | out = self.inputs.get_data_idx(parameter_name) 69 | 70 | return out 71 | 72 | def get_input_data_idx(self, input_name, safe_name=False): 73 | return self.inputs.get_data_idx(input_name, safe_name) 74 | 75 | def get_output_data_idx(self, output_name, safe_name=False): 76 | return self.outputs.get_data_idx(output_name, safe_name) 77 | 78 | def get_file_data_idx(self, file_name, safe_name=False): 79 | return self.files.get_data_idx(file_name, safe_name) 80 | 81 | def 
get_input(self, input_name, safe_name=False): 82 | if not safe_name: 83 | input_name = self.inputs.get_name_map()[input_name] 84 | return getattr(self.inputs, input_name) 85 | 86 | def get_output(self, output_name, safe_name=False): 87 | if not safe_name: 88 | output_name = self.outputs.get_name_map()[output_name] 89 | return getattr(self.outputs, output_name) 90 | 91 | def get_file(self, file_name, safe_name=False): 92 | if not safe_name: 93 | file_name = self.files.get_name_map()[file_name] 94 | return getattr(self.files, file_name) 95 | 96 | def get_file_lines(self, file_name, lines_slice=(10,), safe_name=False): 97 | return self.files.get_lines(file_name, lines_slice, safe_name) 98 | 99 | def print_file_lines(self, file_name, lines_slice=(10,), safe_name=False): 100 | self.files.print_lines(file_name, lines_slice, safe_name) 101 | 102 | @property 103 | def inputs(self): 104 | return self._inputs 105 | 106 | @property 107 | def outputs(self): 108 | return self._outputs 109 | 110 | @property 111 | def files(self): 112 | return self._files 113 | 114 | @property 115 | def HDF5_path(self): 116 | return self.task.HDF5_path + f'/\'elements\'/data/data_{self.element_idx}' 117 | 118 | def add_input(self, input_name, value=None, data_idx=None): 119 | return self.inputs.add_parameter(input_name, 'inputs', value, data_idx) 120 | 121 | def add_output(self, output_name, value=None, data_idx=None): 122 | return self.outputs.add_parameter(output_name, 'outputs', value, data_idx) 123 | 124 | def add_file(self, file_name, value=None, data_idx=None): 125 | return self.files.add_parameter(file_name, 'files', value, data_idx) 126 | 127 | def add_resource_usage(self, resource_usage): 128 | 129 | with h5py.File(self.task.workflow.loaded_path, 'r+') as handle: 130 | 131 | # Load and save attributes of parameter index dict: 132 | path = self.HDF5_path + "/'resource_usage'" 133 | attributes = dict(handle[path].attrs) 134 | del handle[path] 135 | 136 | # Dump resource usage: 137 | hickle.dump(resource_usage, handle, path=path) 138 | 139 | # Update dict attributes to maintain /workflow_obj loadability 140 | for k, v in attributes.items(): 141 | handle[path].attrs[k] = v 142 | 143 | def get_element_dependencies(self, recurse=False): 144 | """Get the task/element indices of elements that a given element depends on. 145 | 146 | Parameters 147 | ---------- 148 | recurse : bool, optional 149 | If False, only include task/element indices that are direct dependencies of 150 | the given element. If True, also include task/element indices that indirect 151 | dependencies of the given element. 152 | 153 | Returns 154 | ------- 155 | dict of (int : list) 156 | Dict whose keys are task indices and whose values are lists of element indices 157 | for a given task. 158 | 159 | Notes 160 | ----- 161 | For the inverse, see `get_dependent_elements`. 
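        A minimal usage sketch (hypothetical workflow in which task 2 consumes
        outputs of tasks 0 and 1; actual indices depend on the workflow):

        >>> elem = workflow.tasks[2].elements[0]
        >>> elem.get_element_dependencies(recurse=True)
        {0: [0], 1: [0, 1]}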
162 | 163 | """ 164 | 165 | task = self.task 166 | workflow = task.workflow 167 | elem_deps = {} 168 | for inp_alias, ins in workflow.elements_idx[task.task_idx]['inputs'].items(): 169 | if ins['task_idx'][self.element_idx] is not None: 170 | dep_elem_idx = ins['element_idx'][self.element_idx] 171 | # (maybe not needed) 172 | if ins['task_idx'][self.element_idx] not in elem_deps: 173 | elem_deps.update({ins['task_idx'][self.element_idx]: []}) 174 | elem_deps[ins['task_idx'][self.element_idx]].extend(dep_elem_idx) 175 | 176 | if recurse: 177 | new_elem_deps = copy.deepcopy(elem_deps) 178 | for task_idx, element_idx in elem_deps.items(): 179 | for element_idx_i in element_idx: 180 | element_i = workflow.tasks[task_idx].elements[element_idx_i] 181 | add_elem_deps = element_i.get_element_dependencies(recurse=True) 182 | for k, v in add_elem_deps.items(): 183 | if k not in new_elem_deps: 184 | new_elem_deps.update({k: []}) 185 | new_elem_deps[k].extend(v) 186 | 187 | elem_deps = new_elem_deps 188 | 189 | # Remove repeats: 190 | for k, v in elem_deps.items(): 191 | elem_deps[k] = list(set(v)) 192 | 193 | return elem_deps 194 | 195 | def get_dependent_elements(self, recurse=False): 196 | """Get the task/element indices of elements that depend on a given element. 197 | 198 | Parameters 199 | ---------- 200 | recurse : bool, optional 201 | If False, only include task/element indices that depend directly on the given 202 | element. If True, also include task/element indices that depend indirectly on 203 | the given element. 204 | 205 | Returns 206 | ------- 207 | dict of (int : list) 208 | Dict whose keys are task indices and whose values are lists of element indices 209 | for a given task. 210 | 211 | Notes 212 | ----- 213 | For the inverse, see `get_element_dependencies`. 
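        A minimal usage sketch (hypothetical workflow in which both elements of
        task 2 consume outputs of this element):

        >>> elem.get_dependent_elements()
        {2: [0, 1]}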
214 | 215 | """ 216 | 217 | task = self.task 218 | workflow = task.workflow 219 | dep_elems = {} 220 | 221 | for task_idx, elems_idx in enumerate(workflow.elements_idx): 222 | for inp_alias, ins in elems_idx['inputs'].items(): 223 | if ins.get('task_idx') == task.task_idx: 224 | for element_idx, i in enumerate(ins['element_idx']): 225 | if self.element_idx in i: 226 | if task_idx not in dep_elems: 227 | dep_elems.update({task_idx: []}) 228 | dep_elems[task_idx].append(element_idx) 229 | 230 | if recurse: 231 | new_dep_elems = copy.deepcopy(dep_elems) 232 | for task_idx, element_idx in dep_elems.items(): 233 | for element_idx_i in element_idx: 234 | element_i = workflow.tasks[task_idx].elements[element_idx_i] 235 | add_elem_deps = element_i.get_dependent_elements(recurse=True) 236 | for k, v in add_elem_deps.items(): 237 | if k not in new_dep_elems: 238 | new_dep_elems.update({k: []}) 239 | new_dep_elems[k].extend(v) 240 | 241 | dep_elems = new_dep_elems 242 | 243 | # Remove repeats: 244 | for k, v in dep_elems.items(): 245 | dep_elems[k] = list(set(v)) 246 | 247 | return dep_elems 248 | 249 | def get_parameter_dependency_value(self, parameter_dependency_name): 250 | 251 | workflow = self.task.workflow 252 | 253 | in_tasks = workflow.get_input_tasks(parameter_dependency_name) 254 | out_tasks = workflow.get_output_tasks(parameter_dependency_name) 255 | elem_deps = self.get_element_dependencies(recurse=True) 256 | 257 | if parameter_dependency_name in self.task.schema.input_names: 258 | param_vals = [self.get_input(parameter_dependency_name)] 259 | 260 | elif out_tasks: 261 | elems = [] 262 | out_tasks_valid = set(out_tasks) & set(elem_deps) 263 | if not out_tasks_valid: 264 | msg = (f'Parameter "{parameter_dependency_name}" is not a dependency of ' 265 | f'given element of task "{self.task.name}".') 266 | raise ValueError(msg) 267 | for task_idx in out_tasks_valid: 268 | for i in elem_deps[task_idx]: 269 | elems.append(workflow.tasks[task_idx].elements[i]) 270 | param_vals = [elem.get_output(parameter_dependency_name) for elem in elems] 271 | 272 | elif in_tasks: 273 | elems = [] 274 | in_tasks_valid = set(in_tasks) & set(elem_deps) 275 | if not in_tasks_valid: 276 | msg = (f'Parameter "{parameter_dependency_name}" is not a dependency of ' 277 | f'given element of task "{self.task.name}".') 278 | raise ValueError(msg) 279 | for task_idx in in_tasks_valid: 280 | for i in elem_deps[task_idx]: 281 | elems.append(workflow.tasks[task_idx].elements[i]) 282 | param_vals = [elem.get_input(parameter_dependency_name) for elem in elems] 283 | else: 284 | msg = (f'Parameter "{parameter_dependency_name}" is not an input or output ' 285 | f'parameter for any workflow task.') 286 | raise ValueError(msg) 287 | 288 | if len(param_vals) == 1: 289 | param_vals = param_vals[0] 290 | 291 | return param_vals 292 | 293 | def get_dependent_parameter_value(self, dependent_parameter_name): 294 | 295 | workflow = self.task.workflow 296 | 297 | out_tasks = workflow.get_output_tasks(dependent_parameter_name) 298 | dep_elems = self.get_dependent_elements(recurse=True) 299 | 300 | if dependent_parameter_name in self.task.schema.outputs: 301 | param_vals = [self.get_output(dependent_parameter_name)] 302 | 303 | elif out_tasks: 304 | elems = [] 305 | out_tasks_valid = set(out_tasks) & set(dep_elems) 306 | if not out_tasks_valid: 307 | msg = (f'Parameter "{dependent_parameter_name}" does not depend on the ' 308 | f'given element of task "{self.task.name}".') 309 | raise ValueError(msg) 310 | for task_idx in out_tasks_valid: 
311 | for i in dep_elems[task_idx]: 312 | elems.append(workflow.tasks[task_idx].elements[i]) 313 | param_vals = [elem.get_output(dependent_parameter_name) for elem in elems] 314 | else: 315 | msg = (f'Parameter "{dependent_parameter_name}" is not an output parameter ' 316 | f'for any workflow task.') 317 | raise ValueError(msg) 318 | 319 | if len(param_vals) == 1: 320 | param_vals = param_vals[0] 321 | 322 | return param_vals 323 | -------------------------------------------------------------------------------- /matflow/models/parameters.py: -------------------------------------------------------------------------------- 1 | """matflow.models.parameters.py""" 2 | 3 | import re 4 | import keyword 5 | 6 | import h5py 7 | import hickle 8 | 9 | from matflow.utils import zeropad 10 | 11 | 12 | class Parameters(object): 13 | 14 | """ 15 | Attributes 16 | ---------- 17 | _element : Element 18 | _parameters : dict 19 | Dict mapping the safe names of the parameters to their data indices within the 20 | HDF5 element_idx group. 21 | _name_map : dict 22 | Dict mapping the non-safe names of the parameters to their safe names. A safe name 23 | refers to a name that can be used as a variable name within Python. For example, 24 | spaces and dots are removed from non-safe names to become safe names. The reason 25 | for doing this is to allow the use of dot-notation to access element data/files. 26 | 27 | """ 28 | 29 | def __init__(self, element, parameters): 30 | 31 | self._element = element 32 | self._parameters, self._name_map = self._normalise_params_dict(parameters) 33 | 34 | def __getattr__(self, safe_name): 35 | if safe_name in self._parameters: 36 | wkflow = self._element.task.workflow 37 | names_inv = {safe: non_safe for non_safe, safe in self._name_map.items()} 38 | name = names_inv[safe_name] 39 | data_idx = self.get_data_idx(name) 40 | return wkflow.get_element_data(data_idx) 41 | else: 42 | msg = f'{self.__class__.__name__!r} object has no attribute {safe_name!r}.' 
43 | raise AttributeError(msg) 44 | 45 | def __setattr__(self, name, value): 46 | if name in ['_element', '_parameters', '_name_map']: 47 | super().__setattr__(name, value) 48 | else: 49 | raise AttributeError 50 | 51 | def __dir__(self): 52 | return super().__dir__() + list(self._parameters.keys()) 53 | 54 | def __repr__(self): 55 | names_fmt = ', '.join([f'{i!r}' for i in self._parameters.keys()]) 56 | out = f'{self.__class__.__name__}({names_fmt})' 57 | return out 58 | 59 | def _normalise_params_dict(self, parameters): 60 | 61 | normed_data_idx = {} 62 | name_map = {} 63 | for name, v in (parameters or {}).items(): 64 | safe_name = self._normalise_param_name(name, normed_data_idx.keys()) 65 | normed_data_idx.update({safe_name: v}) 66 | name_map.update({name: safe_name}) 67 | 68 | return normed_data_idx, name_map 69 | 70 | @staticmethod 71 | def get_element_data_key(element_idx, param_name): 72 | return f'{zeropad(element_idx, 1000)}_{param_name}' 73 | 74 | @staticmethod 75 | def _normalise_param_name(param_name, existing_names): 76 | """Transform a string so that it is a valid Python variable name.""" 77 | param_name_old = param_name 78 | safe_name = param_name.replace('.', '_dot_').replace(' ', '_').replace('-', '_') 79 | if ( 80 | re.match(r'\d', safe_name) or 81 | safe_name in dir(Parameters) or 82 | keyword.iskeyword(safe_name) or 83 | safe_name in existing_names 84 | ): 85 | safe_name = 'param_' + safe_name 86 | 87 | if re.search(r'[^a-zA-Z0-9_]', safe_name) or not safe_name: 88 | raise ValueError(f'Invalid parameter name: "{param_name_old}".') 89 | 90 | return safe_name 91 | 92 | def as_dict(self): 93 | return self.get_parameters(safe_names=False) 94 | 95 | def get_parameters(self, safe_names=True): 96 | if not safe_names: 97 | names_inv = {safe: non_safe for non_safe, safe in self._name_map.items()} 98 | return {names_inv[safe_name]: v for safe_name, v in self._parameters.items()} 99 | return self._parameters 100 | 101 | def get(self, name, safe_name=False): 102 | if not safe_name: 103 | name = self._name_map[name] 104 | return getattr(self, name) 105 | 106 | def get_all(self, safe_names=False): 107 | return { 108 | k: self.get(k, safe_names) 109 | for k in (self._parameters if safe_names else self._name_map).keys() 110 | } 111 | 112 | def get_element(self): 113 | """Not a property to reduce chance of attribute collisions.""" 114 | return self._element 115 | 116 | def get_name_map(self): 117 | """Not a property to reduce chance of attribute collisions.""" 118 | return self._name_map 119 | 120 | def get_data_idx(self, name, safe_name=False): 121 | if not safe_name: 122 | name = self._name_map[name] 123 | out = self._parameters[name] 124 | if isinstance(out, list): 125 | out = tuple(out) 126 | return out 127 | 128 | def add_parameter(self, name, param_type, value=None, data_idx=None): 129 | 130 | if name in self._name_map: 131 | raise ValueError(f'Parameter "{name}" already exists.') 132 | 133 | safe_name = self._normalise_param_name(name, self._parameters.keys()) 134 | loaded_path = self._element.task.workflow.loaded_path 135 | 136 | with h5py.File(loaded_path, 'r+') as handle: 137 | 138 | if data_idx is None: 139 | # Add data to the `element_data` group if required: 140 | path = '/element_data' 141 | next_idx = len(handle[path]) 142 | element_data_key = self.get_element_data_key(next_idx, name) 143 | new_group = handle[path].create_group(element_data_key) 144 | hickle.dump(value, handle, path=new_group.name) 145 | data_idx = next_idx 146 | 147 | # Load and save attributes of 
parameter index dict:
148 |             path = self._element.HDF5_path + f"/'{param_type}_data_idx'"
149 |             attributes = dict(handle[path].attrs)
150 |             param_index = hickle.load(handle, path=path)
151 |             del handle[path]
152 | 
153 |             # Update and re-dump parameter index dict:
154 |             param_index.update({name: data_idx})
155 |             hickle.dump(param_index, handle, path=path)
156 | 
157 |             # Update parameter index dict attributes to maintain /workflow_obj loadability:
158 |             for k, v in attributes.items():
159 |                 handle[path].attrs[k] = v
160 | 
161 |         self._name_map.update({name: safe_name})
162 |         self._parameters.update({safe_name: data_idx})
163 | 
164 |         return data_idx
165 | 
166 | 
167 | class Files(Parameters):
168 | 
169 |     def get_lines(self, file_name, lines_slice=(1, 10), safe_name=False):
170 | 
171 |         if not safe_name:
172 |             file_name = self.get_name_map()[file_name]
173 | 
174 |         if not isinstance(lines_slice, slice):
175 |             if isinstance(lines_slice, int):
176 |                 lines_slice = (lines_slice,)
177 |             lines_slice = slice(*lines_slice)
178 | 
179 |         return getattr(self, file_name).split('\n')[lines_slice]
180 | 
181 |     def print_lines(self, file_name, lines_slice=(1, 10), safe_name=False):
182 | 
183 |         lns = self.get_lines(file_name, lines_slice, safe_name)
184 |         print('\n'.join(lns))
185 | 
--------------------------------------------------------------------------------
/matflow/models/software.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import socket
3 | 
4 | from matflow.errors import SoftwareInstanceError, MissingSoftwareSourcesError
5 | from matflow.utils import extract_variable_names
6 | 
7 | 
8 | class SourcesPreparation(object):
9 | 
10 |     __slots__ = ['_commands', '_env']
11 | 
12 |     def __init__(self, commands=None, env=None):
13 |         self._commands = commands
14 |         self._env = EnvironmentSpec(env)
15 | 
16 |     def __repr__(self):
17 |         return f'{self.__class__.__name__}(commands={self.commands!r}, env={self.env!r})'
18 | 
19 |     def __bool__(self):
20 |         return bool(self.commands)
21 | 
22 |     @property
23 |     def commands(self):
24 |         return self._commands
25 | 
26 |     def get_formatted_commands(self, source_vars, sources_dir, task_idx):
27 |         out = [{
28 |             'line': (f'matflow prepare-sources '
29 |                      f'--task-idx={task_idx} '
30 |                      f'--iteration-idx=$ITER_IDX')
31 |         }]
32 |         if self.commands:
33 |             for new_cmd in self.commands.splitlines():
34 |                 new_cmd = new_cmd.replace('<>', sources_dir)
35 |                 for src_var_name, src_name in source_vars.items():
36 |                     new_cmd = new_cmd.replace(f'<<{src_var_name}>>', src_name)
37 |                 out.append({'line': new_cmd})
38 |         return out
39 | 
40 |     @property
41 |     def commands_fmt(self):
42 |         return [{'line': i} for i in (self._commands or '').splitlines()]
43 | 
44 |     @property
45 |     def env(self):
46 |         return self._env
47 | 
48 |     def as_dict(self):
49 |         return {'commands': self.commands, 'env': self.env.value}
50 | 
51 | 
52 | class AuxiliaryTaskSpec(object):
53 | 
54 |     __slots__ = ['_env']
55 | 
56 |     def __init__(self, env=None):
57 |         self._env = EnvironmentSpec(env)
58 | 
59 |     def __repr__(self):
60 |         return f'{self.__class__.__name__}(env={self.env!r})'
61 | 
62 |     @property
63 |     def env(self):
64 |         return self._env
65 | 
66 |     def as_dict(self):
67 |         return {'env': self.env.value}
68 | 
69 | 
70 | class EnvironmentSpec(object):
71 | 
72 |     __slots__ = ['_value']
73 | 
74 |     def __init__(self, value=None):
75 |         self._value = value
76 | 
77 |     def __repr__(self):
78 |         return f'{self.__class__.__name__}(value={self.value!r})'
79 | 
80 |     @property
81 |     def value(self):
82 |         return self._value
83 | 
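    # For example, EnvironmentSpec('module load foo\nexport BAR=1').as_list()
    # returns ['module load foo', 'export BAR=1']; with no value, `as_str`
    # returns '' and `as_list` returns [].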
84 | def as_str(self): 85 | return self.value or '' 86 | 87 | def as_list(self): 88 | return self.as_str().splitlines() 89 | 90 | 91 | class SoftwareInstance(object): 92 | 93 | __slots__ = [ 94 | '_machine', 95 | '_software_friendly', 96 | '_label', 97 | '_env', 98 | '_cores_min', 99 | '_cores_max', 100 | '_cores_step', 101 | '_executable', 102 | '_sources_preparation', 103 | '_options', 104 | '_required_scheduler_options', 105 | '_version_info', 106 | '_task_preparation', 107 | '_task_processing', 108 | ] 109 | 110 | def __init__(self, software, label=None, env=None, cores_min=1, cores_max=1, 111 | cores_step=1, executable=None, sources_preparation=None, options=None, 112 | required_scheduler_options=None, version_info=None, 113 | task_preparation=None, task_processing=None): 114 | """Initialise a SoftwareInstance object. 115 | 116 | Parameters 117 | ---------- 118 | software : str 119 | Name of the software. This is the name that will be exposed as the `SOFTWARE` 120 | attribute of a Matflow extension package. 121 | label : str, optional 122 | Label used to distinguish software instances for the same `software`. For 123 | example, this could be a version string. 124 | env : str, optional 125 | Multi-line string containing commands to be executed by the shell that are 126 | necessary to set up the environment for running this software. 127 | executable : str, optional 128 | The command that represents the executable for running this software. 129 | cores_min : int, optional 130 | Specifies the minimum number (inclusive) of cores this software instance 131 | supports. By default, 1. 132 | cores_max : int, optional 133 | Specifies the maximum number (inclusive) of cores this software instance 134 | supports. By default, 1. 135 | cores_step : int, optional 136 | Specifies the step size from `cores_min` to `cores_max` this software instance 137 | supports. By default, 1. 138 | sources_preparation : dict, optional 139 | Dict containing the following keys: 140 | env : str 141 | Multi-line string containing commands to be executed by the shell that 142 | are necessary to set up the environment for running the preparation 143 | commands. 144 | commands : str 145 | Multi-line string containing commands to be executed within the 146 | preparation `environment` that are necessary to prepare the 147 | executable. For instance, this might contain commands that compile a 148 | source code file into an executable. 149 | options : list of str, optional 150 | Additional software options as string labels that this instance supports. This 151 | can be used to label software instances for which add-ons are loaded. 152 | required_scheduler_options : dict, optional 153 | Scheduler options that are required for using this software instance. 154 | version_info : dict, optional 155 | If an extension does not provide a `software_version` function, then the 156 | version info dict must be specified here. The keys are str names and the 157 | values are dicts that must contain at least a key `version`. 158 | task_preparation : dict, optional 159 | Dict containing the following keys: 160 | env : str 161 | Multi-line string containing commands to be executed by the shell that 162 | are necessary to set up the environment for running 163 | `matflow prepare-task`. 164 | task_processing : dict, optional 165 | Dict containing the following keys: 166 | env : str 167 | Multi-line string containing commands to be executed by the shell that 168 | are necessary to set up the environment for running 169 | `matflow process-task`. 
170 | 171 | """ 172 | 173 | self._machine = None # Set once by `set_machine` 174 | 175 | self._software_friendly = software 176 | self._label = label 177 | self._env = EnvironmentSpec(env) 178 | self._cores_min = cores_min 179 | self._cores_max = cores_max 180 | self._cores_step = cores_step 181 | self._sources_preparation = SourcesPreparation(**(sources_preparation or {})) 182 | self._executable = executable 183 | self._options = options or [] 184 | self._required_scheduler_options = required_scheduler_options or {} 185 | self._version_info = version_info or None 186 | self._task_preparation = AuxiliaryTaskSpec(**(task_preparation or {})) 187 | self._task_processing = AuxiliaryTaskSpec(**(task_processing or {})) 188 | 189 | self._validate_num_cores() 190 | self._validate_version_infos() 191 | 192 | def _validate_num_cores(self): 193 | if self.cores_min < 1: 194 | raise SoftwareInstanceError('`cores_min` must be greater than 0.') 195 | if self.cores_min > self.cores_max: 196 | msg = '`cores_max` must be greater than or equal to `cores_min`.' 197 | raise SoftwareInstanceError(msg) 198 | if self.cores_step < 1: 199 | raise SoftwareInstanceError('`cores_step` must be greater than 0.') 200 | 201 | def _validate_version_infos(self): 202 | if self.version_info: 203 | REQUIRED = ['version'] 204 | for k, v in self.version_info.items(): 205 | miss_keys = set(REQUIRED) - set(v.keys()) 206 | if miss_keys: 207 | miss_keys_fmt = ', '.join([f'"{i}"' for i in miss_keys]) 208 | msg = (f'Missing required keys in version info dict for name "{k}" ' 209 | f'for software definition "{self.software}": {miss_keys_fmt}.') 210 | raise SoftwareInstanceError(msg) 211 | 212 | def __repr__(self): 213 | return ( 214 | f'{self.__class__.__name__}(' 215 | f'software={self.software!r}, ' 216 | f'label={self.label!r}, ' 217 | f'cores_range={self.cores_range!r}, ' 218 | f'executable={self.executable!r}, ' 219 | f'version_info={self.version_info!r}' 220 | f')' 221 | ) 222 | 223 | def as_dict(self): 224 | """Return attributes dict with preceding underscores removed.""" 225 | self_dict = {k.lstrip('_'): getattr(self, k) for k in self.__slots__} 226 | self_dict['software'] = self_dict.pop('software_friendly') 227 | self_dict['env'] = self_dict['env'].value 228 | self_dict['sources_preparation'] = self_dict['sources_preparation'].as_dict() 229 | self_dict['task_preparation'] = self_dict['task_preparation'].as_dict() 230 | self_dict['task_processing'] = self_dict['task_processing'].as_dict() 231 | return self_dict 232 | 233 | def validate_source_maps(self, task, method, software, all_sources_maps): 234 | """Check that any sources required in the preparation commands or executable are 235 | available in the sources map.""" 236 | 237 | source_vars = self.source_variables 238 | if source_vars: 239 | if (task, method, software) not in all_sources_maps: 240 | msg = (f'No extension defines a sources map for the task "{task}" with ' 241 | f'method "{method}" and software "{software}".') 242 | raise MissingSoftwareSourcesError(msg) 243 | else: 244 | sources_map = all_sources_maps[(task, method, software)] 245 | 246 | for i in source_vars: 247 | if i not in sources_map['sources']: 248 | msg = (f'Source variable name "{i}" is not in the sources map for ' 249 | f'task "{task}" with method "{method}" and software ' 250 | f'"{software}".') 251 | raise MissingSoftwareSourcesError(msg) 252 | 253 | @classmethod 254 | def load_multiple(cls, software_dict=None): 255 | """Load many SoftwareInstance objects from a dict of software instance 256 | 
definitions. 257 | 258 | Parameters 259 | ---------- 260 | software_dict : dict of (str : dict) 261 | Keys are software names and values are dicts with the following keys: 262 | instances : list of dict 263 | Each element is a dict 264 | instance_defaults : dict, optional 265 | Default values to apply to each dict in the `instances` list. 266 | 267 | Returns 268 | ------- 269 | all_instances : dict of (str : list of SoftwareInstance) 270 | 271 | """ 272 | 273 | software_dict = software_dict or {} 274 | REQUIRED = ['instances'] 275 | ALLOWED = REQUIRED + ['instance_defaults'] 276 | 277 | INST_REQUIRED = ['num_cores'] 278 | INST_DICT_KEYS = [ 279 | 'required_scheduler_options', 280 | 'sources_preparation', 281 | ] 282 | INST_ALLOWED = INST_REQUIRED + INST_DICT_KEYS + [ 283 | 'label', 284 | 'options', 285 | 'env', 286 | 'executable', 287 | 'version_info', 288 | 'task_preparation', 289 | 'task_processing', 290 | ] 291 | 292 | all_instances = {} 293 | for name, definition in software_dict.items(): 294 | 295 | name_friendly = name 296 | name = SoftwareInstance.get_software_safe(name) 297 | 298 | bad_keys = set(definition.keys()) - set(ALLOWED) 299 | miss_keys = set(REQUIRED) - set(definition.keys()) 300 | if bad_keys: 301 | bad_keys_fmt = ', '.join([f'"{i}"' for i in bad_keys]) 302 | msg = (f'Unknown keys in software instance definitions for software ' 303 | f'"{name}": {bad_keys_fmt}.') 304 | raise SoftwareInstanceError(msg) 305 | if miss_keys: 306 | miss_keys_fmt = ', '.join([f'"{i}"' for i in miss_keys]) 307 | msg = (f'Software instance definitions for software "{name}" are missing ' 308 | f'keys: {miss_keys_fmt}.') 309 | raise SoftwareInstanceError(msg) 310 | 311 | # Merge instance defaults with instance definition: 312 | inst_defs = definition.get('instance_defaults', {}) 313 | all_name_instances = [] 314 | for inst in definition['instances']: 315 | 316 | inst = dict(inst) 317 | inst_merged = dict(copy.deepcopy(inst_defs)) 318 | 319 | for key, val in inst.items(): 320 | if key not in INST_DICT_KEYS: 321 | inst_merged.update({key: val}) 322 | 323 | # Merge values of any `INST_DICT_KEYS` individually. 
324 | for key in INST_DICT_KEYS: 325 | if key in inst: 326 | if key not in inst_merged: 327 | inst_merged.update({key: {}}) 328 | for subkey in inst[key]: 329 | inst_merged[key].update({subkey: inst[key][subkey]}) 330 | 331 | bad_keys = set(inst_merged.keys()) - set(INST_ALLOWED) 332 | miss_keys = set(INST_REQUIRED) - set(inst_merged.keys()) 333 | 334 | if bad_keys: 335 | bad_keys_fmt = ', '.join([f'"{i}"' for i in bad_keys]) 336 | msg = (f'Unknown keys in software instance definitions for software ' 337 | f'"{name}": {bad_keys_fmt}.') 338 | raise SoftwareInstanceError(msg) 339 | if miss_keys: 340 | miss_keys_fmt = ', '.join([f'"{i}"' for i in miss_keys]) 341 | msg = (f'Software instance definitions for software "{name}" are ' 342 | f'missing keys: {miss_keys_fmt}.') 343 | raise SoftwareInstanceError(msg) 344 | 345 | inst_merged['software'] = name_friendly 346 | num_cores = inst_merged.pop('num_cores', None) 347 | cores_min = 1 348 | cores_max = 1 349 | cores_step = 1 350 | if num_cores is not None: 351 | if isinstance(num_cores, (list, tuple)): 352 | if len(num_cores) == 2: 353 | cores_min, cores_max = num_cores 354 | elif len(num_cores) == 3: 355 | cores_min, cores_max, cores_step = num_cores 356 | else: 357 | msg = (f'`num_cores` value not understood in software ' 358 | f'instance definition for software "{name}".') 359 | raise SoftwareInstanceError(msg) 360 | else: 361 | cores_min = num_cores 362 | cores_max = num_cores 363 | cores_step = num_cores 364 | 365 | inst_merged.update({ 366 | 'cores_min': cores_min, 367 | 'cores_max': cores_max, 368 | 'cores_step': cores_step, 369 | }) 370 | 371 | soft_inst = cls(**inst_merged) 372 | soft_inst.set_machine() 373 | all_name_instances.append(soft_inst) 374 | 375 | all_instances.update({name: all_name_instances}) 376 | 377 | return all_instances 378 | 379 | @property 380 | def requires_sources(self): 381 | if ( 382 | ( 383 | self.sources_preparation and 384 | '<<sources_dir>>' in self.sources_preparation.commands 385 | ) or 386 | (self.executable and '<<sources_dir>>' in self.executable) 387 | ): 388 | return True 389 | else: 390 | return False 391 | 392 | @property 393 | def source_variables(self): 394 | if not self.requires_sources: 395 | return [] 396 | else: 397 | source_vars = [] 398 | if self.sources_preparation: 399 | source_vars += extract_variable_names( 400 | self.sources_preparation.commands, 401 | ['<<', '>>'] 402 | ) 403 | if self.executable: 404 | source_vars += extract_variable_names(self.executable, ['<<', '>>']) 405 | 406 | return list(set(source_vars) - set(['sources_dir'])) 407 | 408 | @property 409 | def software(self): 410 | return self.get_software_safe(self.software_friendly) 411 | 412 | @staticmethod 413 | def get_software_safe(software_name): 414 | return software_name.lower().replace(' ', '_') 415 | 416 | @property 417 | def software_friendly(self): 418 | return self._software_friendly 419 | 420 | @property 421 | def label(self): 422 | return self._label 423 | 424 | @property 425 | def env(self): 426 | return self._env 427 | 428 | @property 429 | def task_preparation(self): 430 | return self._task_preparation 431 | 432 | @property 433 | def task_processing(self): 434 | return self._task_processing 435 | 436 | @property 437 | def cores_min(self): 438 | return self._cores_min 439 | 440 | @property 441 | def cores_max(self): 442 | return self._cores_max 443 | 444 | @property 445 | def cores_step(self): 446 | return self._cores_step 447 | 448 | @property 449 | def cores_range(self): 450 | return range(self.cores_min, self.cores_max + 1,
self.cores_step) 451 | 452 | @property 453 | def sources_preparation(self): 454 | return self._sources_preparation 455 | 456 | @property 457 | def executable(self): 458 | return self._executable 459 | 460 | @property 461 | def options(self): 462 | return self._options 463 | 464 | @property 465 | def required_scheduler_options(self): 466 | return self._required_scheduler_options 467 | 468 | @property 469 | def version_info(self): 470 | return self._version_info 471 | 472 | @property 473 | def machine(self): 474 | return self._machine 475 | 476 | @machine.setter 477 | def machine(self, machine): 478 | if self._machine: 479 | raise ValueError('`machine` is already set.') 480 | self._machine = machine 481 | 482 | def set_machine(self): 483 | self.machine = socket.gethostname() 484 | -------------------------------------------------------------------------------- /matflow/profile.py: -------------------------------------------------------------------------------- 1 | """`matflow.profile.py`""" 2 | 3 | from pathlib import Path 4 | 5 | from ruamel import yaml 6 | 7 | from matflow.errors import ProfileError 8 | from matflow.config import Config 9 | 10 | 11 | def parse_workflow_profile(profile_path): 12 | 13 | with Path(profile_path).open() as handle: 14 | profile = yaml.safe_load(handle) 15 | 16 | req_keys = ['name', 'tasks'] 17 | task_globals = ['run_options', 'stats'] 18 | good_keys = req_keys + task_globals + [ 19 | 'extends', 20 | 'archive', 21 | 'archives', 22 | 'archive_excludes', 23 | 'figures', 24 | 'metadata', 25 | 'num_iterations', 26 | 'iterate', 27 | 'import', 28 | 'import_list', # equivalent to 'import'; provides a Python-code-safe variant. 29 | ] 30 | 31 | miss_keys = list(set(req_keys) - set(profile.keys())) 32 | bad_keys = list(set(profile.keys()) - set(good_keys)) 33 | 34 | if miss_keys: 35 | miss_keys_fmt = ', '.join([f'"{i}"' for i in miss_keys]) 36 | raise ProfileError(f'Missing keys in profile: {miss_keys_fmt}.') 37 | if bad_keys: 38 | bad_keys_fmt = ', '.join([f'"{i}"' for i in bad_keys]) 39 | raise ProfileError(f'Unknown keys in profile: {bad_keys_fmt}.') 40 | 41 | if 'import' in profile and 'import_list' in profile: 42 | raise ProfileError(f'Specify exactly one of `import` and `import_list`. ' 43 | f'These options are functionally equivalent.') 44 | 45 | if 'archive' in profile and 'archives' in profile: 46 | raise ValueError('Specify either `archive` or `archives` but not both. 
For ' 47 | 'either case, valid values are a string or list of strings.') 48 | elif 'archive' in profile: 49 | profile['archives'] = profile.pop('archive') 50 | elif 'archives' not in profile: 51 | profile['archives'] = [] 52 | 53 | if isinstance(profile['archives'], str): 54 | profile['archives'] = [profile['archives']] 55 | 56 | for i in task_globals: 57 | if i in profile: 58 | # Add to each task if it has none: 59 | for idx, task in enumerate(profile['tasks']): 60 | if i not in task: 61 | profile['tasks'][idx][i] = profile[i] 62 | 63 | workflow_dict = { 64 | 'name': profile['name'], 65 | 'tasks': profile['tasks'], 66 | 'archives': profile['archives'], 67 | 'figures': profile.get('figures'), 68 | 'metadata': {**Config.get('default_metadata'), **profile.get('metadata', {})}, 69 | 'num_iterations': profile.get('num_iterations'), 70 | 'iterate': profile.get('iterate'), 71 | 'extends': profile.get('extends'), 72 | 'archive_excludes': profile.get('archive_excludes'), 73 | 'import_list': profile.get('import') or profile.get('import_list'), 74 | } 75 | 76 | return workflow_dict 77 | -------------------------------------------------------------------------------- /matflow/scripting.py: -------------------------------------------------------------------------------- 1 | """Module containing functionality for generating Python scripts as task sources.""" 2 | 3 | import re 4 | from textwrap import dedent 5 | 6 | import black 7 | import autopep8 8 | 9 | from pkg_resources import resource_string 10 | 11 | 12 | def main_func(func): 13 | """Decorator used to annotate which function within a snippet is the main function.""" 14 | def main_inner(*args, **kwargs): 15 | func(*args, **kwargs) 16 | return main_inner 17 | 18 | 19 | def get_snippet(package_name, snippet_name, decorator=True): 20 | """Get a Python snippet function (as a string) from the snippets directory.""" 21 | out = resource_string(package_name, f'snippets/{snippet_name}').decode() 22 | if not decorator: 23 | # Remove the `@main_func` decorator and import. 24 | remove_lns = ['from matflow.scripting import main_func', '@main_func'] 25 | for i in remove_lns: 26 | out = ''.join(out.split(i)) 27 | return out 28 | 29 | 30 | def parse_python_func_return(func_str): 31 | """Get a list of the variable names in a Python function return statement. 32 | 33 | The return statement may return a tuple (with parentheses or not) or a single variable. 
34 | 35 | """ 36 | 37 | out = [] 38 | match = re.search(r'return \(*([\S\s][^\)]+)\)*', func_str) 39 | if match: 40 | match_clean = match.group(1).strip().strip(',') 41 | out = [i.strip() for i in match_clean.split(',')] 42 | 43 | return out 44 | 45 | 46 | def parse_python_func_imports(func_str): 47 | """Get a list of import statement lines from a (string) Python function.""" 48 | 49 | import_lines = func_str.split('def ')[0].strip() 50 | match = re.search(r'((?:import|from)[\S\s]*)', import_lines) 51 | out = [] 52 | if match: 53 | out = match.group(1).splitlines() 54 | 55 | return out 56 | 57 | 58 | def extract_snippet_main(snippet_str): 59 | """Extract only the snippet main function (plus imports), as annotated by the 60 | `@main_func` decorator.""" 61 | 62 | func_start_pat = r'((?:@main_func\n)?def\s(?:.*)\((?:[\s\S]*?)\):)' 63 | 64 | func_split_snip = re.split(func_start_pat, snippet_str) 65 | imports = func_split_snip[0] 66 | main_func_dec_str = '@main_func' 67 | 68 | main_func_str = None 69 | for idx in range(1, len(func_split_snip[1:]), 2): 70 | func_str = func_split_snip[idx] + func_split_snip[idx + 1] 71 | if main_func_dec_str in func_str: 72 | if main_func_str: 73 | msg = (f'`{main_func_dec_str}` should decorate only one function within ' 74 | f'the snippet.') 75 | raise ValueError(msg) 76 | else: 77 | main_func_str = func_str.lstrip(f'{main_func_dec_str}\n') 78 | 79 | imports = ''.join(imports.split('from matflow_defdap import main_func')) 80 | 81 | return imports + '\n' + main_func_str 82 | 83 | 84 | def get_snippet_signature(package_name, script_name): 85 | """Get imports, inputs and outputs of a Python snippet function.""" 86 | 87 | snippet_str = get_snippet(package_name, script_name) 88 | snippet_str = extract_snippet_main(snippet_str) 89 | 90 | def_line = re.search(r'def\s(.*)\(([\s\S]*?)\):', snippet_str).groups() 91 | func_name = def_line[0] 92 | func_ins = [i.strip() for i in def_line[1].split(',') if i.strip()] 93 | 94 | if script_name != func_name + '.py': 95 | msg = ('For simplicity, the snippet main function name should be the same as the ' 96 | 'snippet file name.') 97 | raise ValueError(msg) 98 | 99 | func_outs = parse_python_func_return(snippet_str) 100 | func_imports = parse_python_func_imports(snippet_str) 101 | 102 | out = { 103 | 'name': func_name, 104 | 'imports': func_imports, 105 | 'inputs': func_ins, 106 | 'outputs': func_outs, 107 | } 108 | return out 109 | 110 | 111 | def get_snippet_call(package_name, script_name): 112 | sig = get_snippet_signature(package_name, script_name) 113 | outs_fmt = ', '.join(sig['outputs']) 114 | ins_fmt = ', '.join(sig['inputs']) 115 | ret = f'{sig["name"]}({ins_fmt})' 116 | if outs_fmt: 117 | ret = f'{outs_fmt} = {ret}' 118 | return ret 119 | 120 | 121 | def get_wrapper_script(package_name, script_name, snippets, outputs): 122 | 123 | ind = ' ' 124 | sigs = [get_snippet_signature(package_name, i['name']) for i in snippets] 125 | all_ins = [j for i in sigs for j in i['inputs']] 126 | all_outs = [j for i in sigs for j in i['outputs']] 127 | 128 | print(f'all_ins: {all_ins}') 129 | print(f'all_outs: {all_outs}') 130 | 131 | for i in outputs: 132 | if i not in all_outs: 133 | raise ValueError(f'Cannot output "{i}". 
No functions return this name.') 134 | 135 | # Required inputs are those that are not output by any snippet 136 | req_ins = list(set(all_ins) - set(all_outs)) 137 | req_ins_fmt = ', '.join(req_ins) 138 | 139 | print(f'req_ins: {req_ins}') 140 | print(f'req_ins_fmt: {req_ins_fmt}') 141 | 142 | main_sig = [f'def main({req_ins_fmt}):'] 143 | main_body = [ind + get_snippet_call(package_name, i['name']) for i in snippets] 144 | main_outs = ['\n' + ind + f'return {", ".join([i for i in outputs])}'] 145 | main_func = main_sig + main_body + main_outs 146 | 147 | req_imports = [ 148 | 'import sys', 149 | 'import hickle', 150 | 'from pathlib import Path', 151 | ] 152 | out = req_imports 153 | out += main_func 154 | snippet_funcs = '\n'.join([get_snippet(package_name, i['name'], decorator=False) 155 | for i in snippets]) 156 | 157 | out = '\n'.join(out) + '\n' + snippet_funcs + '\n' 158 | out += dedent('''\ 159 | if __name__ == '__main__': 160 | inputs = hickle.load(sys.argv[1]) 161 | outputs = main(**inputs) 162 | hickle.dump(outputs, 'outputs.hdf5') 163 | 164 | ''') 165 | 166 | print(f'out 1: \n----------\n{out}\n----------\n') 167 | out = autopep8.fix_code(out) 168 | print(f'out 2: \n----------\n{out}\n----------\n') 169 | out = black.format_str(out, mode=black.FileMode()) 170 | print(f'out 3: \n----------\n{out}\n----------\n') 171 | 172 | return out 173 | -------------------------------------------------------------------------------- /matflow/utils.py: -------------------------------------------------------------------------------- 1 | """`matflow.utils.py`""" 2 | 3 | import os 4 | import sys 5 | import io 6 | import collections 7 | import copy 8 | import itertools 9 | import h5py 10 | import numpy as np 11 | import random 12 | import re 13 | import time 14 | from contextlib import redirect_stdout, contextmanager 15 | from datetime import datetime 16 | from pathlib import Path 17 | 18 | from ruamel.yaml import YAML 19 | 20 | 21 | def parse_times(format_str): 22 | """Parse a string which contains time format codes and one or 23 | more `%%r` tokens, each representing a random digit from 0 to 9.""" 24 | 25 | time_parsed = time.strftime(format_str) 26 | rnd_all = '' 27 | while '%r' in time_parsed: 28 | rnd = str(random.randint(0, 9)) 29 | rnd_all += rnd 30 | time_parsed = time_parsed.replace('%r', rnd, 1) 31 | 32 | return time_parsed, rnd_all 33 | 34 | 35 | def zeropad(num, largest): 36 | """Return a zero-padded string of a number, given the largest number. 37 | 38 | TODO: want to support floating-point numbers as well? Or rename function 39 | accordingly. 40 | 41 | Parameters 42 | ---------- 43 | num : int 44 | The number to be formatted with zero padding on the left. 45 | largest : int 46 | The number that determines the number of zeros to pad with. 47 | 48 | Returns 49 | ------- 50 | padded : str 51 | The original number, `num`, formatted as a string with zeros added 52 | on the left. 
53 | 54 | """ 55 | 56 | num_digits = len('{:.0f}'.format(largest)) 57 | padded = '{0:0{width}}'.format(num, width=num_digits) 58 | 59 | return padded 60 | 61 | 62 | def combine_list_of_dicts(a): 63 | 64 | a = copy.deepcopy(a) 65 | 66 | for i in range(1, len(a)): 67 | update_dict(a[0], a[i]) 68 | 69 | return a[0] 70 | 71 | 72 | def update_dict(base, upd): 73 | """Update an arbitrarily-nested dict.""" 74 | 75 | for key, val in upd.items(): 76 | if isinstance(base, collections.abc.Mapping): 77 | if isinstance(val, collections.abc.Mapping): 78 | r = update_dict(base.get(key, {}), val) 79 | base[key] = r 80 | else: 81 | base[key] = upd[key] 82 | else: 83 | base = {key: upd[key]} 84 | 85 | return base 86 | 87 | 88 | def nest_lists(my_list): 89 | """ 90 | `my_list` is a list of `N` sublists. 91 | 92 | E.g. 93 | my_list = [ 94 | [1,2], 95 | [3,4,5], 96 | [6,7] 97 | ] 98 | 99 | returns a list of lists of length `N` such that all combinations of elements from sublists in 100 | `my_list` are found. 101 | E.g. 102 | out = [ 103 | [1, 3, 6], 104 | [1, 3, 7], 105 | [1, 4, 6], 106 | [1, 4, 7], 107 | [1, 5, 6], 108 | [1, 5, 7], 109 | [2, 3, 6], 110 | [2, 3, 7], 111 | [2, 4, 6], 112 | [2, 4, 7], 113 | [2, 5, 6], 114 | [2, 5, 7] 115 | ] 116 | 117 | """ 118 | 119 | N = len(my_list) 120 | sub_len = [len(i) for i in my_list] 121 | 122 | products = np.array([1] * (N + 1)) 123 | for i in range(len(my_list) - 1, -1, -1): 124 | products[:i + 1] *= len(my_list[i]) 125 | 126 | out = [[None for x in range(N)] for y in range(products[0])] 127 | 128 | for row_idx, row in enumerate(out): 129 | 130 | for col_idx, col in enumerate(row): 131 | 132 | num_repeats = products[col_idx + 1] 133 | sub_list_idx = int(row_idx / num_repeats) % len(my_list[col_idx]) 134 | out[row_idx][col_idx] = copy.deepcopy( 135 | my_list[col_idx][sub_list_idx]) 136 | 137 | return out 138 | 139 | 140 | def repeat(lst, reps): 141 | """Repeat 1D list elements.""" 142 | return list(itertools.chain.from_iterable(itertools.repeat(x, reps) for x in lst)) 143 | 144 | 145 | def tile(lst, tiles): 146 | """Tile a 1D list.""" 147 | return lst * tiles 148 | 149 | 150 | def index(lst, idx): 151 | """Get elements of a list.""" 152 | return [lst[i] for i in idx] 153 | 154 | 155 | def arange(size): 156 | """Get 1D list of increasing integers.""" 157 | return list(range(size)) 158 | 159 | 160 | def extend_index_list(lst, repeats): 161 | """Extend an integer index list by repeating some number of times such that the extra 162 | indices added are new and follow the same ordering as the existing elements. 163 | 164 | Parameters 165 | ---------- 166 | lst : list of int 167 | repeats : int 168 | 169 | Returns 170 | ------- 171 | new_idx : list of int 172 | Returned list has length `len(lst) * repeats`. 173 | 174 | Examples 175 | -------- 176 | >>> extend_index_list([0, 1, 2], 2) 177 | [0, 1, 2, 3, 4, 5] 178 | 179 | >>> extend_index_list([0, 0, 1, 1], 3) 180 | [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5] 181 | 182 | >>> extend_index_list([4, 1, 2], 2) 183 | [4, 1, 2, 8, 5, 6] 184 | 185 | """ 186 | 187 | new_idx = [] 188 | for i in lst: 189 | if i < 0: 190 | raise ValueError('List elements must be positive or zero.') 191 | new_idx.append(i) 192 | 193 | for _ in range(repeats - 1): 194 | next_avail_idx = max(new_idx) + 1 195 | new_idx.extend([next_avail_idx + i - min(lst) for i in lst]) 196 | 197 | return new_idx 198 | 199 | 200 | def flatten_list(lst): 201 | """Flatten a list of lists. 
202 | 203 | Parameters 204 | ---------- 205 | lst : list of list 206 | 207 | Returns 208 | ------- 209 | list 210 | 211 | Examples 212 | -------- 213 | >>> flatten_list([[0, 2, 4], [9, 1]]) 214 | [0, 2, 4, 9, 1] 215 | 216 | """ 217 | return [j for i in lst for j in i] 218 | 219 | 220 | def to_sub_list(lst, sub_list_len): 221 | """Transform a list into a list of sub lists of certain size. 222 | 223 | Parameters 224 | ---------- 225 | lst : list 226 | List to transform into a list of sub-lists. 227 | sub_list_len : int 228 | Size of sub-lists. Must be an integer factor of the length of the 229 | original list, `lst`. 230 | 231 | Returns 232 | ------- 233 | list of list 234 | 235 | Examples 236 | -------- 237 | >>> to_sub_list([0, 1, 2, 3], 2) 238 | [[0, 1], [2, 3]] 239 | 240 | """ 241 | 242 | if (sub_list_len <= 0) or (len(lst) % sub_list_len != 0): 243 | raise ValueError('`sub_list_len` must be a positive factor of `len(lst)`.') 244 | out = [lst[(i * sub_list_len):((i * sub_list_len) + sub_list_len)] 245 | for i in range(len(lst) // sub_list_len)] 246 | return out 247 | 248 | 249 | def datetime_to_dict(dt): 250 | return { 251 | 'year': dt.year, 252 | 'month': dt.month, 253 | 'day': dt.day, 254 | 'hour': dt.hour, 255 | 'minute': dt.minute, 256 | 'second': dt.second, 257 | 'microsecond': dt.microsecond, 258 | } 259 | 260 | 261 | def dump_to_yaml_string(data): 262 | yaml = YAML() 263 | yaml.indent(mapping=2, sequence=4, offset=2) 264 | with redirect_stdout(io.StringIO()) as buffer: 265 | yaml.dump(data, sys.stdout) 266 | output = buffer.getvalue() 267 | return output 268 | 269 | 270 | def get_specifier_dict(key, name_key=None, base_key=None, defaults=None, 271 | list_specifiers=None, cast_types=None): 272 | """Resolve a string key with additional square-bracket specifiers into a dict. 273 | 274 | Parameters 275 | ---------- 276 | key : str or dict 277 | name_key : str 278 | base_key : str 279 | defaults : dict 280 | list_specifiers : list of str 281 | Any specifier in this list will be added to the returned dict as a list element. 282 | cast_types : dict 283 | Dict of (key: type) to cast those keys' values to. 284 | 285 | Returns 286 | ------- 287 | dict 288 | 289 | Examples 290 | -------- 291 | >>> get_specifier_dict( 292 | 'parameter_1[hey, label_2=hi]', 293 | name_key='param_name', 294 | base_key='label_1', 295 | defaults={'a': 1}, 296 | ) 297 | { 298 | 'param_name': 'parameter_1', 299 | 'label_1': 'hey', 300 | 'label_2': 'hi', 301 | 'a': 1, 302 | } 303 | 304 | """ 305 | 306 | list_specifiers = list_specifiers or [] 307 | cast_types = cast_types or {} 308 | out = {} 309 | 310 | if isinstance(key, str): 311 | 312 | if name_key is None: 313 | raise TypeError('`name_key` must be specified.') 314 | 315 | match = re.search(r'([\w\-\s]+)(\[(.*?)\])*', key) 316 | name = match.group(1) 317 | out.update({name_key: name}) 318 | 319 | specifiers_str = match.group(3) 320 | if specifiers_str: 321 | base_keys = [] 322 | for s in specifiers_str.split(','): 323 | if not s: 324 | continue 325 | if '=' in s: 326 | s_key, s_val = [i.strip() for i in s.split('=')] 327 | if s_key in list_specifiers: 328 | if s_key in out: 329 | out[s_key].append(s_val) 330 | else: 331 | out[s_key] = [s_val] 332 | else: 333 | if s_key in out: 334 | raise ValueError( 335 | f'Specifier "{s_key}" multiply defined. Add this ' 336 | f'specifier to `list_specifiers` to add multiple values ' 337 | f'to the returned dict (in a list).' 
338 | ) 339 | out.update({s_key: s_val}) 340 | else: 341 | base_keys.append(s.strip()) 342 | 343 | if len(base_keys) > 1: 344 | raise ValueError('Only one specifier may be specified without a key.') 345 | 346 | if base_keys: 347 | if base_key is None: 348 | raise ValueError('Base key found but `base_key` name not specified.') 349 | out.update({base_key: base_keys[0]}) 350 | 351 | elif isinstance(key, dict): 352 | out.update(key) 353 | 354 | else: 355 | raise TypeError('`key` must be a dict or str to allow specifiers to be resolved.') 356 | 357 | for k, v in (defaults or {}).items(): 358 | if k not in out: 359 | out[k] = copy.deepcopy(v) 360 | 361 | for key, cast_type in cast_types.items(): 362 | if key in out: 363 | if cast_type is bool: 364 | new_val = cast_bool(out[key]) 365 | else: 366 | new_val = cast_type(out[key]) 367 | out[key] = new_val 368 | 369 | return out 370 | 371 | 372 | def extract_variable_names(source_str, delimiters): 373 | """Given a specified syntax for embedding variable names within a string, 374 | extract all variable names. 375 | 376 | Parameters 377 | ---------- 378 | source_str : str 379 | The string within which to search for variable names. 380 | delimiters : two-tuple of str 381 | The left and right delimiters of a variable name. 382 | 383 | Returns 384 | ------- 385 | var_names : list of str 386 | The variable names embedded in the original string. 387 | 388 | """ 389 | 390 | delim_esc = [re.escape(i) for i in delimiters] 391 | pattern = delim_esc[0] + r'(.\S+?)' + delim_esc[1] 392 | var_names = re.findall(pattern, source_str) 393 | 394 | return var_names 395 | 396 | 397 | def get_nested_item(obj, address): 398 | out = obj 399 | for i in address: 400 | out = out[i] 401 | return out 402 | 403 | 404 | def get_workflow_paths(base_dir, quiet=True): 405 | base_dir = Path(base_dir) 406 | wkflows = [] 407 | for i in base_dir.glob('**/*'): 408 | if i.name == 'workflow.hdf5': 409 | wk_full_path = i 410 | wk_rel_path = wk_full_path.relative_to(base_dir) 411 | wk_disp_path = wk_rel_path.parent 412 | with h5py.File(wk_full_path, 'r') as handle: 413 | try: 414 | try: 415 | handle["/workflow_obj/data/'figures'"] 416 | except KeyError: 417 | if not quiet: 418 | print(f'No "figures" key for workflow: {wk_disp_path}.') 419 | continue 420 | timestamp_path = "/workflow_obj/data/'history'/data/data_0/'timestamp'/data" 421 | timestamp_dict = {k[1:-1]: v['data'][()] 422 | for k, v in handle[timestamp_path].items()} 423 | timestamp = datetime(**timestamp_dict) 424 | wkflows.append({ 425 | 'ID': handle.attrs['workflow_id'], 426 | 'full_path': str(wk_full_path), 427 | 'display_path': str(wk_disp_path), 428 | 'timestamp': timestamp, 429 | 'display_timestamp': timestamp.strftime(r'%Y-%m-%d %H:%M:%S'), 430 | }) 431 | except: 432 | if not quiet: 433 | print(f'No timestamp for workflow: {wk_disp_path}') 434 | return wkflows 435 | 436 | 437 | def order_workflow_paths_by_date(workflow_paths): 438 | return sorted(workflow_paths, key=lambda x: x['timestamp']) 439 | 440 | 441 | def nested_dict_arrays_to_list(obj): 442 | if isinstance(obj, np.ndarray): 443 | obj = obj.tolist() 444 | elif isinstance(obj, dict): 445 | for key, val in obj.items(): 446 | obj[key] = nested_dict_arrays_to_list(val) 447 | return obj 448 | 449 | 450 | def move_element_forward(lst, index, position, return_map=True): 451 | """Move a list element forward in the list to a new index position.""" 452 | 453 | if index > position: 454 | raise ValueError('`index` cannot be larger than `position`, since that would ' 455 | 'not be 
a "forward" move!') 456 | 457 | if position > len(lst) - 1: 458 | raise ValueError('`position` must be a valid list index.') 459 | 460 | sub_list_1 = lst[:position + 1] 461 | sub_list_2 = lst[position + 1:] 462 | elem = sub_list_1.pop(index) 463 | out = sub_list_1 + [elem] + sub_list_2 464 | 465 | # Indices to the left of the element that is to be moved do not change: 466 | idx_map_left = {i: i for i in range(0, index)} 467 | 468 | # The index of the moved element changes to `position` 469 | idx_map_element = {index: position} 470 | 471 | # Indicies to the right of the element up to the new position are decremented: 472 | idx_map_middle = {i: i - 1 for i in range(index + 1, position + 1)} 473 | 474 | # Indices to the right of the new position do not change: 475 | idx_map_right = {i: i for i in range(position + 1, len(lst))} 476 | 477 | idx_map = { 478 | **idx_map_left, 479 | **idx_map_element, 480 | **idx_map_middle, 481 | **idx_map_right 482 | } 483 | 484 | if return_map: 485 | return out, idx_map 486 | else: 487 | return out 488 | 489 | 490 | def cast_bool(bool_str): 491 | if isinstance(bool_str, bool): 492 | return bool_str 493 | elif bool_str.lower() == 'true': 494 | return True 495 | elif bool_str.lower() == 'false': 496 | return False 497 | else: 498 | raise ValueError(f'"{bool_str}" cannot be cast to True or False.') 499 | 500 | 501 | @contextmanager 502 | def working_directory(path): 503 | """Change to a working directory and return to previous working directory on exit.""" 504 | prev_cwd = Path.cwd() 505 | os.chdir(path) 506 | try: 507 | yield 508 | finally: 509 | os.chdir(prev_cwd) 510 | -------------------------------------------------------------------------------- /matflow/validation.py: -------------------------------------------------------------------------------- 1 | 2 | import inspect 3 | 4 | from matflow.errors import UnsatisfiedSchemaError 5 | 6 | 7 | def validate_input_mapper_func(func, task_inputs): 8 | """Using `inspect`, validate an input mapper callable from a Matflow extension. 9 | 10 | Parameters 11 | ---------- 12 | func : callable 13 | task_inputs : list of str 14 | List of the input name aliases associated with the task schema. 15 | 16 | Notes 17 | ----- 18 | Checks performed on `func`: 19 | - check the first argument is named "path"; raise `TypeError` if not; 20 | - check for one or more additional arguments which are named according to 21 | a subset of task parameters (passed in `task_inputs`). 22 | 23 | """ 24 | 25 | func_params = inspect.signature(func).parameters 26 | 27 | # Check first argument must be "path": 28 | first_arg = list(func_params.items())[0] 29 | if first_arg[0] != 'path': 30 | msg = (f'The first parameter of an input mapper function must be "path" ' 31 | f'but for {func.__name__} is actually "{first_arg[0]}".') 32 | raise TypeError(msg) 33 | else: 34 | # Remove "path" from argument list, for further analysis: 35 | func_params = dict(func_params) 36 | del func_params[first_arg[0]] 37 | 38 | bad_params = list(set(func_params) - set(task_inputs)) 39 | if bad_params: 40 | bad_params_fmt = ', '.join([f'"{i}"' for i in bad_params]) 41 | msg = (f'The following arguments to the input mapper function "{func.__name__}" ' 42 | f'are not known by the schema: {bad_params_fmt}.') 43 | raise TypeError(msg) 44 | 45 | 46 | def validate_output_mapper_func(func, num_file_paths, option_names, input_names): 47 | """Using `inspect`, validate an output mapper callable from a Matflow extension. 
48 | 49 | Parameters 50 | ---------- 51 | func : callable 52 | num_file_paths : int 53 | Number of output files specified in the schema's output map. 54 | option_names : list of str 55 | List of the names of output map options. 56 | input_names : list of str 57 | List of the names of output map inputs. 58 | 59 | Notes 60 | ----- 61 | Checks performed on `func`: 62 | - After the first `num_file_paths` arguments, check that the remaining argument 63 | names coincide exactly with `option_names` + `input_names`. 64 | 65 | """ 66 | 67 | func_params = inspect.signature(func).parameters 68 | 69 | # Check num args first 70 | exp_num_params = num_file_paths + len(option_names) + len(input_names) 71 | if len(func_params) != exp_num_params: 72 | msg = ( 73 | f'The output mapper function "{func.__name__}" does not have the expected ' 74 | f'number of arguments: found {len(func_params)} but expected ' 75 | f'{exp_num_params} ({num_file_paths} file path(s) + {len(option_names)} ' 76 | f'option parameters + {len(input_names)} inputs).' 77 | ) 78 | raise TypeError(msg) 79 | 80 | # Check option names: 81 | params = list(func_params.items())[num_file_paths:] 82 | params_func = [i[0] for i in params] 83 | 84 | miss_params = list(set(option_names + input_names) - set(params_func)) 85 | bad_params = list(set(params_func) - set(option_names + input_names)) 86 | 87 | if bad_params: 88 | bad_params_fmt = ', '.join([f'"{i}"' for i in bad_params]) 89 | msg = (f'The following arguments in the output mapper function "{func.__name__}" ' 90 | f'are not output map options or inputs: {bad_params_fmt}.') 91 | raise TypeError(msg) 92 | 93 | if miss_params: 94 | miss_params_fmt = ', '.join([f'"{i}"' for i in miss_params]) 95 | msg = (f'The following output mapper options and/or inputs are missing from the ' 96 | f'signature of the output mapper function "{func.__name__}": ' 97 | f'{miss_params_fmt}.') 98 | raise TypeError(msg) 99 | 100 | 101 | def validate_func_mapper_func(func, task_inputs): 102 | """Using `inspect`, validate a function mapper callable from a Matflow extension. 103 | 104 | Parameters 105 | ---------- 106 | func : callable 107 | task_inputs : list of str 108 | List of the input name aliases associated with the task schema. 109 | 110 | Notes 111 | ----- 112 | Checks performed on `func`: 113 | - check function arguments are named according to all task parameters (passed in 114 | `task_inputs`). 115 | 116 | """ 117 | 118 | func_params = inspect.signature(func).parameters 119 | 120 | bad_params = list(set(func_params) - set(task_inputs)) 121 | miss_params = list(set(task_inputs) - set(func_params)) 122 | 123 | if bad_params: 124 | bad_params_fmt = ', '.join([f'"{i}"' for i in bad_params]) 125 | msg = (f'The function mapper function "{func.__name__}" contains the following ' 126 | f'arguments that are not consistent with the schema: {bad_params_fmt}.') 127 | raise TypeError(msg) 128 | 129 | if miss_params: 130 | miss_params_fmt = ', '.join([f'"{i}"' for i in miss_params]) 131 | msg = (f'The following task inputs are missing from the signature of the ' 132 | f'function mapper function "{func.__name__}": {miss_params_fmt}.') 133 | raise TypeError(msg) 134 | 135 | 136 | def validate_task_schemas(task_schemas, task_input_map, task_output_map, task_func_map): 137 | """ 138 | Determine whether each task schema is valid. 139 | 140 | Parameters 141 | ---------- 142 | task_schemas : dict of (tuple : TaskSchema) 143 | Dict keys are (task_name, task_method, software). 
144 | task_input_map : dict of (tuple : dict of (str : callable)) 145 | Outer dict keys are (task_name, task_method, software); inner dicts map a string 146 | input file name to a MatFlow extension callable which writes that input file. 147 | task_output_map : dict of (tuple : dict of (str : callable)) 148 | Outer dict keys are (task_name, task_method, software); inner dicts map a string 149 | output name to a MatFlow extension callable which returns that output. 150 | task_func_map : dict of (tuple : callable) 151 | Dict keys are (task_name, task_method, software); values are MatFlow extension 152 | callables. 153 | 154 | Returns 155 | ------- 156 | schema_is_valid : dict of (tuple : tuple of (bool, str)) 157 | Dict keys are (task_name, task_method, software); values are tuples whose first 158 | element is a boolean indicating whether a given schema is valid. If False, one 159 | of the extension functions (input map, output map or function map) is missing. 160 | Note that this function does not raise any exception in this case; the task 161 | schema is simply noted as invalid. The second element of the tuple is a string 162 | description of the reason why the schema is invalid. 163 | 164 | Raises 165 | ------ 166 | UnsatisfiedSchemaError 167 | Raised if any of the extension callables (input/output/func maps) are not 168 | consistent with their associated task schema. 169 | 170 | """ 171 | 172 | schema_is_valid = {} 173 | 174 | for key, schema in task_schemas.items(): 175 | 176 | schema_is_valid.update({key: (True, '')}) 177 | 178 | key_msg = (f'Unresolved task schema for task "{schema.name}" with method ' 179 | f'"{schema.method}" and software "{schema.implementation}".') 180 | 181 | for inp_map in schema.input_map: 182 | 183 | extension_inp_maps = task_input_map.get(key) 184 | msg = ( 185 | f'{key_msg} No matching extension function found for the input ' 186 | f'map that generates the input file "{inp_map["file"]}".' 187 | ) 188 | 189 | if not extension_inp_maps: 190 | reason = (f'No input map function found for input map that generates file' 191 | f' "{inp_map["file"]}". ') 192 | schema_is_valid[key] = (False, schema_is_valid[key][1] + reason) 193 | continue 194 | else: 195 | inp_map_func = extension_inp_maps.get(inp_map['file']) 196 | if not inp_map_func: 197 | raise UnsatisfiedSchemaError(msg) 198 | 199 | # Validate signature of input map function: 200 | try: 201 | validate_input_mapper_func(inp_map_func, inp_map['inputs']) 202 | except TypeError as err: 203 | raise UnsatisfiedSchemaError(key_msg + ' ' + str(err)) from None 204 | 205 | for out_map in schema.output_map: 206 | 207 | extension_out_maps = task_output_map.get(key) 208 | msg = ( 209 | f'{key_msg} No matching extension function found for the output ' 210 | f'map that generates the output "{out_map["output"]}".' 211 | ) 212 | 213 | if not extension_out_maps: 214 | reason = (f'No output map function found for output map that generates ' 215 | f'output "{out_map["output"]}". 
') 216 | schema_is_valid[key] = (False, schema_is_valid[key][1] + reason) 217 | continue 218 | else: 219 | out_map_func = extension_out_maps.get(out_map['output']) 220 | if not out_map_func: 221 | raise UnsatisfiedSchemaError(msg) 222 | 223 | # Validate signature of output map function: 224 | try: 225 | validate_output_mapper_func( 226 | func=out_map_func, 227 | num_file_paths=len(out_map['files']), 228 | option_names=[i['name'] for i in out_map.get('options', [])], 229 | input_names=[i['name'] for i in out_map.get('inputs', [])], 230 | ) 231 | except TypeError as err: 232 | raise UnsatisfiedSchemaError(key_msg + ' ' + str(err)) from None 233 | 234 | if schema.is_func: 235 | 236 | func = task_func_map.get(key) 237 | if not func: 238 | reason = 'No function mapper function found. ' 239 | schema_is_valid[key] = (False, schema_is_valid[key][1] + reason) 240 | continue 241 | 242 | # Validate signature of func mapper function: 243 | try: 244 | validate_func_mapper_func(func, schema.input_aliases) 245 | except TypeError as err: 246 | raise UnsatisfiedSchemaError(key_msg + ' ' + str(err)) from None 247 | 248 | return schema_is_valid 249 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pylint 2 | ipykernel 3 | rope 4 | autopep8 5 | twine 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Pip installation script for `matflow`.""" 2 | 3 | import os 4 | import re 5 | from setuptools import find_packages, setup 6 | 7 | 8 | def get_version(): 9 | 10 | ver_file = 'matflow/_version.py' 11 | with open(ver_file) as handle: 12 | ver_str_line = handle.read() 13 | 14 | ver_pattern = r'^__version__ = [\'"]([^\'"]*)[\'"]' 15 | match = re.search(ver_pattern, ver_str_line, re.M) 16 | if match: 17 | ver_str = match.group(1) 18 | else: 19 | msg = 'Unable to find version string in "{}"'.format(ver_file) 20 | raise RuntimeError(msg) 21 | 22 | return ver_str 23 | 24 | 25 | def get_long_description(): 26 | 27 | readme_file = 'README.md' 28 | with open(readme_file, encoding='utf-8') as handle: 29 | contents = handle.read() 30 | 31 | return contents 32 | 33 | 34 | package_data = [ 35 | os.path.join(*os.path.join(root, f).split(os.path.sep)[1:]) 36 | for root, dirs, files in os.walk(os.path.join('matflow', 'data')) 37 | for f in files 38 | ] 39 | 40 | setup( 41 | name='matflow', 42 | version=get_version(), 43 | description=('Computational workflow management for materials science.'), 44 | long_description=get_long_description(), 45 | long_description_content_type='text/markdown', 46 | author='Adam J. 
Plowman', 47 | author_email='adam.plowman@manchester.ac.uk', 48 | packages=find_packages(), 49 | package_data={ 50 | 'matflow': package_data, 51 | }, 52 | install_requires=[ 53 | 'matflow-demo-extension', 54 | 'hpcflow>=0.1.16', 55 | 'click>7.0', 56 | 'hickle==4.0.4', 57 | 'h5py==2.10.0', 58 | 'numpy<1.24', 59 | 'sqlalchemy<2', 60 | 'ruamel.yaml==0.16.10', 61 | 'pyperclip', 62 | 'black', 63 | 'autopep8', 64 | ], 65 | project_urls={ 66 | 'Github': 'https://github.com/Lightform-group/matflow', 67 | }, 68 | classifiers=[ 69 | 'Development Status :: 3 - Alpha', 70 | 'Intended Audience :: Science/Research', 71 | 'Topic :: Scientific/Engineering', 72 | 'Programming Language :: Python :: 3.7', 73 | 'Programming Language :: Python :: 3.8', 74 | 'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)', 75 | 'Operating System :: OS Independent', 76 | ], 77 | entry_points=""" 78 | [console_scripts] 79 | matflow=matflow.cli:cli 80 | """ 81 | ) 82 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LightForm-group/matflow/4a40bd27a5c97778bdf902f1a7f47a882c5fb889/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_hicklable.py: -------------------------------------------------------------------------------- 1 | """Module containing unit tests on the `hicklable.to_hicklable` function.""" 2 | 3 | import unittest 4 | from tempfile import TemporaryFile 5 | 6 | import numpy as np 7 | import hickle 8 | 9 | from matflow.hicklable import to_hicklable 10 | 11 | 12 | class ConversionTestCase(unittest.TestCase): 13 | """Tests on `to_hicklable`.""" 14 | 15 | def test_built_ins(self): 16 | """Test expected output for some built-in types.""" 17 | 18 | obj = { 19 | 'a': 1, 20 | 'b': 2.0, 21 | 'c': [3, 4, 5.0], 22 | 'd': (6, 7, 8), 23 | 'e': {9, 10, 11}, 24 | 'f': {'f1': 1, 'f2': 2}, 25 | 'g': 'hello', 26 | } 27 | obj_expected = { 28 | 'a': 1, 29 | 'b': 2.0, 30 | 'c': [3, 4, 5.0], 31 | 'd': (6, 7, 8), 32 | 'e': {9, 10, 11}, 33 | 'f': {'f1': 1, 'f2': 2}, 34 | 'g': 'hello', 35 | } 36 | obj_valid = to_hicklable(obj) 37 | self.assertTrue(obj_valid == obj_expected) 38 | 39 | def test_arrays(self): 40 | """Test expected output for some arrays.""" 41 | 42 | obj = { 43 | 'int_array': np.array([1, 2, 3]), 44 | 'float_array': np.array([3.3, 2.5, -2.1]), 45 | 'bool_array': np.array([1, 0, 0, 1]).astype(bool), 46 | } 47 | obj_valid = to_hicklable(obj) 48 | self.assertTrue(obj_valid == obj) 49 | 50 | def test_object_dict(self): 51 | """Test expected output for an object with a __dict__ attribute.""" 52 | 53 | class myClassObject(object): 54 | def __init__(self, a=1): self.a = a 55 | 56 | my_class_obj = myClassObject(a=3.5) 57 | 58 | obj = {'my_class_obj': my_class_obj} 59 | expected_obj = {'my_class_obj': {'a': 3.5}} 60 | obj_valid = to_hicklable(obj) 61 | self.assertTrue(obj_valid == expected_obj) 62 | 63 | def test_object_slots(self): 64 | """Test expected output for an object with a __slots__ attribute.""" 65 | 66 | class myClassObject(object): 67 | __slots__ = ['a'] 68 | def __init__(self, a=1): self.a = a 69 | 70 | my_class_obj = myClassObject(a=3.5) 71 | 72 | obj = {'my_class_obj': my_class_obj} 73 | expected_obj = {'my_class_obj': {'a': 3.5}} 74 | obj_valid = to_hicklable(obj) 75 | self.assertTrue(obj_valid == expected_obj) 76 | 77 | def test_object_dict_slots(self): 78 | """Test expected output 
for an object with __dict__ and __slots__ attributes.""" 79 | 80 | class myClassObject(object): 81 | __slots__ = ['a', '__dict__'] 82 | def __init__(self, a=1): self.a = a 83 | 84 | my_class_obj = myClassObject(a=3.5) 85 | my_class_obj.b = 2 86 | 87 | obj = {'my_class_obj': my_class_obj} 88 | expected_obj = {'my_class_obj': {'a': 3.5, 'b': 2}} 89 | obj_valid = to_hicklable(obj) 90 | self.assertTrue(obj_valid == expected_obj) 91 | -------------------------------------------------------------------------------- /tests/test_task.py: -------------------------------------------------------------------------------- 1 | """Module containing unit tests on Task logic.""" 2 | 3 | import copy 4 | import unittest 5 | 6 | from matflow.models import TaskSchema 7 | from matflow.models.construction import normalise_local_inputs, get_local_inputs 8 | from matflow.errors import ( 9 | IncompatibleSequence, 10 | TaskSchemaError, 11 | TaskParameterError, 12 | SequenceError, 13 | ) 14 | 15 | # TODO: add test that warn is issued when an input is in base but also has a sequence. 16 | 17 | 18 | class TaskSchemaTestCase(unittest.TestCase): 19 | """Tests on TaskSchema""" 20 | 21 | def test_raise_on_input_is_output(self): 22 | with self.assertRaises(TaskSchemaError): 23 | TaskSchema('schema_1', inputs=['parameter_1'], outputs=['parameter_1']) 24 | 25 | def test_raise_on_input_map_bad_inputs(self): 26 | """Check inputs defined in the schema input map are in the schema inputs list.""" 27 | 28 | with self.assertRaises(TaskSchemaError): 29 | TaskSchema( 30 | 'schema_1', 31 | inputs=['parameter_7', 'parameter_9'], 32 | outputs=['parameter_8'], 33 | input_map=[ 34 | { 35 | 'inputs': [ 36 | # "parameter_10" is not in the inputs list. 37 | 'parameter_10', 38 | ], 39 | 'file': 'input_file_1', 40 | } 41 | ] 42 | ) 43 | 44 | def test_raise_on_output_map_bad_outputs(self): 45 | """Check outputs defined in the schema output map are in the schema outputs list.""" 46 | 47 | with self.assertRaises(TaskSchemaError): 48 | TaskSchema( 49 | 'schema_1', 50 | inputs=['parameter_7', 'parameter_9'], 51 | outputs=['parameter_8'], 52 | output_map=[ 53 | { 54 | 'files': [ 55 | 'output_file_1', 56 | ], 57 | # "parameter_10" is not in the outputs list. 
58 | 'output': 'parameter_10', 59 | } 60 | ] 61 | ) 62 | 63 | 64 | class TaskParameterTestCase(unittest.TestCase): 65 | """Tests of correct behaviour when defining tasks.""" 66 | 67 | def test_raise_on_unknown_input(self): 68 | with self.assertRaises(TaskParameterError): 69 | schema = TaskSchema( 70 | 'schema_1', 71 | inputs=['parameter_1'], 72 | outputs=['parameter_2'], 73 | ) 74 | schema.check_surplus_inputs(['parameter_3']) 75 | 76 | def test_raise_on_missing_input(self): 77 | with self.assertRaises(TaskParameterError): 78 | schema = TaskSchema( 79 | 'schema1', 80 | inputs=['parameter_1', 'parameter_2'], 81 | outputs=['parameter_3'], 82 | ) 83 | schema.check_missing_inputs(['parameter_2']) 84 | 85 | 86 | class NormaliseLocalTestCase(unittest.TestCase): 87 | """Testing `normalise_local_inputs`.""" 88 | 89 | def test_raise_on_bad_nest_idx_float(self): 90 | """Check raises on non-integer (float) nest index for any sequence.""" 91 | sequences = [{'name': 'p1', 'nest_idx': 1.0, 'vals': [101, 102]}] 92 | with self.assertRaises(SequenceError): 93 | normalise_local_inputs(sequences=sequences) 94 | 95 | def test_raise_on_bad_nest_idx_string(self): 96 | """Check raises on non-integer (str) nest index for any sequence.""" 97 | sequences = [{'name': 'p1', 'nest_idx': '0', 'vals': [101, 102]}] 98 | with self.assertRaises(SequenceError): 99 | normalise_local_inputs(sequences=sequences) 100 | 101 | def test_raise_on_bad_nest_idx_list(self): 102 | """Check raises on non-integer (list) nest index for any sequence.""" 103 | sequences = [{'name': 'p1', 'nest_idx': [1, 0], 'vals': [101, 102]}] 104 | with self.assertRaises(SequenceError): 105 | normalise_local_inputs(sequences=sequences) 106 | 107 | 108 | class GetLocalInputsExceptionTestCase(unittest.TestCase): 109 | """Testing exceptions and warnings from `get_local_inputs`.""" 110 | 111 | def test_raise_on_missing_nest_idx(self): 112 | """Check raises when more than one sequence, but nest_idx is missing from any 113 | sequence.""" 114 | sequences = [ 115 | {'name': 'p2', 'vals': [201, 202], 'nest_idx': 0}, 116 | {'name': 'p3', 'vals': [301, 302]}, 117 | ] 118 | with self.assertRaises(SequenceError): 119 | get_local_inputs([], sequences=sequences) 120 | 121 | def test_raise_on_bad_sequence_vals_type_str(self): 122 | """Test raises when sequence vals is a string.""" 123 | sequences = [{'name': 'p1', 'vals': '120'}] 124 | with self.assertRaises(SequenceError): 125 | get_local_inputs([], sequences=sequences) 126 | 127 | def test_raise_on_bad_sequence_vals_type_number(self): 128 | """Test raises when sequence vals is a number.""" 129 | sequences = [{'name': 'p1', 'vals': 120}] 130 | with self.assertRaises(SequenceError): 131 | get_local_inputs([], sequences=sequences) 132 | 133 | def test_raise_on_bad_sequences_type(self): 134 | """Test raises when sequences is not a list.""" 135 | sequences = {'name': 'p1', 'vals': [1, 2]} 136 | with self.assertRaises(SequenceError): 137 | get_local_inputs([], sequences=sequences) 138 | 139 | def test_warn_on_unrequired_nest_idx(self): 140 | """Test warning on unrequired nest idx.""" 141 | sequences = [{'name': 'p1', 'vals': [101, 102], 'nest_idx': 0}] 142 | with self.assertWarns(Warning): 143 | get_local_inputs([], sequences=sequences) 144 | 145 | def test_raise_on_bad_sequence_keys(self): 146 | """Test raises when a sequence has unknown keys.""" 147 | sequences = [{'name': 'p1', 'vals': [101, 102], 'bad_key': 4}] 148 | with self.assertRaises(SequenceError): 149 | get_local_inputs([], sequences=sequences) 150 | 151 | def 
test_raise_on_missing_sequence_keys(self): 152 | """Test raises when a sequence has missing keys.""" 153 | sequences = [{'vals': [101, 102]}] 154 | with self.assertRaises(SequenceError): 155 | get_local_inputs([], sequences=sequences) 156 | 157 | def test_raise_on_incompatible_nesting(self): 158 | """Test error raised on logically inconsistent Task sequence.""" 159 | sequences = [ 160 | {'name': 'p1', 'nest_idx': 0, 'vals': [101, 102]}, 161 | {'name': 'p2', 'nest_idx': 0, 'vals': [201]}, 162 | ] 163 | with self.assertRaises(IncompatibleSequence): 164 | get_local_inputs([], sequences=sequences) 165 | 166 | 167 | class GetLocalInputsInputsTestCase(unittest.TestCase): 168 | """Tests on the `inputs` dict generated by `get_local_inputs`.""" 169 | 170 | def test_base_only(self): 171 | """Check expected output for no sequences.""" 172 | base = {'p1': 101} 173 | local_ins = get_local_inputs([], base=base)['inputs'] 174 | local_ins_exp = {'p1': {'vals': [101], 'vals_idx': [0]}} 175 | self.assertTrue(local_ins == local_ins_exp) 176 | 177 | def test_base_and_sequence(self): 178 | """Check expected output for base and one sequence.""" 179 | base = {'p1': 101} 180 | sequences = [{'name': 'p2', 'vals': [201, 202]}] 181 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs'] 182 | local_ins_exp = { 183 | 'p1': {'vals': [101], 'vals_idx': [0, 0]}, 184 | 'p2': {'vals': [201, 202], 'vals_idx': [0, 1]}, 185 | } 186 | self.assertTrue(local_ins == local_ins_exp) 187 | 188 | def test_base_and_multi_nested_sequences(self): 189 | """Check expected output for base and two nested sequences.""" 190 | base = {'p1': 101} 191 | sequences = [ 192 | {'name': 'p2', 'vals': [201, 202], 'nest_idx': 0}, 193 | {'name': 'p3', 'vals': [301, 302, 303], 'nest_idx': 1}, 194 | ] 195 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs'] 196 | local_ins_exp = { 197 | 'p1': {'vals': [101], 'vals_idx': [0, 0, 0, 0, 0, 0]}, 198 | 'p2': {'vals': [201, 202], 'vals_idx': [0, 0, 0, 1, 1, 1]}, 199 | 'p3': {'vals': [301, 302, 303], 'vals_idx': [0, 1, 2, 0, 1, 2]}, 200 | } 201 | self.assertTrue(local_ins == local_ins_exp) 202 | 203 | def test_base_and_multi_merged_sequences(self): 204 | """Check expected output for base and two merged sequences.""" 205 | base = {'p1': 101} 206 | sequences = [ 207 | {'name': 'p2', 'vals': [201, 202], 'nest_idx': 0}, 208 | {'name': 'p3', 'vals': [301, 302], 'nest_idx': 0}, 209 | ] 210 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs'] 211 | local_ins_exp = { 212 | 'p1': {'vals': [101], 'vals_idx': [0, 0]}, 213 | 'p2': {'vals': [201, 202], 'vals_idx': [0, 1]}, 214 | 'p3': {'vals': [301, 302], 'vals_idx': [0, 1]}, 215 | } 216 | self.assertTrue(local_ins == local_ins_exp) 217 | 218 | def test_base_and_merged_and_nested_sequences(self): 219 | """Check expected output for base and two merged sequences.""" 220 | base = {'p1': 101} 221 | sequences = [ 222 | {'name': 'p2', 'vals': [201, 202], 'nest_idx': 0}, 223 | {'name': 'p3', 'vals': [301, 302], 'nest_idx': 0}, 224 | {'name': 'p4', 'vals': [401, 402, 403], 'nest_idx': 1}, 225 | ] 226 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs'] 227 | local_ins_exp = { 228 | 'p1': {'vals': [101], 'vals_idx': [0, 0, 0, 0, 0, 0]}, 229 | 'p2': {'vals': [201, 202], 'vals_idx': [0, 0, 0, 1, 1, 1]}, 230 | 'p3': {'vals': [301, 302], 'vals_idx': [0, 0, 0, 1, 1, 1]}, 231 | 'p4': {'vals': [401, 402, 403], 'vals_idx': [0, 1, 2, 0, 1, 2]}, 232 | } 233 | self.assertTrue(local_ins == local_ins_exp) 
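    # Editor's note (hedged summary, not part of the original tests): the cases above
    # pin down the nesting rule exercised by `get_local_inputs`: sequences sharing a
    # `nest_idx` are merged (zipped element-wise), while sequences with distinct
    # `nest_idx` values are nested (outer product). For example, assuming the
    # behaviour asserted above:
    #     merged: p2=[201, 202] with p3=[301, 302]        -> 2 elements
    #     nested: (p2, p3 merged) with p4=[401, 402, 403] -> 6 elements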
234 | 235 | def test_equivalent_relative_nesting_idx(self): 236 | """Check that only the relative ordering of `nest_idx` values matters, not their magnitudes.""" 237 | sequences_1 = [ 238 | {'name': 'p1', 'nest_idx': 0, 'vals': [101, 102, 103]}, 239 | {'name': 'p2', 'nest_idx': 1, 'vals': [201, 202]}, 240 | ] 241 | sequences_2 = copy.deepcopy(sequences_1) 242 | sequences_2[0]['nest_idx'] = 105 243 | sequences_2[1]['nest_idx'] = 2721 244 | 245 | local_ins_1 = get_local_inputs([], sequences=sequences_1)['inputs'] 246 | local_ins_2 = get_local_inputs([], sequences=sequences_2)['inputs'] 247 | 248 | self.assertEqual(local_ins_1, local_ins_2) 249 | 250 | def test_correct_number_of_local_inputs_all_nesting(self): 251 | """Check the correct number of elements for two nested sequences.""" 252 | sequences = [ 253 | {'name': 'p1', 'nest_idx': 0, 'vals': [101, 102, 103]}, 254 | {'name': 'p2', 'nest_idx': 1, 'vals': [201, 202]}, 255 | ] 256 | local_ins = get_local_inputs([], sequences=sequences)['inputs'] 257 | self.assertEqual(len(local_ins['p1']['vals_idx']), 6) 258 | 259 | def test_all_inputs_local_inputs_size(self): 260 | """Check all inputs have the same number of elements.""" 261 | sequences = [ 262 | {'name': 'p1', 'nest_idx': 0, 'vals': [101, 102, 103]}, 263 | {'name': 'p2', 'nest_idx': 1, 'vals': [201, 202]}, 264 | ] 265 | local_ins = get_local_inputs([], sequences=sequences)['inputs'] 266 | self.assertEqual( 267 | len(local_ins['p1']['vals_idx']), len(local_ins['p2']['vals_idx']) 268 | ) 269 | 270 | def test_correct_number_of_local_inputs_all_merge(self): 271 | """Check the correct number of local inputs for merging three sequences.""" 272 | sequences = [ 273 | {'name': 'p1', 'nest_idx': 3, 'vals': [101, 102]}, 274 | {'name': 'p2', 'nest_idx': 3, 'vals': [201, 202]}, 275 | {'name': 'p3', 'nest_idx': 3, 'vals': [301, 302]}, 276 | ] 277 | local_ins = get_local_inputs([], sequences=sequences)['inputs'] 278 | self.assertTrue( 279 | len(local_ins['p1']['vals_idx']) == 280 | len(local_ins['p2']['vals_idx']) == 281 | len(local_ins['p3']['vals_idx']) == 2 282 | ) 283 | 284 | def test_correct_number_of_local_inputs_one_merge(self): 285 | """Check the correct number of local inputs when one sequence is nested with two merged sequences.""" 286 | sequences = [ 287 | {'name': 'p1', 'nest_idx': 3, 'vals': [101, 102]}, 288 | {'name': 'p2', 'nest_idx': 4, 'vals': [201, 202]}, 289 | {'name': 'p3', 'nest_idx': 4, 'vals': [301, 302]}, 290 | ] 291 | local_ins = get_local_inputs([], sequences=sequences)['inputs'] 292 | self.assertTrue( 293 | len(local_ins['p1']['vals_idx']) == 294 | len(local_ins['p2']['vals_idx']) == 295 | len(local_ins['p3']['vals_idx']) == 4 296 | ) 297 | 298 | def test_base_is_merged_into_sequence(self): 299 | """Check the base dict is merged into a sequence.""" 300 | base = {'p1': 101} 301 | sequences = [{'name': 'p2', 'nest_idx': 0, 'vals': [201, 202]}] 302 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs'] 303 | self.assertTrue( 304 | local_ins['p1']['vals_idx'] == [0, 0] and 305 | local_ins['p2']['vals_idx'] == [0, 1] 306 | ) 307 | 308 | def test_unit_length_sequence(self): 309 | """Check that a sequence of length one has the same effect as specifying the 310 | parameter in the base dict.""" 311 | base = {'p1': 101} 312 | sequences = [{'name': 'p1', 'nest_idx': 0, 'vals': [101]}] 313 | local_ins_1 = get_local_inputs([], sequences=sequences)['inputs'] 314 | local_ins_2 = get_local_inputs([], base=base)['inputs'] 315 | self.assertEqual(local_ins_1, local_ins_2) 316 | 317 | 318 | class
GetLocalInputsFullTestCase(unittest.TestCase): 319 | """Explicit checks on the full outputs of `get_local_inputs`.""" 320 | 321 | def full_test_1(self): 322 | pass  # Placeholder: unittest will not collect this method (its name lacks the 'test_' prefix). 323 | -------------------------------------------------------------------------------- /tests/test_workflow.py: -------------------------------------------------------------------------------- 1 | """Module containing unit tests on Workflow initialisation.""" 2 | 3 | import unittest 4 | 5 | from matflow.errors import IncompatibleWorkflow 6 | from matflow.models import TaskSchema 7 | from matflow.models.construction import get_dependency_idx 8 | 9 | """ 10 | tests for inputs/outputs_idx: 11 | - for a variety of scenarios, check that all parameters from the same task have the same number of elements_idx. 12 | - for a few scenarios, check the expected elements_idx and task_idx. 13 | - check that the keys of the output (i.e. `task_idx`) are exactly the set of task_idx values in the downstream + upstream tasks. 14 | - check it works when there are no upstream tasks. 15 | 16 | tests for resolve_task_num_elements: 17 | - check it works when there are no upstream tasks. 18 | 19 | """ 20 | 21 | 22 | def init_schemas(task_lst): 23 | """Construct TaskSchema objects for TaskDependencyTestCase tests.""" 24 | for idx, i in enumerate(task_lst): 25 | task_lst[idx]['schema'] = TaskSchema(**i['schema']) 26 | return task_lst 27 | 28 | 29 | class TaskDependencyTestCase(unittest.TestCase): 30 | """Tests on `get_dependency_idx`.""" 31 | 32 | def test_single_dependency(self): 33 | """Test correct dependency index for a single task dependency.""" 34 | task_lst = [ 35 | { 36 | 'context': '', 37 | 'schema': { 38 | 'name': 'one', 39 | 'inputs': [ 40 | {'name': 'p1', 'context': None}, 41 | {'name': 'p2', 'context': None}, 42 | ], 43 | 'outputs': ['p3'], 44 | }, 45 | }, 46 | { 47 | 'context': '', 48 | 'schema': { 49 | 'name': 'one', 50 | 'inputs': [ 51 | {'name': 'p3', 'context': None}, 52 | {'name': 'p4', 'context': None}, 53 | ], 54 | 'outputs': ['p5'], 55 | }, 56 | }, 57 | ] 58 | dep_idx = get_dependency_idx(init_schemas(task_lst)) 59 | dep_idx_exp = [[], [0]] 60 | self.assertEqual(dep_idx, dep_idx_exp) 61 | 62 | def test_single_dependency_two_contexts(self): 63 | """Test single dependencies for two parallel contexts.""" 64 | task_lst = [ 65 | { 66 | 'context': 'context_A', 67 | 'schema': { 68 | 'name': 'one', 69 | 'inputs': [ 70 | {'name': 'p1', 'context': None}, 71 | {'name': 'p2', 'context': None}, 72 | ], 73 | 'outputs': ['p3'], 74 | }, 75 | }, 76 | { 77 | 'context': 'context_A', 78 | 'schema': { 79 | 'name': 'one', 80 | 'inputs': [ 81 | {'name': 'p3', 'context': None}, 82 | {'name': 'p4', 'context': None}, 83 | ], 84 | 'outputs': ['p5'], 85 | }, 86 | }, 87 | { 88 | 'context': 'context_B', 89 | 'schema': { 90 | 'name': 'one', 91 | 'inputs': [ 92 | {'name': 'p1', 'context': None}, 93 | {'name': 'p2', 'context': None}, 94 | ], 95 | 'outputs': ['p3'], 96 | }, 97 | }, 98 | { 99 | 'context': 'context_B', 100 | 'schema': { 101 | 'name': 'one', 102 | 'inputs': [ 103 | {'name': 'p3', 'context': None}, 104 | {'name': 'p4', 'context': None}, 105 | ], 106 | 'outputs': ['p5'], 107 | }, 108 | }, 109 | ] 110 | dep_idx = get_dependency_idx(init_schemas(task_lst)) 111 | dep_idx_exp = [[], [0], [], [2]] 112 | self.assertEqual(dep_idx, dep_idx_exp) 113 | 114 | def test_two_dependencies(self): 115 | """Test where a task depends on two upstream tasks.""" 116 | task_lst = [ 117 | { 118 | 'context': 'contextA', 119 | 'schema': { 120 | 'name': 'one', 121 | 'inputs': [ 122 | {'name': 'p1', 'context': None}, 123 | {'name': 'p2',
'context': None}, 124 | ], 125 | 'outputs': ['p3', 'p4'], 126 | }, 127 | }, 128 | { 129 | 'context': 'contextB', 130 | 'schema': { 131 | 'name': 'one', 132 | 'inputs': [ 133 | {'name': 'p1', 'context': None}, 134 | {'name': 'p2', 'context': None}, 135 | ], 136 | 'outputs': ['p3', 'p4'], 137 | }, 138 | }, 139 | { 140 | 'context': '', 141 | 'schema': { 142 | 'name': 'one', 143 | 'inputs': [ 144 | {'name': 'p3', 'context': 'contextA'}, 145 | {'name': 'p4', 'context': 'contextB'}, 146 | ], 147 | 'outputs': ['p5'], 148 | }, 149 | }, 150 | ] 151 | dep_idx = get_dependency_idx(init_schemas(task_lst)) 152 | dep_idx_exp = [[], [], [0, 1]] 153 | self.assertEqual(dep_idx, dep_idx_exp) 154 | 155 | def test_raise_on_output_non_exclusivity(self): 156 | """Test raises when multiple tasks produce the same output in the same context.""" 157 | task_lst = [ 158 | { 159 | 'context': '', 160 | 'schema': { 161 | 'name': 'one', 162 | 'inputs': [ 163 | {'name': 'p1', 'context': None}, 164 | {'name': 'p2', 'context': None}, 165 | ], 166 | 'outputs': ['p3'], 167 | }, 168 | }, 169 | { 170 | 'context': '', 171 | 'schema': { 172 | 'name': 'two', 173 | 'inputs': [ 174 | {'name': 'p4', 'context': None}, 175 | ], 176 | 'outputs': ['p3'], 177 | }, 178 | }, 179 | ] 180 | with self.assertRaises(IncompatibleWorkflow): 181 | get_dependency_idx(init_schemas(task_lst)) 182 | 183 | def test_raise_on_circular_reference(self): 184 | """Test raises on circularly dependent tasks.""" 185 | task_lst = [ 186 | { 187 | 'context': '', 188 | 'schema': { 189 | 'name': 'one', 190 | 'inputs': [ 191 | {'name': 'p1', 'context': None}, 192 | ], 193 | 'outputs': ['p2'], 194 | }, 195 | }, 196 | { 197 | 'context': '', 198 | 'schema': { 199 | 'name': 'two', 200 | 'inputs': [ 201 | {'name': 'p2', 'context': None}, 202 | ], 203 | 'outputs': ['p1'], 204 | }, 205 | }, 206 | ] 207 | with self.assertRaises(IncompatibleWorkflow): 208 | get_dependency_idx(init_schemas(task_lst)) 209 | -------------------------------------------------------------------------------- /workflow_viz.svg: -------------------------------------------------------------------------------- [SVG image: workflow visualisation; markup omitted. Recoverable text: group labels "Default group", "Group A", "Group B"; a task box labelled "Task 1: Generate volume element"; embedded notes stating that group colours follow Plotly.qualitative.Vivid (11 colours, repeated if a workflow has more than 11 groups; see https://plotly.com/python/discrete-color/) and that group member colours follow Plotly.qualitative.Antique, then Bold, then Pastel, etc.] --------------------------------------------------------------------------------
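Editorial note: as a reading aid for the `TaskDependencyTestCase` expectations above, the following is a minimal sketch of the dependency-resolution behaviour those tests assert: inputs are matched to upstream outputs by parameter name and context, where an input context of `None` defaults to the consuming task's own context; duplicate outputs within one context, and circular references, are rejected. This is not matflow's implementation: the name `sketch_dependency_idx` is invented, it operates on the raw task dicts the tests build (before `init_schemas` wraps each schema in a `TaskSchema`), it raises `ValueError` rather than matflow's `IncompatibleWorkflow`, and it only detects two-task cycles.

def sketch_dependency_idx(task_lst):
    """Illustrative only: resolve task dependencies as the tests above assert."""
    # Map (output name, context) -> producing task index; duplicates are invalid.
    producers = {}
    for idx, task in enumerate(task_lst):
        for out in task['schema']['outputs']:
            key = (out, task['context'])
            if key in producers:
                raise ValueError(f'Output {key!r} is produced by multiple tasks.')
            producers[key] = idx

    dep_idx = []
    for idx, task in enumerate(task_lst):
        deps = set()
        for inp in task['schema']['inputs']:
            # An input context of None defaults to the consuming task's context.
            context = task['context'] if inp['context'] is None else inp['context']
            producer = producers.get((inp['name'], context))
            if producer is not None and producer != idx:
                deps.add(producer)
        dep_idx.append(sorted(deps))

    # Reject mutually dependent task pairs (a full implementation would detect
    # cycles of any length).
    for i, deps in enumerate(dep_idx):
        if any(i in dep_idx[j] for j in deps):
            raise ValueError('Tasks are circularly dependent.')

    return dep_idx

Called on the raw `task_lst` from `test_two_dependencies`, this sketch returns `[[], [], [0, 1]]`, matching that test's `dep_idx_exp`.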