├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── element_idx.svg
├── environment.yml
├── matflow
│   ├── __init__.py
│   ├── _version.py
│   ├── api.py
│   ├── cli.py
│   ├── config.py
│   ├── errors.py
│   ├── extensions.py
│   ├── hicklable.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── command.py
│   │   ├── construction.py
│   │   ├── element.py
│   │   ├── parameters.py
│   │   ├── software.py
│   │   ├── task.py
│   │   └── workflow.py
│   ├── profile.py
│   ├── scripting.py
│   ├── utils.py
│   └── validation.py
├── requirements.txt
├── setup.py
├── tests
│   ├── __init__.py
│   ├── test_element_idx.py
│   ├── test_hicklable.py
│   ├── test_task.py
│   └── test_workflow.py
└── workflow_viz.svg
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # IPython
79 | profile_default/
80 | ipython_config.py
81 |
82 | # pyenv
83 | .python-version
84 |
85 | # pipenv
86 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
87 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
88 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not
89 | # install all needed dependencies.
90 | #Pipfile.lock
91 |
92 | # celery beat schedule file
93 | celerybeat-schedule
94 |
95 | # SageMath parsed files
96 | *.sage.py
97 |
98 | # Environments
99 | .env
100 | .venv
101 | env/
102 | venv/
103 | ENV/
104 | env.bak/
105 | venv.bak/
106 |
107 | # Spyder project settings
108 | .spyderproject
109 | .spyproject
110 |
111 | # Rope project settings
112 | .ropeproject
113 |
114 | # mkdocs documentation
115 | /site
116 |
117 | # mypy
118 | .mypy_cache/
119 | .dmypy.json
120 | dmypy.json
121 |
122 | # Pyre type checker
123 | .pyre/
124 |
125 | # VS Code
126 | /.vscode
127 | *.code-workspace
128 |
129 | # Intellij IDEs
130 | /.idea
131 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 |
3 | ## [0.2.27] - 2024.06.26
4 |
5 | ### Fixed
6 |
7 | - Fix dependencies
8 |
9 | ## [0.2.26] - 2022.03.18
10 |
11 | ### Fixed
12 |
13 | - Use `traceback` module to print full exception from an output map that generates an exception.
14 | - Bug in `scripting.get_snippet_signature` function that produces code with a syntax error.
15 |
16 | ## [0.2.25] - 2021.12.20
17 |
18 | ### Fixed
19 |
20 | - Fix bug where preparation and processing run options were ignored on workflow load.
21 | - Fix bug where archive options were ignored on workflow load.
22 |
23 | ## [0.2.24] - 2021.10.06
24 |
25 | ### Fixed
26 |
27 | - Fix bug introduced in 0.2.23, where default preparation/processing run options were ignored.
28 |
29 | ## [0.2.23] - 2021.10.06
30 |
31 | ### Fixed
32 |
33 | - Fix inability to override default (preparation/processing) run options with an empty dict
34 |
35 | ## [0.2.22] - 2021.08.14
36 |
37 | ### Added
38 |
39 | - Add support for multiple archives. Fix [#72](https://github.com/LightForm-group/matflow/issues/72).
40 |
41 | ### Fixed
42 |
43 | - Fix error message if an input mapper function has an unknown argument.
44 | - Catch and print error message from output map function failure.
45 | - Fix incorrect import key when importing from a non-trivial context that is not defined in the schema.
46 |
47 | ## [0.2.21] - 2021.06.06
48 |
49 | ### Added
50 |
51 | - Allow passing a subset of the task input parameters to the output mapper function. Resolve [#102](https://github.com/LightForm-group/matflow/issues/102).
52 | - Allow passing all iterations of an input parameter to a function mapper. Resolve [#104](https://github.com/LightForm-group/matflow/issues/104).
53 | - Allow running an on-demand archive to an existing/completed workflow: `matflow archive path/to/workflow/directory ARCHIVE_NAME`. Resolve [#68](https://github.com/LightForm-group/matflow/issues/68).
54 | - Allow specifying `default_metadata` in the `config.yml` file. Keys are merged with `metadata` specified in the workflow spec file. Resolve [#98](https://github.com/LightForm-group/matflow/issues/98).
55 |
56 | ### Fixed
57 |
58 | - Save element resource usage (e.g. run time). Fix [#97](https://github.com/LightForm-group/matflow/issues/97).
59 | - Fix bug when determining the "producing task" in an iteration pathway. Fix [#105](https://github.com/LightForm-group/matflow/issues/105).
60 | - Fix bug when a file input parameter is specified with a `$HOME` tilde: `~/path/to/file`.
61 |
62 | ## [0.2.20] - 2021.05.12
63 |
64 | ### Added
65 |
66 | - Add `Task.cleanup` attribute that can be used to optionally specify a list of glob patterns, representing file names to remove at the end of `Workflow.process_task_element`. Useful for removing very large simulation outputs that are not required after MatFlow has extracted the requested data.
67 | - Add methods to `Element` object: `get_file_lines` and `print_file_lines`, which take a file name and a slice of lines to get or print.
68 |
69 | ### Changed
70 |
71 | - Change working directory to element directory for invoking input/output/function mapper functions. This is required in some cases where a tool or script does not accept a file path as an argument.
72 | - Allow specifying the `task_idx` directly when importing parameters. This overrides any specified `context`.
73 |
74 | ### Fixed
75 |
76 | - Catch `ImportError` and `SyntaxError` when trying to load extensions.
77 | - Import from the highest task index when importing a parameter that has been through a parameter-modifying task - fix [#103](https://github.com/LightForm-group/matflow/issues/103). This can be overridden by specifying a `task_idx` directly.
78 |
79 | ## [0.2.19] - 2021.04.12 (April 2021 - Fix 1)
80 |
81 | ### Fixed
82 |
83 | - Fix type problem when input schema keys are specified "inline" in the task schema (e.g. as `CRC_file_path[file=True,save=False]`), in which the keys remain as type `str`, when they should be `bool`.
84 | - Fix problem when an imported parameter is used in a task that is iterated.
85 |
86 | ## [0.2.18] - 2021.04.10 (April 2021)
87 |
88 | ### Fixed
89 |
90 | - Fix misleading error message when a task parameter specified as a file path does not actually exist as a file.
91 | - Fix bug where if all possible dependency pathways are circularly dependent, this is not caught by MatFlow. Fix [#88](https://github.com/LightForm-group/matflow/issues/88).
92 | - Fix issue with accessing parameter data with dot-notation via their "safe names". Fix [#87](https://github.com/LightForm-group/matflow/issues/87).
93 |
94 | ### Added
95 |
96 | - Add new parameter key `ignore_dependency_from`, which is a list of task names. This allows us to exclude tasks when considering the dependencies of this parameter. Fix [#89](https://github.com/LightForm-group/matflow/issues/89).
97 | - Allow embedding file-path inputs (inputs that are text files) into the HDF5 file. Fix [#86](https://github.com/LightForm-group/matflow/issues/86).
98 | - Add `Task.unique_name` property which adds on the non-trivial `Task.context` to `Task.name`.
99 | - Tasks can be accessed from the task list via dot-notation. Fix [#90](https://github.com/LightForm-group/matflow/issues/90).
100 | - Add `Task.elements_idx` property to retrieve the correct `elements_idx` dict for that task.
101 | - Add new exception type: `ParameterImportError`.
102 | - Add ability to import parameters from existing workflows. Fix [#30](https://github.com/LightForm-group/matflow/issues/30)
103 |
104 | ### Changed
105 |
106 | - Non-trivial task contexts are now part of the task directory name to help distinguish task directories where multiple contexts are used. Fix [#50](https://github.com/LightForm-group/matflow/issues/50).
107 | - Add `context` argument to `Workflow.get_input_tasks` and `Workflow.get_output_tasks`.
108 |
109 | ## [0.2.17] - 2021.02.15
110 |
111 | ### Fixed
112 |
113 | - Fix issue [#82](https://github.com/LightForm-group/matflow/issues/82) where the default group is not defined in the `Workflow.element_idx` for tasks where no local inputs are defined.
114 |
115 | ### Added
116 |
117 | - Add support for flexible positioning of parameter-modifying tasks ([#81](https://github.com/LightForm-group/matflow/issues/81))
118 |
119 | ## [0.2.16] - 2021.02.05
120 |
121 | ### Fixed
122 |
123 | - Bump hpcflow to v0.1.13 to fix #80 and then to v0.1.14 to fix a database locking issue and a bug with choosing the correct working directories.
124 |
125 | ## [0.2.15] - 2021.01.18
126 |
127 | ### Changed
128 |
129 | - Change an Exception to a warning in `Workflow.get_element_data` to allow manually deleting element data without corrupting the workflow.
130 |
131 | ## [0.2.14] - 2021.01.17
132 |
133 | ### Added
134 |
135 | - Add method `Task.get_elements_from_iteration(iteration_idx)`.
136 |
137 | ## [0.2.13] - 2020.12.17
138 |
139 | ### Fixed
140 |
141 | - Fix bug when populating `Workflow.elements_idx` for more than two iterations.
142 |
143 | ## [0.2.12] - 2020.12.16
144 |
145 | ### Added
146 |
147 | - Add `Workflow.figures` attribute for storing associated figure definitions.
148 | - Add `Workflow.metadata` attribute for storing arbitrary metadata (will later be used for Zenodo archiving).
149 | - Add various `Workflow` static methods to help with retrieving information in the viewer without loading the whole workflow via `hickle`.
150 | - Add `get_task_schemas` to API to load the available task schemas without generating a workflow.
151 | - Add `refresh` bool parameter to `Config.set_config`, to force a reload of the configuration.
152 | - Support inputs as dependencies as well as outputs.
153 | - Support "parameter modifying" tasks (a task which outputs a parameter that is also an input to that task).
154 | - Add `iterate_run_options` to Workflow.
155 | - Add new methods for finding dependent and dependency tasks/parameters, and upstream/downstream parameter values associated with a given element.
156 | - Add input option: `include_all_iterations`. If True, inputs from all iterations are passed to input map functions.
157 |
158 | ### Fixed
159 |
160 | - Only save input/output map files if they exist!
161 | - Fix bug so that groups are propagated correctly.
162 | - Fix various code formatting issues.
163 | - Fix failure to raise on invalid schemas.
164 | - Fix bug when the same file is to be saved from multiple output maps.
165 |
166 | ### Changed
167 | - Redo task sorting algorithm such that minimal ordering changes are made.
168 | - Set `stats` bool to False by default.
169 | - Bump hpcflow version to v0.1.12.
170 |
171 | ## [0.2.11] - 2020.09.29
172 |
173 | ### Fixed
174 |
175 | - Resolve `~` in task schema and software file paths specified in the configuration file.
176 |
177 | ## [0.2.10] - 2020.09.29
178 |
179 | ### Fixed
180 |
181 | - Fix failure when a function mapper function does not return anything.
182 |
183 | ## [0.2.9] - 2020.09.17
184 |
185 | ### Added
186 |
187 | - Add scripting module for generating Python source scripts.
188 | - Default run options can be specified in the MatFlow configuration file for task, preparation and processing jobs using both "sticky" and "non-sticky" keys: `default_run_options`, `default_sticky_run_options`, `default_preparation_run_options`, `default_sticky_preparation_run_options`, `default_processing_run_options` and `default_sticky_processing_run_options`. The "sticky" defaults are always applied (but workflow-specified run options take precedence), whereas the "non-sticky" defaults are only applied if a task has no workflow-specified run options.
189 |
190 | ## [0.2.8] - 2020.09.01
191 |
192 | ### Changed
193 | - Add `version_info` to `Software.__repr__` method
194 | - Validate source maps after missing schema check
195 |
196 | ### Fixed
197 | - Remove vestigial and buggy line in `construction.get_element_idx` which would lead to enormous memory usage for large sequences.
198 |
199 | ## [0.2.7] - 2020.08.18
200 |
201 | ### Added
202 | - Default values can be specified for output map options within the schema
203 | - Default values can be specified for task input parameters within the schema
204 | - Depending on the inputs defined, different commands can be run, via "command pathway" definitions in the schema implementations.
205 |
206 | ### Changed
207 |
208 | - Uses `hickle` version 4.
209 | - Group structure in workflow HDF5 file has changed (backwards-incompatible); element data is more conveniently organised for inspecting the HDF5 file manually.
210 |
211 | ### Fixed
212 |
213 | - Fix problem when a task input key includes slashes.
214 |
215 | ## [0.2.6] - 2020.07.08
216 |
217 | ### Added
218 |
219 | - Add alternate scratch feature to allow a given task to be executed within a separate temporary directory.
220 |
221 | ### Fixed
222 |
223 | - Fix bug if specifying `merge_priority` on the default group.
224 |
225 | ### Changed
226 |
227 | - Bump hpcflow to v0.1.10
228 |
229 | ## [0.2.5] - 2020.06.27
230 |
231 | ### Fixed
232 |
233 | - Fix copying of profile file to the workflow directory when the profile file path is not in the current working directory.
234 |
235 | ## [0.2.4] - 2020.06.26
236 |
237 | ### Changed
238 |
239 | - Fix dependency `hickle` version for now, until we can assess requirements for jumping to version 4.
240 |
241 | ## [0.2.3] - 2020.06.26
242 |
243 | ### Changed
244 |
245 | - Files generated by input maps are only saved into the workflow file if explicitly requested with `save: true`.
246 |
247 | ### Fixed
248 |
249 | - Fix bug in `SourcesPreparation.get_formatted_commands` that appears if there are no commands.
250 |
251 | ## [0.2.2] - 2020.06.09
252 |
253 | ### Changed
254 |
255 | - Improved Dropbox authorization flow.
256 | - Bump hpcflow to v0.1.9
257 |
258 | ## [0.2.1] - 2020.06.09
259 |
260 | ### Fixed
261 |
262 | - Fix bug in reading `default_preparation_run_options` and `default_processing_run_options` dicts from the config file.
263 |
264 | ## [0.2.0] - 2020.06.09
265 |
266 | ### Added
267 |
268 | - Add a `Workflow.history` attribute that tracks when the workflow was modified. It also stores pertinent software versions.
269 | - Add a CLI command `matflow validate` that runs through the task schema and extension validation.
270 | - Add a CLI command `matflow kill`, which kills all executing and pending tasks.
271 | - Added configuration option `prepare_process_scheduler_options` to specify scheduler options for the prepare and process tasks.
272 | - matflow profile is stored as a `dict` in addition to a string representation of the profile file (both in the `Workflow.profile` attribute).
273 |
274 | ### Changed
275 |
276 | - Module and function `jsonable.py` and `to_jsonable` renamed to `hicklable.py` and `to_hicklable`.
277 | - Workflow and Task attributes in the workflow HDF5 file are now represented without leading underscores.
278 | - Tasks with only a single element use the task directory directly instead of using an element sub-directory.
279 | - Loading extensions and configuration files has been moved from the root `__init__` to separate modules.
280 | - `make_workflow`, `submit_workflow`, `load_workflow`, `append_schema_source`, `prepend_schema_source` and `validate` can now be imported from the root level: `from matflow import make_workflow` etc.
281 | - There are no longer unsightly global variables for `TASK_INPUT_MAP` etc. This functionality has been subsumed into the global `Config` class. This is tidier and provides a better place for some validation.
282 | - Software key `sources` has been replaced by `environment`.
283 | - hpcflow configuration directory is generated within the matflow configuration directory.
284 | - Jobscript names refer to the task that they prepare/execute/process.
285 | - hpcflow profile is passed as a `dict` to hpcflow. For information, the hpcflow profile is still dumped to a file.
286 |
287 | ## [0.1.3] - 2020.05.27
288 |
289 | - New release for Zenodo archive.
290 |
291 | ## [0.1.2] - 2020.05.12
292 |
293 | - Latest dev branch merged...
294 |
295 | ## [0.1.1] - 2020.05.07
296 |
297 | ### Fixed
298 |
299 | - Added missing dependency.
300 |
301 | ## [0.1.0] - 2020.05.07
302 |
303 | Initial release.
304 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Mozilla Public License Version 2.0
2 | ==================================
3 |
4 | 1. Definitions
5 | --------------
6 |
7 | 1.1. "Contributor"
8 | means each individual or legal entity that creates, contributes to
9 | the creation of, or owns Covered Software.
10 |
11 | 1.2. "Contributor Version"
12 | means the combination of the Contributions of others (if any) used
13 | by a Contributor and that particular Contributor's Contribution.
14 |
15 | 1.3. "Contribution"
16 | means Covered Software of a particular Contributor.
17 |
18 | 1.4. "Covered Software"
19 | means Source Code Form to which the initial Contributor has attached
20 | the notice in Exhibit A, the Executable Form of such Source Code
21 | Form, and Modifications of such Source Code Form, in each case
22 | including portions thereof.
23 |
24 | 1.5. "Incompatible With Secondary Licenses"
25 | means
26 |
27 | (a) that the initial Contributor has attached the notice described
28 | in Exhibit B to the Covered Software; or
29 |
30 | (b) that the Covered Software was made available under the terms of
31 | version 1.1 or earlier of the License, but not also under the
32 | terms of a Secondary License.
33 |
34 | 1.6. "Executable Form"
35 | means any form of the work other than Source Code Form.
36 |
37 | 1.7. "Larger Work"
38 | means a work that combines Covered Software with other material, in
39 | a separate file or files, that is not Covered Software.
40 |
41 | 1.8. "License"
42 | means this document.
43 |
44 | 1.9. "Licensable"
45 | means having the right to grant, to the maximum extent possible,
46 | whether at the time of the initial grant or subsequently, any and
47 | all of the rights conveyed by this License.
48 |
49 | 1.10. "Modifications"
50 | means any of the following:
51 |
52 | (a) any file in Source Code Form that results from an addition to,
53 | deletion from, or modification of the contents of Covered
54 | Software; or
55 |
56 | (b) any new file in Source Code Form that contains any Covered
57 | Software.
58 |
59 | 1.11. "Patent Claims" of a Contributor
60 | means any patent claim(s), including without limitation, method,
61 | process, and apparatus claims, in any patent Licensable by such
62 | Contributor that would be infringed, but for the grant of the
63 | License, by the making, using, selling, offering for sale, having
64 | made, import, or transfer of either its Contributions or its
65 | Contributor Version.
66 |
67 | 1.12. "Secondary License"
68 | means either the GNU General Public License, Version 2.0, the GNU
69 | Lesser General Public License, Version 2.1, the GNU Affero General
70 | Public License, Version 3.0, or any later versions of those
71 | licenses.
72 |
73 | 1.13. "Source Code Form"
74 | means the form of the work preferred for making modifications.
75 |
76 | 1.14. "You" (or "Your")
77 | means an individual or a legal entity exercising rights under this
78 | License. For legal entities, "You" includes any entity that
79 | controls, is controlled by, or is under common control with You. For
80 | purposes of this definition, "control" means (a) the power, direct
81 | or indirect, to cause the direction or management of such entity,
82 | whether by contract or otherwise, or (b) ownership of more than
83 | fifty percent (50%) of the outstanding shares or beneficial
84 | ownership of such entity.
85 |
86 | 2. License Grants and Conditions
87 | --------------------------------
88 |
89 | 2.1. Grants
90 |
91 | Each Contributor hereby grants You a world-wide, royalty-free,
92 | non-exclusive license:
93 |
94 | (a) under intellectual property rights (other than patent or trademark)
95 | Licensable by such Contributor to use, reproduce, make available,
96 | modify, display, perform, distribute, and otherwise exploit its
97 | Contributions, either on an unmodified basis, with Modifications, or
98 | as part of a Larger Work; and
99 |
100 | (b) under Patent Claims of such Contributor to make, use, sell, offer
101 | for sale, have made, import, and otherwise transfer either its
102 | Contributions or its Contributor Version.
103 |
104 | 2.2. Effective Date
105 |
106 | The licenses granted in Section 2.1 with respect to any Contribution
107 | become effective for each Contribution on the date the Contributor first
108 | distributes such Contribution.
109 |
110 | 2.3. Limitations on Grant Scope
111 |
112 | The licenses granted in this Section 2 are the only rights granted under
113 | this License. No additional rights or licenses will be implied from the
114 | distribution or licensing of Covered Software under this License.
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a
116 | Contributor:
117 |
118 | (a) for any code that a Contributor has removed from Covered Software;
119 | or
120 |
121 | (b) for infringements caused by: (i) Your and any other third party's
122 | modifications of Covered Software, or (ii) the combination of its
123 | Contributions with other software (except as part of its Contributor
124 | Version); or
125 |
126 | (c) under Patent Claims infringed by Covered Software in the absence of
127 | its Contributions.
128 |
129 | This License does not grant any rights in the trademarks, service marks,
130 | or logos of any Contributor (except as may be necessary to comply with
131 | the notice requirements in Section 3.4).
132 |
133 | 2.4. Subsequent Licenses
134 |
135 | No Contributor makes additional grants as a result of Your choice to
136 | distribute the Covered Software under a subsequent version of this
137 | License (see Section 10.2) or under the terms of a Secondary License (if
138 | permitted under the terms of Section 3.3).
139 |
140 | 2.5. Representation
141 |
142 | Each Contributor represents that the Contributor believes its
143 | Contributions are its original creation(s) or it has sufficient rights
144 | to grant the rights to its Contributions conveyed by this License.
145 |
146 | 2.6. Fair Use
147 |
148 | This License is not intended to limit any rights You have under
149 | applicable copyright doctrines of fair use, fair dealing, or other
150 | equivalents.
151 |
152 | 2.7. Conditions
153 |
154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
155 | in Section 2.1.
156 |
157 | 3. Responsibilities
158 | -------------------
159 |
160 | 3.1. Distribution of Source Form
161 |
162 | All distribution of Covered Software in Source Code Form, including any
163 | Modifications that You create or to which You contribute, must be under
164 | the terms of this License. You must inform recipients that the Source
165 | Code Form of the Covered Software is governed by the terms of this
166 | License, and how they can obtain a copy of this License. You may not
167 | attempt to alter or restrict the recipients' rights in the Source Code
168 | Form.
169 |
170 | 3.2. Distribution of Executable Form
171 |
172 | If You distribute Covered Software in Executable Form then:
173 |
174 | (a) such Covered Software must also be made available in Source Code
175 | Form, as described in Section 3.1, and You must inform recipients of
176 | the Executable Form how they can obtain a copy of such Source Code
177 | Form by reasonable means in a timely manner, at a charge no more
178 | than the cost of distribution to the recipient; and
179 |
180 | (b) You may distribute such Executable Form under the terms of this
181 | License, or sublicense it under different terms, provided that the
182 | license for the Executable Form does not attempt to limit or alter
183 | the recipients' rights in the Source Code Form under this License.
184 |
185 | 3.3. Distribution of a Larger Work
186 |
187 | You may create and distribute a Larger Work under terms of Your choice,
188 | provided that You also comply with the requirements of this License for
189 | the Covered Software. If the Larger Work is a combination of Covered
190 | Software with a work governed by one or more Secondary Licenses, and the
191 | Covered Software is not Incompatible With Secondary Licenses, this
192 | License permits You to additionally distribute such Covered Software
193 | under the terms of such Secondary License(s), so that the recipient of
194 | the Larger Work may, at their option, further distribute the Covered
195 | Software under the terms of either this License or such Secondary
196 | License(s).
197 |
198 | 3.4. Notices
199 |
200 | You may not remove or alter the substance of any license notices
201 | (including copyright notices, patent notices, disclaimers of warranty,
202 | or limitations of liability) contained within the Source Code Form of
203 | the Covered Software, except that You may alter any license notices to
204 | the extent required to remedy known factual inaccuracies.
205 |
206 | 3.5. Application of Additional Terms
207 |
208 | You may choose to offer, and to charge a fee for, warranty, support,
209 | indemnity or liability obligations to one or more recipients of Covered
210 | Software. However, You may do so only on Your own behalf, and not on
211 | behalf of any Contributor. You must make it absolutely clear that any
212 | such warranty, support, indemnity, or liability obligation is offered by
213 | You alone, and You hereby agree to indemnify every Contributor for any
214 | liability incurred by such Contributor as a result of warranty, support,
215 | indemnity or liability terms You offer. You may include additional
216 | disclaimers of warranty and limitations of liability specific to any
217 | jurisdiction.
218 |
219 | 4. Inability to Comply Due to Statute or Regulation
220 | ---------------------------------------------------
221 |
222 | If it is impossible for You to comply with any of the terms of this
223 | License with respect to some or all of the Covered Software due to
224 | statute, judicial order, or regulation then You must: (a) comply with
225 | the terms of this License to the maximum extent possible; and (b)
226 | describe the limitations and the code they affect. Such description must
227 | be placed in a text file included with all distributions of the Covered
228 | Software under this License. Except to the extent prohibited by statute
229 | or regulation, such description must be sufficiently detailed for a
230 | recipient of ordinary skill to be able to understand it.
231 |
232 | 5. Termination
233 | --------------
234 |
235 | 5.1. The rights granted under this License will terminate automatically
236 | if You fail to comply with any of its terms. However, if You become
237 | compliant, then the rights granted under this License from a particular
238 | Contributor are reinstated (a) provisionally, unless and until such
239 | Contributor explicitly and finally terminates Your grants, and (b) on an
240 | ongoing basis, if such Contributor fails to notify You of the
241 | non-compliance by some reasonable means prior to 60 days after You have
242 | come back into compliance. Moreover, Your grants from a particular
243 | Contributor are reinstated on an ongoing basis if such Contributor
244 | notifies You of the non-compliance by some reasonable means, this is the
245 | first time You have received notice of non-compliance with this License
246 | from such Contributor, and You become compliant prior to 30 days after
247 | Your receipt of the notice.
248 |
249 | 5.2. If You initiate litigation against any entity by asserting a patent
250 | infringement claim (excluding declaratory judgment actions,
251 | counter-claims, and cross-claims) alleging that a Contributor Version
252 | directly or indirectly infringes any patent, then the rights granted to
253 | You by any and all Contributors for the Covered Software under Section
254 | 2.1 of this License shall terminate.
255 |
256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
257 | end user license agreements (excluding distributors and resellers) which
258 | have been validly granted by You or Your distributors under this License
259 | prior to termination shall survive termination.
260 |
261 | ************************************************************************
262 | * *
263 | * 6. Disclaimer of Warranty *
264 | * ------------------------- *
265 | * *
266 | * Covered Software is provided under this License on an "as is" *
267 | * basis, without warranty of any kind, either expressed, implied, or *
268 | * statutory, including, without limitation, warranties that the *
269 | * Covered Software is free of defects, merchantable, fit for a *
270 | * particular purpose or non-infringing. The entire risk as to the *
271 | * quality and performance of the Covered Software is with You. *
272 | * Should any Covered Software prove defective in any respect, You *
273 | * (not any Contributor) assume the cost of any necessary servicing, *
274 | * repair, or correction. This disclaimer of warranty constitutes an *
275 | * essential part of this License. No use of any Covered Software is *
276 | * authorized under this License except under this disclaimer. *
277 | * *
278 | ************************************************************************
279 |
280 | ************************************************************************
281 | * *
282 | * 7. Limitation of Liability *
283 | * -------------------------- *
284 | * *
285 | * Under no circumstances and under no legal theory, whether tort *
286 | * (including negligence), contract, or otherwise, shall any *
287 | * Contributor, or anyone who distributes Covered Software as *
288 | * permitted above, be liable to You for any direct, indirect, *
289 | * special, incidental, or consequential damages of any character *
290 | * including, without limitation, damages for lost profits, loss of *
291 | * goodwill, work stoppage, computer failure or malfunction, or any *
292 | * and all other commercial damages or losses, even if such party *
293 | * shall have been informed of the possibility of such damages. This *
294 | * limitation of liability shall not apply to liability for death or *
295 | * personal injury resulting from such party's negligence to the *
296 | * extent applicable law prohibits such limitation. Some *
297 | * jurisdictions do not allow the exclusion or limitation of *
298 | * incidental or consequential damages, so this exclusion and *
299 | * limitation may not apply to You. *
300 | * *
301 | ************************************************************************
302 |
303 | 8. Litigation
304 | -------------
305 |
306 | Any litigation relating to this License may be brought only in the
307 | courts of a jurisdiction where the defendant maintains its principal
308 | place of business and such litigation shall be governed by laws of that
309 | jurisdiction, without reference to its conflict-of-law provisions.
310 | Nothing in this Section shall prevent a party's ability to bring
311 | cross-claims or counter-claims.
312 |
313 | 9. Miscellaneous
314 | ----------------
315 |
316 | This License represents the complete agreement concerning the subject
317 | matter hereof. If any provision of this License is held to be
318 | unenforceable, such provision shall be reformed only to the extent
319 | necessary to make it enforceable. Any law or regulation which provides
320 | that the language of a contract shall be construed against the drafter
321 | shall not be used to construe this License against a Contributor.
322 |
323 | 10. Versions of the License
324 | ---------------------------
325 |
326 | 10.1. New Versions
327 |
328 | Mozilla Foundation is the license steward. Except as provided in Section
329 | 10.3, no one other than the license steward has the right to modify or
330 | publish new versions of this License. Each version will be given a
331 | distinguishing version number.
332 |
333 | 10.2. Effect of New Versions
334 |
335 | You may distribute the Covered Software under the terms of the version
336 | of the License under which You originally received the Covered Software,
337 | or under the terms of any subsequent version published by the license
338 | steward.
339 |
340 | 10.3. Modified Versions
341 |
342 | If you create software not governed by this License, and you want to
343 | create a new license for such software, you may create and use a
344 | modified version of this License if you rename the license and remove
345 | any references to the name of the license steward (except to note that
346 | such modified license differs from this License).
347 |
348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary
349 | Licenses
350 |
351 | If You choose to distribute Source Code Form that is Incompatible With
352 | Secondary Licenses under the terms of this version of the License, the
353 | notice described in Exhibit B of this License must be attached.
354 |
355 | Exhibit A - Source Code Form License Notice
356 | -------------------------------------------
357 |
358 | This Source Code Form is subject to the terms of the Mozilla Public
359 | License, v. 2.0. If a copy of the MPL was not distributed with this
360 | file, You can obtain one at http://mozilla.org/MPL/2.0/.
361 |
362 | If it is not possible or desirable to put the notice in a particular
363 | file, then You may include the notice in a location (such as a LICENSE
364 | file in a relevant directory) where a recipient would be likely to look
365 | for such a notice.
366 |
367 | You may add additional accurate notices of copyright ownership.
368 |
369 | Exhibit B - "Incompatible With Secondary Licenses" Notice
370 | ---------------------------------------------------------
371 |
372 | This Source Code Form is "Incompatible With Secondary Licenses", as
373 | defined by the Mozilla Public License, v. 2.0.
374 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://zenodo.org/badge/latestdoi/219949875) [](https://badge.fury.io/py/matflow)
2 |
3 | ## **This code has been superseded by a new version that can be found here: https://github.com/hpcflow/matflow-new.**
4 |
5 | # MatFlow
6 |
7 | MatFlow is a framework for running reproducible workflows in materials science, developed in the EPSRC programme grant [LightForm](http://lightform.org.uk), a research programme on light alloy formability. It is a Python program that interacts with software (open-source and proprietary) used in materials science via extensions (see the supported extensions below). It is particularly suited to hybrid workflows
8 | (involving both experimental data and computational work), such as HPC model calibration. Outputs, together with details of the workflow, are automatically stored in an open file format for post-processing, which MatFlow can automatically upload to data repositories like [Zenodo](https://zenodo.org/).
9 |
10 | See [this repository](https://github.com/LightForm-group/UoM-CSF-matflow) for information regarding a MatFlow installation.
11 |
12 | ## Extensions
13 |
14 | MatFlow uses extension packages to interact with arbitrary software. Here is a list of current MatFlow extensions.
15 |
16 | ### Released/in-progress extensions
17 | | Software | Description | Status | Version |
18 | | ------ | ------------- | ------- | ------- |
19 | | [DAMASK](https://damask.mpie.de/) | Düsseldorf Advanced Material Simulation Kit (crystal plasticity) | [Released](https://github.com/LightForm-group/matflow-damask) | [](https://pypi.org/project/matflow-damask) |
20 | | [MTEX](https://mtex-toolbox.github.io/) | Matlab toolbox for analyzing and modeling crystallographic textures | [Released](https://github.com/LightForm-group/matflow-mtex) | [](https://pypi.org/project/matflow-mtex) |
21 | | [formable](https://github.com/LightForm-group/formable) | Formability analyses in Python | [Released](https://github.com/LightForm-group/matflow-formable) | [](https://pypi.org/project/matflow-formable) |
22 | | [DefDAP](https://github.com/MechMicroMan/DefDAP) | A python library for correlating EBSD and HRDIC data. | [Released](https://github.com/LightForm-group/matflow-defdap) | [](https://pypi.org/project/matflow-defdap) |
23 | | [Abaqus](https://www.3ds.com/products-services/simulia/products/abaqus/) | Finite element analysis | In-progress | [](https://pypi.org/project/matflow-abaqus) |
24 | | [Neper](http://www.neper.info) | Polycrystal generation and meshing | [Released/In-progress](https://github.com/LightForm-group/matflow-neper) | [](https://pypi.org/project/matflow-neper) |
25 |
26 |
27 | ### Example inputs/outputs
28 | | Label | Attributes | Output from tasks | Input to tasks |
29 | | ----------------------- | ------------------------------------------------------------ | ----------------------------------------- | ------------------------------------------------------------ |
30 | | ODF | crystal_symmetry<br>speciment_symmetry<br>euler_angles<br>euler_angle_labels<br>weights<br>orientation_coordinate_system | get_model_texture<br>estimate_ODF | sample_texture |
31 | | microstructure_seeds | position<br>**orientations**<br>grid_size<br>phase_label | generate_microstructure_seeds | generate_volume_element |
32 | | orientations | euler_angles<br>euler_angle_labels<br>orientation_coordinate_system | sample_texture | generate_volume_element |
33 | | volume_element | grid<br>size<br>origin<br>**orientations**<br>grain_orientation_idx<br>grain_phase_label_idx<br>phase_labels<br>voxel_grain_idx<br>voxel_homogenization_idx | generate_volume_element | visualise_volume_element<br>simulate_volume_element_loading |
34 | | load_case | total_time<br>num_increments<br>def_grad_aim<br>def_grad_rate<br>stress<br>rotation | generate_load_case | simulate_volume_element_loading |
35 | | volume_element_response | ... | simulate_volume_element_loading | |
36 |
37 | ## Specifying default run options
38 |
39 | Default run options (i.e. options passed to the scheduler) can be specified in a few ways. Firstly, within the workflow file, `run_options` specified at the top level will be used for any tasks that do not have their own `run_options`. If a task *does* have a `run_options` key specified, the global `run_options` will not be used at all for that task.
40 |
41 | Additionally, you can specify default run options in the MatFlow configuration file (`config.yml`, by default generated in `~/.matflow`) with the options `default_run_options` and `default_sticky_run_options`. The "sticky" defaults are merged with any run options specified in the workflow file (with workflow-specified options taking precedence), whereas the "non-sticky" defaults are only used if no run options are supplied for a task. If no run options are supplied for a task, then both the "sticky" and "non-sticky" defaults will be used (with the "non-sticky" defaults taking precedence over the "sticky" defaults). Similar keys exist for task preparation and processing run options: `default_preparation_run_options`, `default_sticky_preparation_run_options` and `default_processing_run_options`, `default_sticky_processing_run_options`.
42 |
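43 | For illustration, below is a minimal sketch of how these defaults might look. The scheduler option names used here (`num_cores`, `time_limit`) and the task entry are placeholder assumptions; the available options depend on your scheduler. In `config.yml`:
44 | 
45 | ```yaml
46 | # "Sticky" defaults are merged with any workflow-specified run options
47 | # (workflow-specified options take precedence):
48 | default_sticky_run_options:
49 |   num_cores: 1
50 | # "Non-sticky" defaults apply only to tasks with no run options at all:
51 | default_run_options:
52 |   time_limit: "01:00:00"
53 | ```
54 | 
55 | And within a workflow file, a top-level `run_options` acts as the fallback for tasks that specify none:
56 | 
57 | ```yaml
58 | run_options:             # used by any task without its own `run_options`
59 |   num_cores: 4
60 | tasks:
61 |   - name: example_task   # hypothetical task
62 |     run_options:         # replaces the global `run_options` entirely for this task
63 |       num_cores: 8
64 | ```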
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: matflow_env
2 | dependencies:
3 | - python
4 | - pip
5 | - pylint
6 | - ipykernel
7 | - rope
8 | - autopep8
9 | - twine
10 |
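11 | # Usage note (standard conda commands, stated here as a convenience):
12 | #   conda env create -f environment.yml
13 | #   conda activate matflow_env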
--------------------------------------------------------------------------------
/matflow/__init__.py:
--------------------------------------------------------------------------------
1 | """`matflow.__init__.py`"""
2 |
3 | from matflow._version import __version__
4 | from matflow.api import (
5 | make_workflow,
6 | submit_workflow,
7 | load_workflow,
8 | append_schema_source,
9 | prepend_schema_source,
10 | validate,
11 | get_task_schemas,
12 | )
13 |
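14 | # Usage sketch (illustrative; 'workflow.yml' is a hypothetical profile path):
15 | #
16 | #     from matflow import make_workflow
17 | #     workflow = make_workflow('workflow.yml')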
--------------------------------------------------------------------------------
/matflow/_version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.2.27"
2 |
--------------------------------------------------------------------------------
/matflow/api.py:
--------------------------------------------------------------------------------
1 | """`matflow.api.py`
2 |
3 | This module contains the application programming interface (API) to `matflow`,
4 | and includes functions that are called by the command line interface (CLI; in
5 | `matflow.cli.py`).
6 |
7 | """
8 |
9 | import copy
10 | from pathlib import Path
11 |
12 | import pyperclip
13 | from hpcflow import kill as hpcflow_kill
14 | from hpcflow import cloud_connect as hpcflow_cloud_connect
15 |
16 | from matflow.config import Config
17 | from matflow.extensions import load_extensions
18 | from matflow.profile import parse_workflow_profile
19 | from matflow.models.workflow import Workflow
20 |
21 |
22 | def make_workflow(profile_path, directory=None, write_dirs=True):
23 | """Generate a new Workflow from a profile file.
24 |
25 | Parameters
26 | ----------
27 |     profile_path : str or Path
28 | Path to the profile file.
29 | directory : str or Path, optional
30 | The directory in which the Workflow will be generated. By default, this
31 | is the working (i.e. invoking) directory.
32 |
33 | Returns
34 | -------
35 | workflow : Workflow
36 |
37 | """
38 |
39 | load_extensions()
40 |
41 | profile_path = Path(profile_path)
42 | workflow_dict = parse_workflow_profile(profile_path)
43 |
44 | with profile_path.open('r') as handle:
45 | profile_str = handle.read()
46 |
47 | profile = {'file': profile_str, 'parsed': copy.deepcopy(workflow_dict)}
48 |
49 | iterate_run_opts = {
50 | **Config.get('default_sticky_iterate_run_options'),
51 | **Config.get('default_iterate_run_options'),
52 | }
53 | workflow_dict.update({'iterate_run_options': iterate_run_opts})
54 |
55 | workflow = Workflow(**workflow_dict, stage_directory=directory, profile=profile)
56 | workflow.set_ids()
57 |
58 | if write_dirs:
59 | workflow.write_HDF5_file()
60 | workflow.write_directories()
61 | workflow.prepare_iteration(iteration_idx=0)
62 | workflow.dump_hpcflow_workflow_file('hpcflow_workflow.yml')
63 |
64 | # Copy profile to workflow directory:
65 | workflow.path.joinpath(profile_path.name).write_bytes(profile_path.read_bytes())
66 |
67 | # Copy workflow human_id to clipboard, if supported:
68 | try:
69 | pyperclip.copy(workflow.human_id)
70 |     except Exception:  # clipboard may be unavailable on some systems
71 | pass
72 |
73 | return workflow
74 |
75 |
76 | def submit_workflow(workflow_path, directory=None):
77 | """Generate and submit a new workflow from a profile file.
78 |
79 | Parameters
80 | ----------
81 | workflow_path : str or Path
82 | Path to either a profile file or a workflow project directory that contains a
83 | previously generated workflow HDF5 file.
84 | directory : str or Path, optional
85 | Applicable if `workflow_path` points to a profile file. The directory in which the
86 | Workflow will be generated. By default, this is the working (i.e. invoking)
87 | directory.
88 |
89 | Returns
90 | -------
91 | None
92 |
93 | """
94 |
95 | if Path(workflow_path).is_file():
96 | workflow = make_workflow(workflow_path, directory=directory, write_dirs=True)
97 | else:
98 | load_extensions()
99 | workflow = load_workflow(workflow_path)
100 |
101 | workflow.submit()
102 |
103 |
104 | def load_workflow(directory, full_path=False):
105 | Config.set_config()
106 | path = Path(directory or '').resolve()
107 | workflow = Workflow.load_HDF5_file(path, full_path)
108 |
109 | return workflow
110 |
111 |
112 | def prepare_task(task_idx, iteration_idx, directory, is_array=False):
113 | """Prepare a task (iteration) for execution by setting inputs and running input
114 | maps."""
115 |
116 | load_extensions()
117 | workflow = load_workflow(directory)
118 | workflow.prepare_task(task_idx, iteration_idx, is_array=is_array)
119 |
120 |
121 | def prepare_task_element(task_idx, element_idx, directory, is_array=False):
122 | """Prepare a task element for execution by setting inputs and running input maps."""
123 | load_extensions()
124 | workflow = load_workflow(directory)
125 | workflow.prepare_task_element(task_idx, element_idx, is_array=is_array)
126 |
127 |
128 | def process_task(task_idx, iteration_idx, directory, is_array=False):
129 | """Process a completed task (iteration) by running the output map."""
130 | load_extensions()
131 | workflow = load_workflow(directory)
132 | workflow.process_task(task_idx, iteration_idx, is_array=is_array)
133 |
134 |
135 | def process_task_element(task_idx, element_idx, directory, is_array=False):
136 | """Process a task element for execution by running output maps and saving outputs."""
137 | load_extensions()
138 | workflow = load_workflow(directory)
139 | workflow.process_task_element(task_idx, element_idx, is_array=is_array)
140 |
141 |
142 | def run_python_task(task_idx, element_idx, directory):
143 | """Run a (commandless) Python task."""
144 | load_extensions()
145 | workflow = load_workflow(directory)
146 | workflow.run_python_task(task_idx, element_idx)
147 |
148 |
149 | def prepare_sources(task_idx, iteration_idx, directory):
150 | """Prepare source files."""
151 | load_extensions()
152 | workflow = load_workflow(directory)
153 | workflow.prepare_sources(task_idx, iteration_idx)
154 |
155 |
156 | def append_schema_source(schema_source_path):
157 | """Add a task schema source file to the end of the schema source list."""
158 | Config.append_schema_source(schema_source_path)
159 |
160 |
161 | def prepend_schema_source(schema_source_path):
162 | """Add a task schema source file to the front of the schema source list."""
163 | Config.prepend_schema_source(schema_source_path)
164 |
165 |
166 | def validate():
167 | load_extensions()
168 |
169 |
170 | def kill(directory):
171 | Config.set_config()
172 | hpcflow_kill(dir_path=directory, config_dir=Config.get('hpcflow_config_dir'))
173 |
174 |
175 | def cloud_connect(provider):
176 | Config.set_config()
177 | hpcflow_cloud_connect(provider, config_dir=Config.get('hpcflow_config_dir'))
178 |
179 |
180 | def write_element_directories(iteration_idx, directory):
181 |     """Generate element directories for a given iteration."""
182 | load_extensions()
183 | workflow = load_workflow(directory)
184 | if workflow.iterate:
185 | num_iters = workflow.iterate['num_iterations']
186 | else:
187 | num_iters = workflow.num_iterations
188 | if iteration_idx < num_iters:
189 | workflow.write_element_directories(iteration_idx)
190 | workflow.prepare_iteration(iteration_idx)
191 |
192 |
193 | def archive(directory, archive):
194 | """Perform an on-demand archive of an existing workflow."""
195 | workflow = load_workflow(directory)
196 | workflow.do_archive(archive)
197 |
198 |
199 | def get_task_schemas():
200 | Config.set_config()
201 | return Config.get('task_schemas')
202 |
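203 | # Usage sketch (illustrative; paths are hypothetical):
204 | #
205 | #     from matflow import api
206 | #     api.submit_workflow('workflow.yml')               # generate and submit a new workflow
207 | #     wf = api.load_workflow('/path/to/workflow_dir')   # load an existing workflow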
--------------------------------------------------------------------------------
/matflow/cli.py:
--------------------------------------------------------------------------------
1 | """`matflow.cli.py`
2 |
3 | Module that exposes a command line interface for `matflow`.
4 |
5 | """
6 | import click
7 |
8 | from matflow import __version__
9 | from matflow import api
10 |
11 |
12 | @click.group()
13 | @click.version_option(version=__version__)
14 | def cli():
15 | pass
16 |
17 |
18 | @cli.command()
19 | @click.option('--directory', '-d')
20 | @click.argument('profile', type=click.Path(exists=True))
21 | def make(profile, directory=None):
22 | """Generate a new Workflow."""
23 | print('matflow.cli.make', flush=True)
24 | api.make_workflow(profile_path=profile, directory=directory)
25 |
26 |
27 | @cli.command()
28 | @click.option('--directory', '-d')
29 | @click.argument('workflow_path', type=click.Path(exists=True))
30 | def go(workflow_path, directory=None):
31 | """Generate and submit a new Workflow."""
32 | print('matflow.cli.go', flush=True)
33 | api.submit_workflow(workflow_path, directory=directory)
34 |
35 |
36 | @cli.command()
37 | @click.option('--task-idx', '-t', type=click.INT, required=True)
38 | @click.option('--iteration-idx', '-i', type=click.INT, required=True)
39 | @click.option('--directory', '-d', type=click.Path(exists=True))
40 | @click.option('--array', is_flag=True)
41 | def prepare_task(task_idx, iteration_idx, directory=None, array=False):
42 | print('matflow.cli.prepare_task', flush=True)
43 | api.prepare_task(task_idx, iteration_idx, directory, is_array=array)
44 |
45 |
46 | @cli.command()
47 | @click.option('--task-idx', '-t', type=click.INT, required=True)
48 | @click.option('--element-idx', '-e', type=click.INT, required=True)
49 | @click.option('--directory', '-d', type=click.Path(exists=True))
50 | @click.option('--array', is_flag=True)
51 | def prepare_task_element(task_idx, element_idx, directory=None, array=False):
52 | print('matflow.cli.prepare_task_element', flush=True)
53 | api.prepare_task_element(task_idx, element_idx, directory, is_array=array)
54 |
55 |
56 | @cli.command()
57 | @click.option('--task-idx', '-t', type=click.INT, required=True)
58 | @click.option('--iteration-idx', '-i', type=click.INT, required=True)
59 | @click.option('--directory', '-d', type=click.Path(exists=True))
60 | @click.option('--array', is_flag=True)
61 | def process_task(task_idx, iteration_idx, directory=None, array=False):
62 | print('matflow.cli.process_task', flush=True)
63 | api.process_task(task_idx, iteration_idx, directory, is_array=array)
64 |
65 |
66 | @cli.command()
67 | @click.option('--task-idx', '-t', type=click.INT, required=True)
68 | @click.option('--element-idx', '-e', type=click.INT, required=True)
69 | @click.option('--directory', '-d', type=click.Path(exists=True))
70 | @click.option('--array', is_flag=True)
71 | def process_task_element(task_idx, element_idx, directory=None, array=False):
72 | print('matflow.cli.process_task_element', flush=True)
73 | api.process_task_element(task_idx, element_idx, directory, is_array=array)
74 |
75 |
76 | @cli.command()
77 | @click.option('--task-idx', '-t', type=click.INT, required=True)
78 | @click.option('--element-idx', '-e', type=click.INT, required=True)
79 | @click.option('--directory', '-d', type=click.Path(exists=True))
80 | def run_python_task(task_idx, element_idx, directory=None):
81 | print('matflow.cli.run_python_task', flush=True)
82 | api.run_python_task(task_idx, element_idx, directory)
83 |
84 |
85 | @cli.command()
86 | @click.option('--task-idx', '-t', type=click.INT, required=True)
87 | @click.option('--iteration-idx', '-i', type=click.INT, required=True)
88 | @click.option('--directory', '-d', type=click.Path(exists=True))
89 | def prepare_sources(task_idx, iteration_idx, directory=None):
90 | print('matflow.cli.prepare_sources', flush=True)
91 | api.prepare_sources(task_idx, iteration_idx, directory)
92 |
93 |
94 | @cli.command()
95 | @click.argument('schema_source_path', type=click.Path(exists=True))
96 | def append_schema_source(schema_source_path):
97 | api.append_schema_source(schema_source_path)
98 |
99 |
100 | @cli.command()
101 | @click.argument('schema_source_path', type=click.Path(exists=True))
102 | def prepend_schema_source(schema_source_path):
103 | api.prepend_schema_source(schema_source_path)
104 |
105 |
106 | @cli.command()
107 | def validate():
108 | """Load and validate task schemas against available extensions."""
109 | api.validate()
110 |
111 |
112 | @cli.command()
113 | @click.option('--provider', '-p', required=True)
114 | def cloud_connect(provider):
115 | api.cloud_connect(provider)
116 |
117 |
118 | @cli.command()
119 | @click.argument('directory', type=click.Path(exists=True))
120 | def kill(directory):
121 | """Kill all pending and executing tasks."""
122 | api.kill(directory)
123 |
124 |
125 | @cli.command()
126 | @click.option('--iteration-idx', '-i', type=click.INT, required=True)
127 | @click.option('--directory', '-d', type=click.Path(exists=True))
128 | def write_element_directories(iteration_idx, directory=None):
129 | api.write_element_directories(iteration_idx, directory)
130 |
131 |
132 | @cli.command()
133 | @click.argument('directory', type=click.Path(exists=True))
134 | @click.argument('archive')
135 | def archive(directory, archive):
136 | api.archive(directory, archive)
137 |
138 |
139 | if __name__ == '__main__':
140 | cli()
141 |
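142 | # Typical invocations (illustrative; 'workflow.yml' is a hypothetical profile path):
143 | #
144 | #     matflow make --directory /path/to/stage workflow.yml
145 | #     matflow go workflow.yml
146 | #     matflow validate
147 | #     matflow kill /path/to/workflow/directory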
--------------------------------------------------------------------------------
/matflow/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 | from warnings import warn
4 |
5 | from ruamel.yaml import YAML, safe_load
6 |
7 |
8 | from matflow.errors import ConfigurationError, MatflowExtensionError
9 | from matflow.models.task import TaskSchema
10 | from matflow.models.software import SoftwareInstance
11 |
12 |
13 | class Config(object):
14 |
15 | __ALLOWED_CONFIG = [
16 | 'task_schema_sources',
17 | 'software_sources',
18 | 'default_run_options',
19 | 'default_preparation_run_options',
20 | 'default_processing_run_options',
21 | 'default_iterate_run_options',
22 | 'default_sticky_run_options',
23 | 'default_sticky_preparation_run_options',
24 | 'default_sticky_processing_run_options',
25 | 'default_sticky_iterate_run_options',
26 | 'parallel_modes',
27 | 'archive_locations',
28 | 'default_metadata',
29 | ]
30 |
31 | __conf = {}
32 |
33 | _is_set = False
34 | _is_extension_locked = True
35 |
36 | @staticmethod
37 | def append_schema_source(schema_source_path, config_dir=None):
38 | yaml = YAML(typ='rt')
39 | config_dat, config_file = Config.get_config_file(config_dir=config_dir)
40 | config_dat['task_schema_sources'].append(str(schema_source_path))
41 | yaml.dump(config_dat, config_file)
42 |
43 | @staticmethod
44 | def prepend_schema_source(schema_source_path, config_dir=None):
45 | yaml = YAML(typ='rt')
46 | config_dat, config_file = Config.get_config_file(config_dir=config_dir)
47 | config_dat['task_schema_sources'] = (
48 |             [str(schema_source_path)] + config_dat['task_schema_sources']
49 | )
50 | yaml.dump(config_dat, config_file)
51 |
52 | @staticmethod
53 | def resolve_config_dir(config_dir=None):
54 |
55 | if not config_dir:
56 | config_dir = Path(os.getenv('MATFLOW_CONFIG_DIR', '~/.matflow')).expanduser()
57 | else:
58 | config_dir = Path(config_dir)
59 |
60 | if Config._is_set:
61 | if config_dir != Config.get('config_dir'):
62 | warn(f'Config is already set, but `config_dir` changed from '
63 | f'"{Config.get("config_dir")}" to "{config_dir}".')
64 |
65 | if not config_dir.is_dir():
66 | print('Configuration directory does not exist. Generating.')
67 | config_dir.mkdir()
68 |
69 | return config_dir
70 |
71 | @staticmethod
72 | def get_config_file(config_dir):
73 |
74 | yaml = YAML()
75 | config_file = config_dir.joinpath('config.yml')
76 | def_schema_file = config_dir.joinpath('task_schemas.yml')
77 | def_software_file = config_dir.joinpath('software.yml')
78 | if not config_file.is_file():
79 | print('No config.yml found. Generating a config.yml file.')
80 | def_config = {
81 | 'task_schema_sources': [str(def_schema_file)],
82 | 'software_sources': [str(def_software_file)],
83 | 'parallel_modes': {
84 | 'MPI': {'command': 'mpirun -np <>'},
85 | 'OpenMP': {'env': 'export OMP_NUM_THREADS=<>'},
86 | }
87 | }
88 | yaml.dump(def_config, config_file)
89 |
90 | if not def_schema_file.is_file():
91 | print('Generating a default task schema file.')
92 | yaml.dump([], def_schema_file)
93 |
94 | if not def_software_file.is_file():
95 | print('Generating a default software file.')
96 | yaml.dump({}, def_software_file)
97 |
98 | print(f'Loading matflow config from {config_file}')
99 | with config_file.open() as handle:
100 | config_dat = safe_load(handle)
101 | bad_keys = list(set(config_dat.keys()) - set(Config.__ALLOWED_CONFIG))
102 | if bad_keys:
103 | bad_keys_fmt = ', '.join([f'"{i}"' for i in bad_keys])
104 | raise ConfigurationError(f'Unknown configuration options: {bad_keys_fmt}.')
105 |
106 | if 'task_schema_sources' not in config_dat:
107 | msg = (f'Missing `task_schema_sources` from configuration file: '
108 | f'{config_file}.')
109 | raise ConfigurationError(msg)
110 |
111 | if 'software_sources' not in config_dat:
112 |             msg = f'Missing `software_sources` from configuration file: {config_file}.'
113 | raise ConfigurationError(msg)
114 |
115 | return config_dat, config_file
116 |
117 | @staticmethod
118 | def set_config(config_dir=None, raise_on_set=False, refresh=False):
119 | """Load configuration from a YAML file."""
120 |
121 | config_dir = Config.resolve_config_dir(config_dir)
122 |
123 | if Config._is_set:
124 | if raise_on_set:
125 | raise ConfigurationError('Configuration is already set.')
126 | elif not refresh:
127 | return
128 |
129 | config_dat, _ = Config.get_config_file(config_dir)
130 | schema_sources = [Path(i).expanduser() for i in config_dat['task_schema_sources']]
131 | software_sources = [Path(i).expanduser() for i in config_dat['software_sources']]
132 |
133 | # Validate parallel_modes:
134 | ALLOWED_PARA_MODES = ['MPI', 'OpenMP']
135 | ALLOWED_PARA_MODES_FMT = ', '.join([f'{i!r}' for i in ALLOWED_PARA_MODES])
136 | ALLOWED_PARA_CONFIGS = ['env', 'command']
137 | ALLOWED_PARA_CONFIGS_FMT = ', '.join([f'{i!r}' for i in ALLOWED_PARA_CONFIGS])
138 | para_modes = {}
139 | for name, mode_config in config_dat.get('parallel_modes', {}).items():
140 | if name.lower() not in [i.lower() for i in ALLOWED_PARA_MODES]:
141 | msg = (f'Parallel mode "{name}" not known. Allowed parallel modes are '
142 | f'{ALLOWED_PARA_MODES_FMT}.')
143 | raise ConfigurationError(msg)
144 | if not mode_config:
145 | msg = (f'Specify at least one of {ALLOWED_PARA_CONFIGS_FMT} for parallel '
146 | f'mode configuration: "{name}".')
147 | raise ConfigurationError(msg)
148 | bad_keys = set(mode_config.keys()) - set(ALLOWED_PARA_CONFIGS)
149 | if bad_keys:
150 | bad_keys_fmt = ', '.join([f'{i!r}' for i in bad_keys])
151 | msg = (f'Unknown parallel mode configuration keys: {bad_keys_fmt} for '
152 | f'mode "{name}".')
153 | raise ConfigurationError(msg)
154 |
155 | if 'env' in mode_config:
156 | # Split into list of lines:
157 | mode_config['env'] = mode_config['env'].splitlines()
158 |
159 | # Update to be lowercase:
160 | para_modes.update({name.lower(): mode_config})
161 |
162 | # Load task_schemas list from all specified task schema files:
163 | task_schema_dicts = {}
164 | yaml = YAML(typ='safe')
165 | for task_schema_file in schema_sources[::-1]:
166 | if not task_schema_file.is_file():
167 | msg = f'Task schema source is not a file: "{task_schema_file}".'
168 | raise ConfigurationError(msg)
169 |             for i in (yaml.load(task_schema_file) or []):  # Tolerate empty files.
170 | if 'name' not in i:
171 | raise ValueError('Task schema definition is missing a "name" key.')
172 | # Overwrite any task schema with the same name (hence we order files in
173 | # reverse so e.g. the first task schema file takes precedence):
174 | task_schema_dicts.update({i['name']: i})
175 |
176 | # Convert to lists:
177 | task_schema_dicts = [v for k, v in task_schema_dicts.items()]
178 |
179 | # Load and validate self-consistency of task schemas:
180 | print(f'Loading task schemas from {len(schema_sources)} file(s)...', end='')
181 | try:
182 | task_schemas = TaskSchema.load_from_hierarchy(task_schema_dicts)
183 | except Exception as err:
184 | print('Failed.')
185 | raise err
186 | print('OK!')
187 |
188 | print(f'Loading software definitions from {len(software_sources)} '
189 | f'file(s)...', end='')
190 | software = {}
191 | for software_file in software_sources:
192 | if not software_file.is_file():
193 | msg = f'Software source is not a file: "{software_file}".'
194 | raise ConfigurationError(msg)
195 | try:
196 | soft_loaded = SoftwareInstance.load_multiple(yaml.load(software_file))
197 | except Exception as err:
198 | print(f'\nFailed to load software definitions from: "{software_file}".')
199 | raise err
200 |
201 | # Combine software instances from multiple software source files:
202 | for soft_name, instances in soft_loaded.items():
203 | if soft_name in software:
204 | software[soft_name].extend(instances)
205 | else:
206 | software.update({soft_name: instances})
207 | print('OK!')
208 |
209 | archive_locs = config_dat.get('archive_locations', {})
210 | for arch_name, arch in archive_locs.items():
211 | ALLOWED_ARCH_KEYS = ['path', 'cloud_provider']
212 | if 'path' not in arch:
213 | msg = f'Missing `path` for archive location "{arch_name}".'
214 | raise ConfigurationError(msg)
215 | bad_keys = set(arch.keys()) - set(ALLOWED_ARCH_KEYS)
216 | if bad_keys:
217 | bad_keys_fmt = ', '.join([f'{i!r}' for i in bad_keys])
218 | msg = (f'Unknown archive location keys for archive "{arch_name}": '
219 | f'{bad_keys_fmt}')
220 | raise ConfigurationError(msg)
221 |
222 | ALLOWED_CLOUD_PROVIDERS = ['dropbox']
223 | cloud_provider = arch.get('cloud_provider')
224 | if cloud_provider and cloud_provider not in ALLOWED_CLOUD_PROVIDERS:
225 | msg = (f'Unsupported cloud provider for archive "{arch_name}": '
226 | f'"{cloud_provider}". Supported cloud providers are: '
227 | f'{ALLOWED_CLOUD_PROVIDERS}.')
228 | raise ConfigurationError(msg)
229 |
230 | Config.__conf['config_dir'] = config_dir
231 |
232 | for i in [
233 | 'default_run_options',
234 | 'default_preparation_run_options',
235 | 'default_processing_run_options',
236 | 'default_iterate_run_options',
237 | 'default_sticky_run_options',
238 | 'default_sticky_preparation_run_options',
239 | 'default_sticky_processing_run_options',
240 | 'default_sticky_iterate_run_options',
241 | 'default_metadata',
242 | ]:
243 | Config.__conf[i] = config_dat.get(i, {})
244 |
245 | hpcflow_config_dir = config_dir.joinpath('.hpcflow')
246 | Config.__conf['hpcflow_config_dir'] = hpcflow_config_dir
247 | Config.__conf['software'] = software
248 | Config.__conf['task_schemas'] = task_schemas
249 | Config.__conf['parallel_modes'] = para_modes
250 | Config.__conf['archive_locations'] = archive_locs
251 |
252 | Config.__conf['input_maps'] = {}
253 | Config.__conf['output_maps'] = {}
254 | Config.__conf['func_maps'] = {}
255 | Config.__conf['CLI_arg_maps'] = {}
256 | Config.__conf['sources_maps'] = {}
257 | Config.__conf['output_file_maps'] = {}
258 | Config.__conf['software_versions'] = {}
259 | Config.__conf['extension_info'] = {}
260 | Config.__conf['schema_validity'] = {}
261 |
262 | Config._is_set = True
263 |
264 | @staticmethod
265 | def get(name):
266 | if not Config._is_set:
267 | raise ConfigurationError('Configuration is not yet set.')
268 | return Config.__conf[name]
269 |
270 | @staticmethod
271 | def lock_extensions():
272 | Config._is_extension_locked = True
273 |
274 | @staticmethod
275 | def unlock_extensions():
276 | Config._is_extension_locked = False
277 |
278 | @staticmethod
279 | def _get_software_safe(software_name):
280 | return SoftwareInstance.get_software_safe(software_name)
281 |
282 | @staticmethod
283 | def _get_key_safe(key):
284 | return key[0], key[1], Config._get_software_safe(key[2])
285 |
286 | @staticmethod
287 | def _validate_extension_setter():
288 | if not Config._is_set:
289 | warn(f'Configuration is not yet set. Matflow extension functions will not '
290 | 'be mapped to task schemas unless matflow is loaded.')
291 | return False
292 | if Config._is_extension_locked:
293 | msg = 'Configuration is locked against modifying extension data.'
294 | raise ConfigurationError(msg)
295 | return True
296 |
297 | @staticmethod
298 | def set_input_map(key, input_file, func):
299 | if Config._validate_extension_setter():
300 | key = Config._get_key_safe(key)
301 | if key not in Config.__conf['input_maps']:
302 | Config.__conf['input_maps'].update({key: {}})
303 | if input_file in Config.__conf['input_maps'][key]:
304 | msg = f'Input file name "{input_file}" already exists in the input map.'
305 | raise MatflowExtensionError(msg)
306 | Config.__conf['input_maps'][key][input_file] = func
307 |
308 | @staticmethod
309 | def set_output_map(key, output_name, func):
310 | if Config._validate_extension_setter():
311 | key = Config._get_key_safe(key)
312 | if key not in Config.__conf['output_maps']:
313 | Config.__conf['output_maps'].update({key: {}})
314 | if output_name in Config.__conf['output_maps'][key]:
315 | msg = f'Output name "{output_name}" already exists in the output map.'
316 | raise MatflowExtensionError(msg)
317 | Config.__conf['output_maps'][key][output_name] = func
318 |
319 | @staticmethod
320 | def set_func_map(key, func):
321 | if Config._validate_extension_setter():
322 | key = Config._get_key_safe(key)
323 | if key in Config.__conf['func_maps']:
324 | msg = f'Function map "{key}" already exists in the function map.'
325 | raise MatflowExtensionError(msg)
326 | Config.__conf['func_maps'][key] = func
327 |
328 | @staticmethod
329 | def set_CLI_arg_map(key, input_name, func):
330 | if Config._validate_extension_setter():
331 | key = Config._get_key_safe(key)
332 | if key not in Config.__conf['CLI_arg_maps']:
333 | Config.__conf['CLI_arg_maps'].update({key: {}})
334 | if input_name in Config.__conf['CLI_arg_maps'][key]:
335 | msg = (f'Input name "{input_name}" already exists in the CLI formatter '
336 | f'map.')
337 | raise MatflowExtensionError(msg)
338 | Config.__conf['CLI_arg_maps'][key][input_name] = func
339 |
340 | @staticmethod
341 | def set_source_map(key, func, **sources_dict):
342 | if Config._validate_extension_setter():
343 | key = Config._get_key_safe(key)
344 | if key in Config.__conf['sources_maps']:
345 |                 msg = f'Sources map for key {key} already exists.'
346 | raise MatflowExtensionError(msg)
347 | Config.__conf['sources_maps'].update({
348 | key: {'func': func, 'sources': sources_dict}
349 | })
350 |
351 | @staticmethod
352 | def set_software_version_func(software, func):
353 | if Config._validate_extension_setter():
354 | software = Config._get_software_safe(software)
355 | if software in Config.__conf['software_versions']:
356 | msg = (f'Software "{software}" has already registered a '
357 | f'`software_versions` function.')
358 | raise MatflowExtensionError(msg)
359 | Config.__conf['software_versions'][software] = func
360 |
361 | @staticmethod
362 | def set_output_file_map(key, file_reference, file_name):
363 | if Config._validate_extension_setter():
364 | key = Config._get_key_safe(key)
365 | if key not in Config.__conf['output_file_maps']:
366 | Config.__conf['output_file_maps'].update({key: {}})
367 | file_ref_full = '__file__' + file_reference
368 | if file_ref_full in Config.__conf['output_file_maps'][key]:
369 | msg = f'File name "{file_name}" already exists in the output files map.'
370 | raise MatflowExtensionError(msg)
371 | Config.__conf['output_file_maps'][key].update({file_ref_full: file_name})
372 |
373 | @staticmethod
374 | def set_extension_info(name, info):
375 | if Config._validate_extension_setter():
376 | if name in Config.__conf['extension_info']:
377 | msg = f'Extension with name "{name}" already loaded.'
378 | raise MatflowExtensionError(msg)
379 | Config.__conf['extension_info'][name] = info
380 |
381 | @staticmethod
382 | def set_schema_validities(validities):
383 | if Config._validate_extension_setter():
384 | Config.__conf['schema_validity'].update(validities)
385 |
386 | @staticmethod
387 | def unload_extension(software_name):
388 |
389 | name = Config._get_software_safe(software_name)
390 |
391 | in_map = [k for k in Config.__conf['input_maps'] if k[2] == name]
392 | for k in in_map:
393 | del Config.__conf['input_maps'][k]
394 |
395 | out_map = [k for k in Config.__conf['output_maps'] if k[2] == name]
396 | for k in out_map:
397 | del Config.__conf['output_maps'][k]
398 |
399 | func_map = [k for k in Config.__conf['func_maps'] if k[2] == name]
400 | for k in func_map:
401 | del Config.__conf['func_maps'][k]
402 |
403 | CLI_map = [k for k in Config.__conf['CLI_arg_maps'] if k[2] == name]
404 | for k in CLI_map:
405 | del Config.__conf['CLI_arg_maps'][k]
406 |
407 | out_file_map = [k for k in Config.__conf['output_file_maps'] if k[2] == name]
408 | for k in out_file_map:
409 | del Config.__conf['output_file_maps'][k]
410 |
411 | soft_vers = [k for k in Config.__conf['software_versions'] if k == name]
412 | for k in soft_vers:
413 | del Config.__conf['software_versions'][k]
414 |
415 | ext_info = [k for k in Config.__conf['extension_info'] if k == name]
416 | for k in ext_info:
417 | del Config.__conf['extension_info'][k]
418 |
419 | schema_valid = [k for k in Config.__conf['schema_validity'] if k[2] == name]
420 | for k in schema_valid:
421 | del Config.__conf['schema_validity'][k]
422 |
423 | source_map = [k for k in Config.__conf['sources_maps'] if k[2] == name]
424 | for k in source_map:
425 | del Config.__conf['sources_maps'][k]
426 |
--------------------------------------------------------------------------------
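A sketch of typical `Config` usage, assuming `MATFLOW_CONFIG_DIR` is unset so the default `~/.matflow` directory is used (and generated on first call):

from matflow.config import Config

# First call generates config.yml (plus default task schema and software
# files) if missing, then loads and validates everything:
Config.set_config()

# Subsequent calls return immediately unless a refresh is forced:
Config.set_config(refresh=True)

print(Config.get('config_dir'))
print(Config.get('parallel_modes'))  # Keys are lowercased, e.g. 'mpi'.
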
/matflow/errors.py:
--------------------------------------------------------------------------------
1 | class IncompatibleWorkflow(Exception):
2 | pass
3 |
4 |
5 | class IncompatibleTaskNesting(IncompatibleWorkflow):
6 | pass
7 |
8 |
9 | class MissingMergePriority(IncompatibleTaskNesting):
10 | pass
11 |
12 |
13 | class IncompatibleSequence(Exception):
14 | """For task sequence definitions that are not logically consistent."""
15 |
16 |
17 | class SequenceError(Exception):
18 | """For malformed sequence definitions."""
19 |
20 |
21 | class TaskError(Exception):
22 | """For malformed task definitions."""
23 |
24 |
25 | class TaskSchemaError(Exception):
26 | """For nonsensical task schema definitions."""
27 |
28 |
29 | class TaskParameterError(Exception):
30 | """For incorrectly parametrised tasks."""
31 |
32 |
33 | class ProfileError(Exception):
34 | """For malformed profile file data."""
35 |
36 |
37 | class MissingSoftware(Exception):
38 | """For specified software that cannot be satisfied."""
39 |
40 |
41 | class WorkflowPersistenceError(Exception):
42 | """For problems related to saving and loading the persistent HDF5 files."""
43 |
44 |
45 | class UnsatisfiedGroupParameter(Exception):
46 | """For when an input has a group, but that group does not exist in the Workflow."""
47 |
48 |
49 | class MatflowExtensionError(Exception):
50 | """For problems when loading extensions."""
51 |
52 |
53 | class MissingSchemaError(Exception):
54 | """For when a suitable schema does not exist."""
55 |
56 |
57 | class UnsatisfiedSchemaError(Exception):
58 | """For when a suitable extension function cannot be found for a task schema."""
59 |
60 |
61 | class TaskElementExecutionError(Exception):
62 |     """For when the execution of a task element fails."""
63 |
64 |
65 | class ConfigurationError(Exception):
66 | """For malformed configuration files."""
67 |
68 |
69 | class SoftwareInstanceError(Exception):
70 | """For malformed SoftwareInstance definitions."""
71 | pass
72 |
73 |
74 | class MissingSoftwareSourcesError(Exception):
75 | """For when a software instance requires source variables, but none are forthcoming."""
76 |
77 |
78 | class UnexpectedSourceMapReturnError(Exception):
79 | """For when a source map function does not return the expected dict."""
80 |
81 |
82 | class CommandError(Exception):
83 | """For problems with command groups and commands."""
84 |
85 |
86 | class WorkflowIterationError(Exception):
87 | """For issues with resolving requested iterations."""
88 |
89 |
90 | class ParameterImportError(Exception):
91 | """For issues with importing parameters from pre-existing workflows."""
92 |
--------------------------------------------------------------------------------
/matflow/extensions.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import pkg_resources
3 | import warnings
4 |
5 | from matflow.config import Config
6 | from matflow.validation import validate_task_schemas
7 |
8 |
9 | def load_extensions():
10 |
11 | Config.set_config(raise_on_set=True)
12 | Config.unlock_extensions()
13 |
14 |     extensions_entries = list(pkg_resources.iter_entry_points('matflow.extension'))
15 | if extensions_entries:
16 | print('Loading extensions...')
17 | for entry_point in extensions_entries:
18 |
19 | print(f' "{entry_point.name}"...', end='', flush=True)
20 |
21 | try:
22 | loaded = entry_point.load()
23 | except (ImportError, SyntaxError) as ex:
24 | print(f'Failed: {ex!r}', flush=True)
25 | continue
26 |
27 | unload = False
28 |
29 | if not hasattr(loaded, 'SOFTWARE'):
30 | print('Failed.', flush=True)
31 | warnings.warn(f'Matflow extension "{entry_point.module_name}" has no '
32 | f'`SOFTWARE` attribute. This extension will not be loaded.')
33 |                 continue  # No software name available, so nothing to unload.
34 |
35 | if not hasattr(loaded, '__version__'):
36 | print('Failed.', flush=True)
37 | warnings.warn(f'Matflow extension "{entry_point.module_name}" has no '
38 | f'`__version__` attribute. This extension will not be '
39 | f'loaded.')
40 | unload = True
41 |
42 | software_safe = Config._get_software_safe(loaded.SOFTWARE)
43 |
44 | if (
45 | not unload and
46 | Config.get('software_versions').get(software_safe) is None
47 | ):
48 |
49 | # Every defined SoftwareInstance must have a specified version_info:
50 | version_defined = True
51 | soft_instances = Config.get('software').get(software_safe)
52 | if not soft_instances:
53 | version_defined = False
54 | else:
55 | for i in soft_instances:
56 | if i.version_info is None:
57 | version_defined = False
58 | break
59 |
60 | if not version_defined:
61 | print('Failed.', flush=True)
62 | msg = (f'Matflow extension "{entry_point.module_name}" does not '
63 | f'register a function for getting software versions and one '
64 | f'or more of its software instance definitions do not '
65 | f'specify `version_info`. This extension will not be loaded.')
66 | warnings.warn(msg)
67 | unload = True
68 |
69 | if unload:
70 | Config.unload_extension(software_safe)
71 | continue
72 |
73 | Config.set_extension_info(
74 | entry_point.name,
75 | {'module_name': entry_point.module_name, 'version': loaded.__version__},
76 | )
77 | print(f'(software: "{software_safe}") from '
78 | f'{entry_point.module_name} (version {loaded.__version__})', flush=True)
79 |
80 | # Validate task schemas against loaded extensions:
81 | print('Validating task schemas against loaded extensions...', end='')
82 | try:
83 | Config.set_schema_validities(
84 | validate_task_schemas(
85 | Config.get('task_schemas'),
86 | Config.get('input_maps'),
87 | Config.get('output_maps'),
88 | Config.get('func_maps'),
89 | )
90 | )
91 | except Exception as err:
92 | print('Failed.', flush=True)
93 | raise err
94 |
95 | schema_validity = Config.get('schema_validity')
96 | schema_invalids = [(k, v[1]) for k, v in schema_validity.items() if not v[0]]
97 | num_valid = sum([i[0] for i in schema_validity.values()])
98 | num_total = len(schema_validity)
99 | print(f'OK! {num_valid}/{num_total} schemas are valid.', flush=True)
100 | if schema_invalids:
101 | sch_invalids_fmt = '\n '.join([f'{i[0]}: {i[1]}' for i in schema_invalids])
102 | msg = f'The following schemas are invalid:\n {sch_invalids_fmt}\n'
103 | print(msg, flush=True)
104 |
105 | else:
106 | print('No extensions found.')
107 |
108 | Config.lock_extensions()
109 |
110 |
111 | def input_mapper(input_file, task, method, software):
112 | """Function decorator for adding input maps from extensions."""
113 | def _input_mapper(func):
114 | @functools.wraps(func)
115 | def func_wrap(*args, **kwargs):
116 | return func(*args, **kwargs)
117 | key = (task, method, software)
118 | Config.set_input_map(key, input_file, func_wrap)
119 | return func_wrap
120 | return _input_mapper
121 |
122 |
123 | def output_mapper(output_name, task, method, software):
124 | """Function decorator for adding output maps from extensions."""
125 | def _output_mapper(func):
126 | @functools.wraps(func)
127 | def func_wrap(*args, **kwargs):
128 | return func(*args, **kwargs)
129 | key = (task, method, software)
130 | Config.set_output_map(key, output_name, func_wrap)
131 | return func_wrap
132 | return _output_mapper
133 |
134 |
135 | def func_mapper(task, method, software):
136 | """Function decorator for adding function maps from extensions."""
137 | def _func_mapper(func):
138 | @functools.wraps(func)
139 | def func_wrap(*args, **kwargs):
140 | return func(*args, **kwargs)
141 | key = (task, method, software)
142 | Config.set_func_map(key, func_wrap)
143 | return func_wrap
144 | return _func_mapper
145 |
146 |
147 | def cli_format_mapper(input_name, task, method, software):
148 | """Function decorator for adding CLI arg formatter functions from extensions."""
149 | def _cli_format_mapper(func):
150 | @functools.wraps(func)
151 | def func_wrap(*args, **kwargs):
152 | return func(*args, **kwargs)
153 | key = (task, method, software)
154 | Config.set_CLI_arg_map(key, input_name, func_wrap)
155 | return func_wrap
156 | return _cli_format_mapper
157 |
158 |
159 | def software_versions(software):
160 | """Function decorator to register an extension function as the function that returns
161 | a dict of pertinent software versions for that extension."""
162 | def _software_versions(func):
163 | @functools.wraps(func)
164 | def func_wrap(*args, **kwargs):
165 | return func(*args, **kwargs)
166 | Config.set_software_version_func(software, func_wrap)
167 | return func_wrap
168 | return _software_versions
169 |
170 |
171 | def sources_mapper(task, method, software, **sources_dict):
172 |     """Function decorator to register an extension function that generates task source
173 | files."""
174 | def _sources_mapper(func):
175 | @functools.wraps(func)
176 | def func_wrap(*args, **kwargs):
177 | return func(*args, **kwargs)
178 | key = (task, method, software)
179 | Config.set_source_map(key, func_wrap, **sources_dict)
180 | return func_wrap
181 | return _sources_mapper
182 |
183 |
184 | def register_output_file(file_reference, file_name, task, method, software):
185 | key = (task, method, software)
186 | Config.set_output_file_map(key, file_reference, file_name)
187 |
--------------------------------------------------------------------------------
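For illustration, a minimal sketch of an extension module that would be registered on the `matflow.extension` entry point. Only the decorators and the module-level `SOFTWARE` and `__version__` attributes are required by `load_extensions`; the task/method/software names and the mapper function signatures are hypothetical:

from matflow.extensions import input_mapper, output_mapper, software_versions

SOFTWARE = 'my_solver'  # Checked for by `load_extensions`.
__version__ = '0.1.0'   # Likewise required.


@input_mapper(input_file='input.dat', task='simulate', method='FEM',
              software='my_solver')
def write_input_dat(path, temperature):
    # Hypothetical input map: write a resolved input value to the input file.
    with open(path, 'w') as handle:
        handle.write(f'{temperature}\n')


@output_mapper(output_name='final_energy', task='simulate', method='FEM',
               software='my_solver')
def read_final_energy(path):
    # Hypothetical output map: parse an output file into a parameter value.
    with open(path) as handle:
        return float(handle.read())


@software_versions(software='my_solver')
def get_versions():
    # Hypothetical: return a dict of pertinent version strings.
    return {'my_solver': '1.0.0'}
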
/matflow/hicklable.py:
--------------------------------------------------------------------------------
1 | """`matflow.hicklable.py`"""
2 |
3 | import numpy as np
4 |
5 | HICKLABLE_PRIMITIVES = (
6 | int,
7 | float,
8 | str,
9 | np.ndarray,
10 | np.int32,
11 | np.int64,
12 | type(None),
13 | )
14 |
15 |
16 | def to_hicklable(obj):
17 | """Get an object representation that can be saved to an HDF5 file using `hickle`.
18 |
19 | Parameters
20 | ----------
21 | obj : object
22 | Object whose hicklable representation is to be returned.
23 |
24 | """
25 |
26 | if isinstance(obj, (list, tuple, set)):
27 | obj_valid = []
28 | for item in obj:
29 | obj_valid.append(to_hicklable(item))
30 | if isinstance(obj, tuple):
31 | obj_valid = tuple(obj_valid)
32 | elif isinstance(obj, set):
33 | obj_valid = set(obj_valid)
34 |
35 | elif isinstance(obj, dict):
36 | obj_valid = {}
37 | for dct_key, dct_val in obj.items():
38 | obj_valid.update({dct_key: to_hicklable(dct_val)})
39 |
40 | elif isinstance(obj, HICKLABLE_PRIMITIVES):
41 | obj_valid = obj
42 |
43 | else:
44 | all_attrs = {}
45 | if hasattr(obj, '__dict__'):
46 | all_attrs.update(getattr(obj, '__dict__'))
47 | if hasattr(obj, '__slots__'):
48 | all_attrs.update({k: getattr(obj, k) for k in getattr(obj, '__slots__')
49 | if k != '__dict__'})
50 | if not hasattr(obj, '__dict__') and not hasattr(obj, '__slots__'):
51 | raise ValueError(f'Object not understood: {obj}.')
52 |
53 | obj_valid = {}
54 | for attr, value in all_attrs.items():
55 | obj_valid.update({attr: to_hicklable(value)})
56 |
57 | return obj_valid
58 |
--------------------------------------------------------------------------------
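A short usage sketch: `to_hicklable` recurses through containers and falls back to `__dict__`/`__slots__` attributes for arbitrary objects:

import numpy as np

from matflow.hicklable import to_hicklable


class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y


data = {'points': [Point(0, 1), Point(2, 3)], 'arr': np.arange(3)}
print(to_hicklable(data))
# {'points': [{'x': 0, 'y': 1}, {'x': 2, 'y': 3}], 'arr': array([0, 1, 2])}
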
/matflow/models/__init__.py:
--------------------------------------------------------------------------------
1 | """`matflow.models.__init__.py`"""
2 |
3 | from matflow.models.command import Command, CommandGroup
4 | from matflow.models.task import Task, TaskSchema
5 |
--------------------------------------------------------------------------------
/matflow/models/command.py:
--------------------------------------------------------------------------------
1 | """`matflow.models.command.py`
2 |
3 | Module containing functionality for executing commands.
4 |
5 | """
6 |
7 | import copy
8 |
9 | import numpy as np
10 |
11 | from matflow.errors import CommandError
12 | from matflow.utils import dump_to_yaml_string
13 | from matflow.hicklable import to_hicklable
14 |
15 |
16 | def list_formatter(lst):
17 | return ' '.join([f'{i}' for i in lst])
18 |
19 |
20 | DEFAULT_FORMATTERS = {
21 | str: lambda x: x,
22 | int: lambda number: str(number),
23 | float: lambda number: f'{number:.6f}',
24 | list: list_formatter,
25 | set: list_formatter,
26 | tuple: list_formatter,
27 | }
28 |
29 |
30 | class CommandGroup(object):
31 | """Class to represent a group of commands."""
32 |
33 | def __init__(self, commands, command_files=None, command_pathways=None):
34 | """
35 | Parameters
36 | ----------
37 |         commands : list of dict
38 | command_files : dict, optional
39 | command_pathways : list of dict, optional
40 |
41 | """
42 |
43 | self.commands = [Command(**i) for i in commands]
44 | self.command_files = command_files or {}
45 | self.command_pathways = command_pathways or []
46 |
47 | self._validate_command_pathways()
48 | self.resolve_command_pathways()
49 |
50 | @property
51 | def all_commands(self):
52 | return self.commands
53 |
54 | def __repr__(self):
55 | out = f'{self.__class__.__name__}(commands=['
56 | out += ', '.join([f'{i!r}' for i in self.all_commands]) + ']'
57 | out += ')'
58 | return out
59 |
60 | def __str__(self):
61 | return dump_to_yaml_string(self.as_dict())
62 |
63 | def as_dict(self):
64 | return to_hicklable(self)
65 |
66 | def check_pathway_conditions(self, inputs_list):
67 | """Check the command pathway conditions are compatible with a list of schema
68 | inputs.
69 |
70 | Parameters
71 | ----------
72 | inputs_list : list of str
73 |
74 | """
75 |
76 | for cmd_pth_idx, cmd_pth in enumerate(self.command_pathways):
77 | condition = cmd_pth.get('condition')
78 | if condition:
79 | bad_keys = set(condition) - set(inputs_list)
80 | if bad_keys:
81 | bad_keys_fmt = ', '.join(['"{}"'.format(i) for i in bad_keys])
82 | msg = ((f'Unknown command pathway condition inputs for command '
83 | f'pathway index {cmd_pth_idx}: {bad_keys_fmt}.'))
84 | raise CommandError(msg)
85 |
86 | def _validate_command_pathways(self):
87 |
88 | if not self.command_pathways:
89 | self.command_pathways = [
90 | {'commands_idx': list(range(len(self.all_commands)))}
91 | ]
92 |
93 | req_keys = ['commands_idx']
94 | allowed_keys = req_keys + ['condition', 'commands']
95 |
96 |         # Condition input labels are validated against the schema inputs in `check_pathway_conditions`.
97 | no_condition_count = 0
98 | for cmd_pth_idx, cmd_pth in enumerate(self.command_pathways):
99 |
100 | bad_keys = set(cmd_pth) - set(allowed_keys)
101 | miss_keys = set(req_keys) - set(cmd_pth)
102 |
103 | if bad_keys:
104 | bad_keys_fmt = ', '.join(['"{}"'.format(i) for i in bad_keys])
105 | msg = ((f'Unknown command pathway keys for command pathway index '
106 | f'{cmd_pth_idx}: {bad_keys_fmt}.'))
107 | raise CommandError(msg)
108 |
109 | if miss_keys:
110 | miss_keys_fmt = ', '.join(['"{}"'.format(i) for i in miss_keys])
111 | msg = (f'Missing required command pathway keys for command pathway '
112 | f'index {cmd_pth_idx}: {miss_keys_fmt}.')
113 | raise CommandError(msg)
114 |
115 | if 'condition' not in cmd_pth:
116 | no_condition_count += 1
117 |
118 | cmds_idx = cmd_pth['commands_idx']
119 | if (
120 | not isinstance(cmds_idx, list) or
121 | not all([i in range(len(self.all_commands)) for i in cmds_idx])
122 | ):
123 | msg = (f'`commands_idx` must be a list of integer indices into '
124 | f'`all_commands`.')
125 | raise CommandError(msg)
126 |
127 | if no_condition_count > 1:
128 | msg = (f'Only one command pathway may be specified without a `condition` key '
129 | f'(the default command pathway).')
130 | raise CommandError(msg)
131 |
132 | def resolve_command_pathways(self):
133 | """Add a `commands` list to each `commands_pathway`, according to its
134 | `commands_idx`."""
135 |
136 | for cmd_pth_idx, cmd_pth in enumerate(self.command_pathways):
137 | commands = [copy.deepcopy(self.all_commands[i])
138 | for i in cmd_pth['commands_idx']]
139 | cmd_pth.update({'commands': commands})
140 | self.resolve_command_files(cmd_pth_idx)
141 |
142 | def resolve_command_files(self, cmd_pathway_idx):
143 |
144 | # Validate command_files dict first:
145 | for cmd_fn_label, cmd_fn in self.command_files.items():
146 | if not isinstance(cmd_fn, str) or '<>' not in cmd_fn:
147 | msg = ('`command_files` must be a dict that maps a command file label to '
148 | 'a file name template that must include the substring "<>", '
149 | 'which is substituted by increasing integers.')
150 | raise CommandError(msg)
151 |
152 | file_names = self.get_command_file_names(cmd_pathway_idx)
153 |
154 | for cmd_idx, command in enumerate(self.get_commands(cmd_pathway_idx)):
155 |
156 | for opt_idx, opt in enumerate(command.options):
157 | for opt_token_idx, opt_token in enumerate(opt):
158 | options_files = file_names['all_commands'][cmd_idx]['options']
159 | for cmd_fn_label, cmd_fn in options_files.items():
160 | if f'<<{cmd_fn_label}>>' in opt_token:
161 | new_fmt_opt = opt_token.replace(f'<<{cmd_fn_label}>>', cmd_fn)
162 | command.options[opt_idx][opt_token_idx] = new_fmt_opt
163 |
164 | for param_idx, param in enumerate(command.parameters):
165 | params_files = file_names['all_commands'][cmd_idx]['parameters']
166 | for cmd_fn_label, cmd_fn in params_files.items():
167 | if f'<<{cmd_fn_label}>>' in param:
168 | new_param = param.replace(f'<<{cmd_fn_label}>>', cmd_fn)
169 | command.parameters[param_idx] = new_param
170 |
171 | if command.stdin:
172 | stdin_files = file_names['all_commands'][cmd_idx]['stdin']
173 | for cmd_fn_label, cmd_fn in stdin_files.items():
174 | if f'<<{cmd_fn_label}>>' in command.stdin:
175 | new_stdin = command.stdin.replace(f'<<{cmd_fn_label}>>', cmd_fn)
176 | command.stdin = new_stdin
177 |
178 | if command.stdout:
179 | new_stdout = command.stdout
180 | stdout_files = file_names['all_commands'][cmd_idx]['stdout']
181 | for cmd_fn_label, cmd_fn in stdout_files.items():
182 | if f'<<{cmd_fn_label}>>' in command.stdout:
183 | new_stdout = command.stdout.replace(f'<<{cmd_fn_label}>>', cmd_fn)
184 | command.stdout = new_stdout
185 |
186 | if command.stderr:
187 | stderr_files = file_names['all_commands'][cmd_idx]['stderr']
188 | for cmd_fn_label, cmd_fn in stderr_files.items():
189 | if f'<<{cmd_fn_label}>>' in command.stderr:
190 | new_stderr = command.stderr.replace(f'<<{cmd_fn_label}>>', cmd_fn)
191 | command.stderr = new_stderr
192 |
193 | def get_commands(self, cmd_pathway_idx):
194 | return self.command_pathways[cmd_pathway_idx]['commands']
195 |
196 | def select_command_pathway(self, inputs):
197 |         """Get the correct command pathway index, given a set of input names and values.
198 |
199 | Parameters
200 | ----------
201 | inputs : dict of (str: list)
202 | Dict whose keys are input names and whose values are lists of input values
203 | (i.e. one element for each task sequence item).
204 |
205 | Returns
206 | -------
207 | cmd_pathway_idx : int
208 |
209 | """
210 |
211 | # Consider an input defined if any of its values (in the sequence) are not `None`:
212 | inputs_defined = [k for k, v in inputs.items() if any([i is not None for i in v])]
213 |
214 | # Sort pathways by most-specific first:
215 | order_idx = np.argsort([len(i.get('condition', []))
216 | for i in self.command_pathways])[::-1]
217 |
218 | cmd_pathway_idx = None
219 | for cmd_pth_idx in order_idx:
220 | cmd_pth = self.command_pathways[cmd_pth_idx]
221 | condition = cmd_pth.get('condition', [])
222 | if not (set(condition) - set(inputs_defined)):
223 | # All inputs named in condition are defined
224 | cmd_pathway_idx = cmd_pth_idx
225 | break
226 |
227 | if cmd_pathway_idx is None:
228 | raise CommandError('Could not find suitable command pathway.')
229 |
230 | return cmd_pathway_idx
231 |
232 | def get_command_file_names(self, cmd_pathway_idx):
233 |
234 | out = {
235 | 'input_map': {},
236 | 'output_map': {},
237 | 'all_commands': [],
238 | }
239 |
240 | file_name_increments = {k: 0 for k in self.command_files.keys()}
241 |
242 | # Input map should use the first increment:
243 | for cmd_fn_label in self.command_files.keys():
244 | new_fn = self.command_files[cmd_fn_label].replace(
245 | '<>',
246 | str(file_name_increments[cmd_fn_label]),
247 | )
248 | out['input_map'].update({cmd_fn_label: new_fn})
249 |
250 | for command in self.get_commands(cmd_pathway_idx):
251 |
252 | file_names_i = {
253 | 'stdin': {},
254 | 'options': {},
255 | 'parameters': {},
256 | 'stdout': {},
257 | 'stderr': {},
258 | }
259 |
260 | cmd_fn_is_incremented = {k: False for k in self.command_files.keys()}
261 | for cmd_fn_label in self.command_files.keys():
262 |
263 | for opt in command.options_raw:
264 | fmt_opt = list(opt)
265 | for opt_token in fmt_opt:
266 | if f'<<{cmd_fn_label}>>' in opt_token:
267 | new_fn = self.command_files[cmd_fn_label].replace(
268 | '<>',
269 | str(file_name_increments[cmd_fn_label]),
270 | )
271 |                             file_names_i['options'].update({cmd_fn_label: new_fn})
272 |
273 | for param in command.parameters_raw:
274 | if f'<<{cmd_fn_label}>>' in param:
275 | new_fn = self.command_files[cmd_fn_label].replace(
276 | '<>',
277 | str(file_name_increments[cmd_fn_label]),
278 | )
279 | file_names_i['parameters'].update({cmd_fn_label: new_fn})
280 |
281 | if command.stdin_raw:
282 | if f'<<{cmd_fn_label}>>' in command.stdin_raw:
283 | new_fn = self.command_files[cmd_fn_label].replace(
284 | '<>',
285 | str(file_name_increments[cmd_fn_label]),
286 | )
287 | file_names_i['stdin'].update({cmd_fn_label: new_fn})
288 |
289 | if command.stdout_raw:
290 | if f'<<{cmd_fn_label}>>' in command.stdout_raw:
291 | file_name_increments[cmd_fn_label] += 1
292 | cmd_fn_is_incremented[cmd_fn_label] = True
293 | new_fn = self.command_files[cmd_fn_label].replace(
294 | '<>',
295 | str(file_name_increments[cmd_fn_label]),
296 | )
297 | file_names_i['stdout'].update({cmd_fn_label: new_fn})
298 |
299 |                 if command.stderr_raw:
300 |                     if f'<<{cmd_fn_label}>>' in command.stderr_raw:
301 |                         # Increment only if stdout did not already do so
302 |                         # for this command:
303 |                         if not cmd_fn_is_incremented[cmd_fn_label]:
304 |                             file_name_increments[cmd_fn_label] += 1
305 |                             cmd_fn_is_incremented[cmd_fn_label] = True
306 |                         new_fn = self.command_files[cmd_fn_label].replace(
307 |                             '<>',
308 |                             str(file_name_increments[cmd_fn_label]),
309 |                         )
310 |                         file_names_i['stderr'].update({cmd_fn_label: new_fn})
311 |
312 | out['all_commands'].append(file_names_i)
313 |
314 | # Output map should use the final increment:
315 | for cmd_fn_label in self.command_files.keys():
316 | new_fn = self.command_files[cmd_fn_label].replace(
317 | '<>',
318 | str(file_name_increments[cmd_fn_label]),
319 | )
320 | out['output_map'].update({cmd_fn_label: new_fn})
321 |
322 | return out
323 |
324 | def get_formatted_commands(self, inputs_list, num_cores, cmd_pathway_idx):
325 | """Format commands into strings with hpcflow variable substitutions where
326 | required.
327 |
328 | Parameters
329 | ----------
330 | inputs_list : list of str
331 | List of input names from which a subset of hpcflow variables may be defined.
332 | num_cores : int
333 | Number of CPU cores to use for this task. This is required to determine
334 | whether a "parallel_mode" should be included in the formatted commands.
335 | cmd_pathway_idx : int
336 | Which command pathway should be returned.
337 |
338 | Returns
339 | -------
340 | tuple of (fmt_commands, var_names)
341 | fmt_commands : list of dict
342 | Each list item is a dict that contains keys corresponding to an individual
343 | command to be run.
344 | var_names : dict of (str, str)
345 | A dict that maps a parameter name to an hpcflow variable name.
346 |
347 | """
348 |
349 | fmt_commands = []
350 |
351 | var_names = {}
352 | for command in self.get_commands(cmd_pathway_idx):
353 |
354 | fmt_opts = []
355 | for opt in command.options:
356 | fmt_opt = list(opt)
357 | for opt_token_idx, opt_token in enumerate(fmt_opt):
358 | if opt_token in inputs_list:
359 | # Replace with an `hpcflow` variable:
360 | var_name = 'matflow_input_{}'.format(opt_token)
361 | fmt_opt[opt_token_idx] = '<<{}>>'.format(var_name)
362 | if opt_token not in var_names:
363 | var_names.update({opt_token: var_name})
364 |
365 | fmt_opt_joined = ' '.join(fmt_opt)
366 | fmt_opts.append(fmt_opt_joined)
367 |
368 | fmt_params = []
369 | for param in command.parameters:
370 |
371 | fmt_param = param
372 | if param in inputs_list:
373 | # Replace with an `hpcflow` variable:
374 | var_name = 'matflow_input_{}'.format(param)
375 | fmt_param = '<<{}>>'.format(var_name)
376 |
377 | if param not in var_names:
378 | var_names.update({param: var_name})
379 |
380 | fmt_params.append(fmt_param)
381 |
382 | cmd_fmt = ' '.join([command.command] + fmt_opts + fmt_params)
383 |
384 | if command.stdin:
385 | cmd_fmt += ' < {}'.format(command.stdin)
386 |
387 | if command.stdout:
388 | cmd_fmt += ' >> {}'.format(command.stdout)
389 |
390 | if command.stderr:
391 | if command.stderr == command.stdout:
392 | cmd_fmt += ' 2>&1'
393 | else:
394 | cmd_fmt += ' 2>> {}'.format(command.stderr)
395 |
396 | cmd_dict = {'line': cmd_fmt}
397 | if command.parallel_mode and num_cores > 1:
398 | cmd_dict.update({'parallel_mode': command.parallel_mode})
399 |
400 | fmt_commands.append(cmd_dict)
401 |
402 | return fmt_commands, var_names
403 |
404 |
405 | class Command(object):
406 | """Class to represent a command to be executed by a shell."""
407 |
408 | def __init__(self, command, options=None, parameters=None, stdin=None, stdout=None,
409 | stderr=None, parallel_mode=None):
410 |
411 | self.command = command
412 | self.parallel_mode = parallel_mode
413 |
414 | # Raw versions may include command file name variables:
415 | self.options_raw = options or []
416 | self.parameters_raw = parameters or []
417 | self.stdin_raw = stdin
418 | self.stdout_raw = stdout
419 | self.stderr_raw = stderr
420 |
421 | # Non-raw versions modified by the parent CommandGroup to include any resolved
422 | # command file name:
423 | self.options = copy.deepcopy(self.options_raw)
424 | self.parameters = copy.deepcopy(self.parameters_raw)
425 | self.stdin = self.stdin_raw
426 | self.stdout = self.stdout_raw
427 | self.stderr = self.stderr_raw
428 |
429 | def __repr__(self):
430 | out = f'{self.__class__.__name__}({self.command!r}'
431 | if self.options:
432 | out += f', options={self.options!r}'
433 | if self.parameters:
434 | out += f', parameters={self.parameters!r}'
435 | if self.stdin:
436 | out += f', stdin={self.stdin!r}'
437 | if self.stdout:
438 | out += f', stdout={self.stdout!r}'
439 | if self.stderr:
440 | out += f', stderr={self.stderr!r}'
441 | out += ')'
442 | return out
443 |
444 | def __str__(self):
445 |
446 | cmd_fmt = ' '.join(
447 | [self.command] +
448 | [' '.join(i) for i in self.options] +
449 | self.parameters
450 | )
451 |
452 | if self.stdin:
453 | cmd_fmt += ' < {}'.format(self.stdin)
454 | if self.stdout:
455 | cmd_fmt += ' > {}'.format(self.stdout)
456 | if self.stderr:
457 | if self.stderr == self.stdout:
458 | cmd_fmt += ' 2>&1'
459 | else:
460 | cmd_fmt += ' 2> {}'.format(self.stderr)
461 |
462 | return cmd_fmt
463 |
--------------------------------------------------------------------------------
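A sketch of pathway selection and command formatting; the solver name and input names are hypothetical:

from matflow.models.command import CommandGroup

cmd_group = CommandGroup(
    commands=[
        {'command': 'solver', 'parameters': ['input_file']},
        {'command': 'solver', 'options': [['--damping', 'damping']],
         'parameters': ['input_file']},
    ],
    command_pathways=[
        {'commands_idx': [0]},                            # default pathway
        {'commands_idx': [1], 'condition': ['damping']},  # if `damping` defined
    ],
)

# `damping` has a non-None value, so the more specific pathway is selected:
pathway_idx = cmd_group.select_command_pathway(
    {'damping': [0.1], 'input_file': ['in.dat']})

fmt_commands, var_names = cmd_group.get_formatted_commands(
    ['damping', 'input_file'], num_cores=1, cmd_pathway_idx=pathway_idx)
print(fmt_commands[0]['line'])
# solver --damping <<matflow_input_damping>> <<matflow_input_input_file>>
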
/matflow/models/element.py:
--------------------------------------------------------------------------------
1 | """matflow.models.element.py"""
2 |
3 | import copy
4 |
5 | import hickle
6 | import h5py
7 |
8 | from matflow.models.parameters import Parameters, Files
9 |
10 |
11 | class Element(object):
12 |
13 | __slots__ = [
14 | '_task',
15 | '_element_idx',
16 | '_inputs',
17 | '_outputs',
18 | '_files',
19 | '_resource_usage',
20 | ]
21 |
22 | def __init__(self, task, element_idx, inputs_data_idx=None, outputs_data_idx=None,
23 | files_data_idx=None, resource_usage=None):
24 |
25 | self._task = task
26 | self._element_idx = element_idx
27 | self._resource_usage = resource_usage
28 |
29 | self._inputs = Parameters(self, inputs_data_idx)
30 | self._outputs = Parameters(self, outputs_data_idx)
31 | self._files = Files(self, files_data_idx)
32 |
33 | def __repr__(self):
34 | out = (
35 | f'{self.__class__.__name__}('
36 | f'inputs={self.inputs!r}, '
37 | f'outputs={self.outputs!r}, '
38 | f'files={self.files!r}'
39 | f')'
40 | )
41 | return out
42 |
43 | @property
44 | def task(self):
45 | return self._task
46 |
47 | @property
48 | def element_idx(self):
49 | return self._element_idx
50 |
51 | @property
52 | def resource_usage(self):
53 | return self._resource_usage
54 |
55 | def as_dict(self):
56 |         """Return an attributes dict with leading underscores removed from keys."""
57 | self_dict = {k.lstrip('_'): getattr(self, k) for k in self.__slots__}
58 | self_dict.pop('task')
59 | self_dict['inputs_data_idx'] = self_dict.pop('inputs').as_dict()
60 | self_dict['outputs_data_idx'] = self_dict.pop('outputs').as_dict()
61 | self_dict['files_data_idx'] = self_dict.pop('files').as_dict()
62 | return self_dict
63 |
64 | def get_parameter_data_idx(self, parameter_name):
65 | try:
66 | out = self.outputs.get_data_idx(parameter_name)
67 | except KeyError:
68 | out = self.inputs.get_data_idx(parameter_name)
69 |
70 | return out
71 |
72 | def get_input_data_idx(self, input_name, safe_name=False):
73 | return self.inputs.get_data_idx(input_name, safe_name)
74 |
75 | def get_output_data_idx(self, output_name, safe_name=False):
76 | return self.outputs.get_data_idx(output_name, safe_name)
77 |
78 | def get_file_data_idx(self, file_name, safe_name=False):
79 | return self.files.get_data_idx(file_name, safe_name)
80 |
81 | def get_input(self, input_name, safe_name=False):
82 | if not safe_name:
83 | input_name = self.inputs.get_name_map()[input_name]
84 | return getattr(self.inputs, input_name)
85 |
86 | def get_output(self, output_name, safe_name=False):
87 | if not safe_name:
88 | output_name = self.outputs.get_name_map()[output_name]
89 | return getattr(self.outputs, output_name)
90 |
91 | def get_file(self, file_name, safe_name=False):
92 | if not safe_name:
93 | file_name = self.files.get_name_map()[file_name]
94 | return getattr(self.files, file_name)
95 |
96 | def get_file_lines(self, file_name, lines_slice=(10,), safe_name=False):
97 | return self.files.get_lines(file_name, lines_slice, safe_name)
98 |
99 | def print_file_lines(self, file_name, lines_slice=(10,), safe_name=False):
100 | self.files.print_lines(file_name, lines_slice, safe_name)
101 |
102 | @property
103 | def inputs(self):
104 | return self._inputs
105 |
106 | @property
107 | def outputs(self):
108 | return self._outputs
109 |
110 | @property
111 | def files(self):
112 | return self._files
113 |
114 | @property
115 | def HDF5_path(self):
116 | return self.task.HDF5_path + f'/\'elements\'/data/data_{self.element_idx}'
117 |
118 | def add_input(self, input_name, value=None, data_idx=None):
119 | return self.inputs.add_parameter(input_name, 'inputs', value, data_idx)
120 |
121 | def add_output(self, output_name, value=None, data_idx=None):
122 | return self.outputs.add_parameter(output_name, 'outputs', value, data_idx)
123 |
124 | def add_file(self, file_name, value=None, data_idx=None):
125 | return self.files.add_parameter(file_name, 'files', value, data_idx)
126 |
127 | def add_resource_usage(self, resource_usage):
128 |
129 | with h5py.File(self.task.workflow.loaded_path, 'r+') as handle:
130 |
131 | # Load and save attributes of parameter index dict:
132 | path = self.HDF5_path + "/'resource_usage'"
133 | attributes = dict(handle[path].attrs)
134 | del handle[path]
135 |
136 | # Dump resource usage:
137 | hickle.dump(resource_usage, handle, path=path)
138 |
139 | # Update dict attributes to maintain /workflow_obj loadability
140 | for k, v in attributes.items():
141 | handle[path].attrs[k] = v
142 |
143 | def get_element_dependencies(self, recurse=False):
144 | """Get the task/element indices of elements that a given element depends on.
145 |
146 | Parameters
147 | ----------
148 | recurse : bool, optional
149 | If False, only include task/element indices that are direct dependencies of
150 |             the given element. If True, also include task/element indices that are
151 |             indirect dependencies of the given element.
152 |
153 | Returns
154 | -------
155 | dict of (int : list)
156 | Dict whose keys are task indices and whose values are lists of element indices
157 | for a given task.
158 |
159 | Notes
160 | -----
161 | For the inverse, see `get_dependent_elements`.
162 |
163 | """
164 |
165 | task = self.task
166 | workflow = task.workflow
167 | elem_deps = {}
168 | for inp_alias, ins in workflow.elements_idx[task.task_idx]['inputs'].items():
169 | if ins['task_idx'][self.element_idx] is not None:
170 | dep_elem_idx = ins['element_idx'][self.element_idx]
171 |                 # Initialise the list for this task index if not yet present:
172 | if ins['task_idx'][self.element_idx] not in elem_deps:
173 | elem_deps.update({ins['task_idx'][self.element_idx]: []})
174 | elem_deps[ins['task_idx'][self.element_idx]].extend(dep_elem_idx)
175 |
176 | if recurse:
177 | new_elem_deps = copy.deepcopy(elem_deps)
178 | for task_idx, element_idx in elem_deps.items():
179 | for element_idx_i in element_idx:
180 | element_i = workflow.tasks[task_idx].elements[element_idx_i]
181 | add_elem_deps = element_i.get_element_dependencies(recurse=True)
182 | for k, v in add_elem_deps.items():
183 | if k not in new_elem_deps:
184 | new_elem_deps.update({k: []})
185 | new_elem_deps[k].extend(v)
186 |
187 | elem_deps = new_elem_deps
188 |
189 | # Remove repeats:
190 | for k, v in elem_deps.items():
191 | elem_deps[k] = list(set(v))
192 |
193 | return elem_deps
194 |
195 | def get_dependent_elements(self, recurse=False):
196 | """Get the task/element indices of elements that depend on a given element.
197 |
198 | Parameters
199 | ----------
200 | recurse : bool, optional
201 | If False, only include task/element indices that depend directly on the given
202 | element. If True, also include task/element indices that depend indirectly on
203 | the given element.
204 |
205 | Returns
206 | -------
207 | dict of (int : list)
208 | Dict whose keys are task indices and whose values are lists of element indices
209 | for a given task.
210 |
211 | Notes
212 | -----
213 | For the inverse, see `get_element_dependencies`.
214 |
215 | """
216 |
217 | task = self.task
218 | workflow = task.workflow
219 | dep_elems = {}
220 |
221 | for task_idx, elems_idx in enumerate(workflow.elements_idx):
222 | for inp_alias, ins in elems_idx['inputs'].items():
223 | if ins.get('task_idx') == task.task_idx:
224 | for element_idx, i in enumerate(ins['element_idx']):
225 | if self.element_idx in i:
226 | if task_idx not in dep_elems:
227 | dep_elems.update({task_idx: []})
228 | dep_elems[task_idx].append(element_idx)
229 |
230 | if recurse:
231 | new_dep_elems = copy.deepcopy(dep_elems)
232 | for task_idx, element_idx in dep_elems.items():
233 | for element_idx_i in element_idx:
234 | element_i = workflow.tasks[task_idx].elements[element_idx_i]
235 | add_elem_deps = element_i.get_dependent_elements(recurse=True)
236 | for k, v in add_elem_deps.items():
237 | if k not in new_dep_elems:
238 | new_dep_elems.update({k: []})
239 | new_dep_elems[k].extend(v)
240 |
241 | dep_elems = new_dep_elems
242 |
243 | # Remove repeats:
244 | for k, v in dep_elems.items():
245 | dep_elems[k] = list(set(v))
246 |
247 | return dep_elems
248 |
249 | def get_parameter_dependency_value(self, parameter_dependency_name):
250 |
251 | workflow = self.task.workflow
252 |
253 | in_tasks = workflow.get_input_tasks(parameter_dependency_name)
254 | out_tasks = workflow.get_output_tasks(parameter_dependency_name)
255 | elem_deps = self.get_element_dependencies(recurse=True)
256 |
257 | if parameter_dependency_name in self.task.schema.input_names:
258 | param_vals = [self.get_input(parameter_dependency_name)]
259 |
260 | elif out_tasks:
261 | elems = []
262 | out_tasks_valid = set(out_tasks) & set(elem_deps)
263 | if not out_tasks_valid:
264 | msg = (f'Parameter "{parameter_dependency_name}" is not a dependency of '
265 | f'given element of task "{self.task.name}".')
266 | raise ValueError(msg)
267 | for task_idx in out_tasks_valid:
268 | for i in elem_deps[task_idx]:
269 | elems.append(workflow.tasks[task_idx].elements[i])
270 | param_vals = [elem.get_output(parameter_dependency_name) for elem in elems]
271 |
272 | elif in_tasks:
273 | elems = []
274 | in_tasks_valid = set(in_tasks) & set(elem_deps)
275 | if not in_tasks_valid:
276 | msg = (f'Parameter "{parameter_dependency_name}" is not a dependency of '
277 | f'given element of task "{self.task.name}".')
278 | raise ValueError(msg)
279 | for task_idx in in_tasks_valid:
280 | for i in elem_deps[task_idx]:
281 | elems.append(workflow.tasks[task_idx].elements[i])
282 | param_vals = [elem.get_input(parameter_dependency_name) for elem in elems]
283 | else:
284 | msg = (f'Parameter "{parameter_dependency_name}" is not an input or output '
285 | f'parameter for any workflow task.')
286 | raise ValueError(msg)
287 |
288 | if len(param_vals) == 1:
289 | param_vals = param_vals[0]
290 |
291 | return param_vals
292 |
293 | def get_dependent_parameter_value(self, dependent_parameter_name):
294 |
295 | workflow = self.task.workflow
296 |
297 | out_tasks = workflow.get_output_tasks(dependent_parameter_name)
298 | dep_elems = self.get_dependent_elements(recurse=True)
299 |
300 | if dependent_parameter_name in self.task.schema.outputs:
301 | param_vals = [self.get_output(dependent_parameter_name)]
302 |
303 | elif out_tasks:
304 | elems = []
305 | out_tasks_valid = set(out_tasks) & set(dep_elems)
306 | if not out_tasks_valid:
307 | msg = (f'Parameter "{dependent_parameter_name}" does not depend on the '
308 | f'given element of task "{self.task.name}".')
309 | raise ValueError(msg)
310 | for task_idx in out_tasks_valid:
311 | for i in dep_elems[task_idx]:
312 | elems.append(workflow.tasks[task_idx].elements[i])
313 | param_vals = [elem.get_output(dependent_parameter_name) for elem in elems]
314 | else:
315 | msg = (f'Parameter "{dependent_parameter_name}" is not an output parameter '
316 | f'for any workflow task.')
317 | raise ValueError(msg)
318 |
319 | if len(param_vals) == 1:
320 | param_vals = param_vals[0]
321 |
322 | return param_vals
323 |
--------------------------------------------------------------------------------
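The two dependency queries are inverses of one another. An illustration of their return shapes, assuming `workflow` is a loaded `Workflow` in which task 1 consumes outputs of task 0, and task 2 consumes outputs of task 1 (all hypothetical):

elem = workflow.tasks[2].elements[0]
elem.get_element_dependencies()              # e.g. {1: [0]}
elem.get_element_dependencies(recurse=True)  # e.g. {1: [0], 0: [0]}

# The inverse query, from the element of the first task:
workflow.tasks[0].elements[0].get_dependent_elements(recurse=True)
# e.g. {1: [0], 2: [0]}
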
/matflow/models/parameters.py:
--------------------------------------------------------------------------------
1 | """matflow.models.parameters.py"""
2 |
3 | import re
4 | import keyword
5 |
6 | import h5py
7 | import hickle
8 |
9 | from matflow.utils import zeropad
10 |
11 |
12 | class Parameters(object):
13 |
14 | """
15 | Attributes
16 | ----------
17 | _element : Element
18 | _parameters : dict
19 | Dict mapping the safe names of the parameters to their data indices within the
20 | HDF5 element_idx group.
21 | _name_map : dict
22 | Dict mapping the non-safe names of the parameters to their safe names. A safe name
23 | refers to a name that can be used as a variable name within Python. For example,
24 | spaces and dots are removed from non-safe names to become safe names. The reason
25 | for doing this is to allow the use of dot-notation to access element data/files.
26 |
27 | """
28 |
29 | def __init__(self, element, parameters):
30 |
31 | self._element = element
32 | self._parameters, self._name_map = self._normalise_params_dict(parameters)
33 |
34 | def __getattr__(self, safe_name):
35 | if safe_name in self._parameters:
36 | wkflow = self._element.task.workflow
37 | names_inv = {safe: non_safe for non_safe, safe in self._name_map.items()}
38 | name = names_inv[safe_name]
39 | data_idx = self.get_data_idx(name)
40 | return wkflow.get_element_data(data_idx)
41 | else:
42 | msg = f'{self.__class__.__name__!r} object has no attribute {safe_name!r}.'
43 | raise AttributeError(msg)
44 |
45 | def __setattr__(self, name, value):
46 | if name in ['_element', '_parameters', '_name_map']:
47 | super().__setattr__(name, value)
48 | else:
49 | raise AttributeError
50 |
51 | def __dir__(self):
52 | return super().__dir__() + list(self._parameters.keys())
53 |
54 | def __repr__(self):
55 | names_fmt = ', '.join([f'{i!r}' for i in self._parameters.keys()])
56 | out = f'{self.__class__.__name__}({names_fmt})'
57 | return out
58 |
59 | def _normalise_params_dict(self, parameters):
60 |
61 | normed_data_idx = {}
62 | name_map = {}
63 | for name, v in (parameters or {}).items():
64 | safe_name = self._normalise_param_name(name, normed_data_idx.keys())
65 | normed_data_idx.update({safe_name: v})
66 | name_map.update({name: safe_name})
67 |
68 | return normed_data_idx, name_map
69 |
70 | @staticmethod
71 | def get_element_data_key(element_idx, param_name):
72 | return f'{zeropad(element_idx, 1000)}_{param_name}'
73 |
74 | @staticmethod
75 | def _normalise_param_name(param_name, existing_names):
76 | """Transform a string so that it is a valid Python variable name."""
77 | param_name_old = param_name
78 | safe_name = param_name.replace('.', '_dot_').replace(' ', '_').replace('-', '_')
79 | if (
80 | re.match(r'\d', safe_name) or
81 | safe_name in dir(Parameters) or
82 | keyword.iskeyword(safe_name) or
83 | safe_name in existing_names
84 | ):
85 | safe_name = 'param_' + safe_name
86 |
87 | if re.search(r'[^a-zA-Z0-9_]', safe_name) or not safe_name:
88 | raise ValueError(f'Invalid parameter name: "{param_name_old}".')
89 |
90 | return safe_name
91 |
92 | def as_dict(self):
93 | return self.get_parameters(safe_names=False)
94 |
95 | def get_parameters(self, safe_names=True):
96 | if not safe_names:
97 | names_inv = {safe: non_safe for non_safe, safe in self._name_map.items()}
98 | return {names_inv[safe_name]: v for safe_name, v in self._parameters.items()}
99 | return self._parameters
100 |
101 | def get(self, name, safe_name=False):
102 | if not safe_name:
103 | name = self._name_map[name]
104 | return getattr(self, name)
105 |
106 | def get_all(self, safe_names=False):
107 | return {
108 | k: self.get(k, safe_names)
109 | for k in (self._parameters if safe_names else self._name_map).keys()
110 | }
111 |
112 | def get_element(self):
113 | """Not a property to reduce chance of attribute collisions."""
114 | return self._element
115 |
116 | def get_name_map(self):
117 | """Not a property to reduce chance of attribute collisions."""
118 | return self._name_map
119 |
120 | def get_data_idx(self, name, safe_name=False):
121 | if not safe_name:
122 | name = self._name_map[name]
123 | out = self._parameters[name]
124 | if isinstance(out, list):
125 | out = tuple(out)
126 | return out
127 |
128 | def add_parameter(self, name, param_type, value=None, data_idx=None):
129 |
130 | if name in self._name_map:
131 | raise ValueError(f'Parameter "{name}" already exists.')
132 |
133 | safe_name = self._normalise_param_name(name, self._parameters.keys())
134 | loaded_path = self._element.task.workflow.loaded_path
135 |
136 | with h5py.File(loaded_path, 'r+') as handle:
137 |
138 | if data_idx is None:
139 | # Add data to the `element_data` group if required:
140 | path = '/element_data'
141 | next_idx = len(handle[path])
142 | element_data_key = self.get_element_data_key(next_idx, name)
143 | new_group = handle[path].create_group(element_data_key)
144 | hickle.dump(value, handle, path=new_group.name)
145 | data_idx = next_idx
146 |
147 | # Load and save attributes of parameter index dict:
148 | path = self._element.HDF5_path + f"/'{param_type}_data_idx'"
149 | attributes = dict(handle[path].attrs)
150 | param_index = hickle.load(handle, path=path)
151 | del handle[path]
152 |
153 | # Update and re-dump parameter index dict:
154 | param_index.update({name: data_idx})
155 | hickle.dump(param_index, handle, path=path)
156 |
157 | # Update parameter index dict attributes to maintain /workflow_obj loadability
158 | for k, v in attributes.items():
159 | handle[path].attrs[k] = v
160 |
161 | self._name_map.update({name: safe_name})
162 | self._parameters.update({safe_name: data_idx})
163 |
164 | return data_idx
165 |
166 |
167 | class Files(Parameters):
168 |
169 | def get_lines(self, file_name, lines_slice=(1, 10), safe_name=False):
170 |
171 | if not safe_name:
172 | file_name = self.get_name_map()[file_name]
173 |
174 | if not isinstance(lines_slice, slice):
175 | if isinstance(lines_slice, int):
176 | lines_slice = (lines_slice,)
177 | lines_slice = slice(*lines_slice)
178 |
179 | return getattr(self, file_name).split('\n')[lines_slice]
180 |
181 | def print_lines(self, file_name, lines_slice=(1, 10), safe_name=False):
182 |
183 | lns = self.get_lines(file_name, lines_slice, safe_name)
184 | print('\n'.join(lns))
185 |
--------------------------------------------------------------------------------
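A minimal sketch of the safe-name normalisation used by `Parameters` above. `_normalise_param_name` is a private static helper, so the import path and direct calls are for illustration only:

from matflow.models.parameters import Parameters

# Dots, spaces and hyphens are substituted:
Parameters._normalise_param_name('grain size', existing_names=[])  # 'grain_size'

# Names that start with a digit, clash with a Python keyword, with an existing
# attribute of `Parameters`, or with a name already in use gain a 'param_' prefix:
Parameters._normalise_param_name('2theta', existing_names=[])      # 'param_2theta'
Parameters._normalise_param_name('lambda', existing_names=[])      # 'param_lambda'

--------------------------------------------------------------------------------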
/matflow/models/software.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import socket
3 |
4 | from matflow.errors import SoftwareInstanceError, MissingSoftwareSourcesError
5 | from matflow.utils import extract_variable_names
6 |
7 |
8 | class SourcesPreparation(object):
9 |
10 | __slots__ = ['_commands', '_env']
11 |
12 | def __init__(self, commands=None, env=None):
13 | self._commands = commands
14 | self._env = EnvironmentSpec(env)
15 |
16 | def __repr__(self):
17 | return f'{self.__class__.__name__}(commands={self.commands!r}, env={self.env!r})'
18 |
19 | def __bool__(self):
20 | return bool(self.commands)
21 |
22 | @property
23 | def commands(self):
24 | return self._commands
25 |
26 | def get_formatted_commands(self, source_vars, sources_dir, task_idx):
27 | out = [{
28 | 'line': (f'matflow prepare-sources '
29 | f'--task-idx={task_idx} '
30 | f'--iteration-idx=$ITER_IDX')
31 | }]
32 | if self.commands:
33 | for new_cmd in self.commands.splitlines():
34 |                 new_cmd = new_cmd.replace('<<sources_dir>>', sources_dir)
35 | for src_var_name, src_name in source_vars.items():
36 | new_cmd = new_cmd.replace(f'<<{src_var_name}>>', src_name)
37 | out.append({'line': new_cmd})
38 | return out
39 |
40 | @property
41 | def commands_fmt(self):
42 | return [{'line': i} for i in (self._commands or '').splitlines()]
43 |
44 | @property
45 | def env(self):
46 | return self._env
47 |
48 | def as_dict(self):
49 | return {'commands': self.commands, 'env': self.env.value}
50 |
51 |
52 | class AuxiliaryTaskSpec(object):
53 |
54 | __slots__ = ['_env']
55 |
56 | def __init__(self, env=None):
57 | self._env = EnvironmentSpec(env)
58 |
59 | def __repr__(self):
60 | return f'{self.__class__.__name__}(env={self.env!r})'
61 |
62 | @property
63 | def env(self):
64 | return self._env
65 |
66 | def as_dict(self):
67 | return {'env': self.env.value}
68 |
69 |
70 | class EnvironmentSpec(object):
71 |
72 | __slots__ = ['_value']
73 |
74 | def __init__(self, value=None):
75 | self._value = value
76 |
77 | def __repr__(self):
78 | return f'{self.__class__.__name__}(value={self.value!r})'
79 |
80 | @property
81 | def value(self):
82 | return self._value
83 |
84 | def as_str(self):
85 | return self.value or ''
86 |
87 | def as_list(self):
88 | return self.as_str().splitlines()
89 |
90 |
91 | class SoftwareInstance(object):
92 |
93 | __slots__ = [
94 | '_machine',
95 | '_software_friendly',
96 | '_label',
97 | '_env',
98 | '_cores_min',
99 | '_cores_max',
100 | '_cores_step',
101 | '_executable',
102 | '_sources_preparation',
103 | '_options',
104 | '_required_scheduler_options',
105 | '_version_info',
106 | '_task_preparation',
107 | '_task_processing',
108 | ]
109 |
110 | def __init__(self, software, label=None, env=None, cores_min=1, cores_max=1,
111 | cores_step=1, executable=None, sources_preparation=None, options=None,
112 | required_scheduler_options=None, version_info=None,
113 | task_preparation=None, task_processing=None):
114 | """Initialise a SoftwareInstance object.
115 |
116 | Parameters
117 | ----------
118 | software : str
119 | Name of the software. This is the name that will be exposed as the `SOFTWARE`
120 | attribute of a Matflow extension package.
121 | label : str, optional
122 | Label used to distinguish software instances for the same `software`. For
123 | example, this could be a version string.
124 | env : str, optional
125 | Multi-line string containing commands to be executed by the shell that are
126 | necessary to set up the environment for running this software.
127 | executable : str, optional
128 | The command that represents the executable for running this software.
129 | cores_min : int, optional
130 | Specifies the minimum number (inclusive) of cores this software instance
131 | supports. By default, 1.
132 | cores_max : int, optional
133 | Specifies the maximum number (inclusive) of cores this software instance
134 | supports. By default, 1.
135 | cores_step : int, optional
136 | Specifies the step size from `cores_min` to `cores_max` this software instance
137 | supports. By default, 1.
138 | sources_preparation : dict, optional
139 | Dict containing the following keys:
140 | env : str
141 | Multi-line string containing commands to be executed by the shell that
142 | are necessary to set up the environment for running the preparation
143 | commands.
144 | commands : str
145 | Multi-line string containing commands to be executed within the
146 | preparation `environment` that are necessary to prepare the
147 | executable. For instance, this might contain commands that compile a
148 | source code file into an executable.
149 | options : list of str, optional
150 | Additional software options as string labels that this instance supports. This
151 | can be used to label software instances for which add-ons are loaded.
152 | required_scheduler_options : dict, optional
153 | Scheduler options that are required for using this software instance.
154 | version_info : dict, optional
155 | If an extension does not provide a `software_version` function, then the
156 | version info dict must be specified here. The keys are str names and the
157 | values are dicts that must contain at least a key `version`.
158 | task_preparation : dict, optional
159 | Dict containing the following keys:
160 | env : str
161 | Multi-line string containing commands to be executed by the shell that
162 | are necessary to set up the environment for running
163 | `matflow prepare-task`.
164 | task_processing : dict, optional
165 | Dict containing the following keys:
166 | env : str
167 | Multi-line string containing commands to be executed by the shell that
168 | are necessary to set up the environment for running
169 | `matflow process-task`.
170 |
171 | """
172 |
173 | self._machine = None # Set once by `set_machine`
174 |
175 | self._software_friendly = software
176 | self._label = label
177 | self._env = EnvironmentSpec(env)
178 | self._cores_min = cores_min
179 | self._cores_max = cores_max
180 | self._cores_step = cores_step
181 | self._sources_preparation = SourcesPreparation(**(sources_preparation or {}))
182 | self._executable = executable
183 | self._options = options or []
184 | self._required_scheduler_options = required_scheduler_options or {}
185 | self._version_info = version_info or None
186 | self._task_preparation = AuxiliaryTaskSpec(**(task_preparation or {}))
187 | self._task_processing = AuxiliaryTaskSpec(**(task_processing or {}))
188 |
189 | self._validate_num_cores()
190 | self._validate_version_infos()
191 |
192 | def _validate_num_cores(self):
193 | if self.cores_min < 1:
194 | raise SoftwareInstanceError('`cores_min` must be greater than 0.')
195 | if self.cores_min > self.cores_max:
196 | msg = '`cores_max` must be greater than or equal to `cores_min`.'
197 | raise SoftwareInstanceError(msg)
198 | if self.cores_step < 1:
199 | raise SoftwareInstanceError('`cores_step` must be greater than 0.')
200 |
201 | def _validate_version_infos(self):
202 | if self.version_info:
203 | REQUIRED = ['version']
204 | for k, v in self.version_info.items():
205 | miss_keys = set(REQUIRED) - set(v.keys())
206 | if miss_keys:
207 | miss_keys_fmt = ', '.join([f'"{i}"' for i in miss_keys])
208 | msg = (f'Missing required keys in version info dict for name "{k}" '
209 | f'for software definition "{self.software}": {miss_keys_fmt}.')
210 | raise SoftwareInstanceError(msg)
211 |
212 | def __repr__(self):
213 | return (
214 | f'{self.__class__.__name__}('
215 | f'software={self.software!r}, '
216 | f'label={self.label!r}, '
217 | f'cores_range={self.cores_range!r}, '
218 | f'executable={self.executable!r}, '
219 | f'version_info={self.version_info!r}'
220 | f')'
221 | )
222 |
223 | def as_dict(self):
224 | """Return attributes dict with preceding underscores removed."""
225 | self_dict = {k.lstrip('_'): getattr(self, k) for k in self.__slots__}
226 | self_dict['software'] = self_dict.pop('software_friendly')
227 | self_dict['env'] = self_dict['env'].value
228 | self_dict['sources_preparation'] = self_dict['sources_preparation'].as_dict()
229 | self_dict['task_preparation'] = self_dict['task_preparation'].as_dict()
230 | self_dict['task_processing'] = self_dict['task_processing'].as_dict()
231 | return self_dict
232 |
233 | def validate_source_maps(self, task, method, software, all_sources_maps):
234 | """Check that any sources required in the preparation commands or executable are
235 | available in the sources map."""
236 |
237 | source_vars = self.source_variables
238 | if source_vars:
239 | if (task, method, software) not in all_sources_maps:
240 | msg = (f'No extension defines a sources map for the task "{task}" with '
241 | f'method "{method}" and software "{software}".')
242 | raise MissingSoftwareSourcesError(msg)
243 | else:
244 | sources_map = all_sources_maps[(task, method, software)]
245 |
246 | for i in source_vars:
247 | if i not in sources_map['sources']:
248 | msg = (f'Source variable name "{i}" is not in the sources map for '
249 | f'task "{task}" with method "{method}" and software '
250 | f'"{software}".')
251 | raise MissingSoftwareSourcesError(msg)
252 |
253 | @classmethod
254 | def load_multiple(cls, software_dict=None):
255 | """Load many SoftwareInstance objects from a dict of software instance
256 | definitions.
257 |
258 | Parameters
259 | ----------
260 | software_dict : dict of (str : dict)
261 | Keys are software names and values are dicts with the following keys:
262 | instances : list of dict
263 | Each element is a dict
264 | instance_defaults : dict, optional
265 | Default values to apply to each dict in the `instances` list.
266 |
267 | Returns
268 | -------
269 | all_instances : dict of (str : list of SoftwareInstance)
270 |
271 | """
272 |
273 | software_dict = software_dict or {}
274 | REQUIRED = ['instances']
275 | ALLOWED = REQUIRED + ['instance_defaults']
276 |
277 | INST_REQUIRED = ['num_cores']
278 | INST_DICT_KEYS = [
279 | 'required_scheduler_options',
280 | 'sources_preparation',
281 | ]
282 | INST_ALLOWED = INST_REQUIRED + INST_DICT_KEYS + [
283 | 'label',
284 | 'options',
285 | 'env',
286 | 'executable',
287 | 'version_info',
288 | 'task_preparation',
289 | 'task_processing',
290 | ]
291 |
292 | all_instances = {}
293 | for name, definition in software_dict.items():
294 |
295 | name_friendly = name
296 | name = SoftwareInstance.get_software_safe(name)
297 |
298 | bad_keys = set(definition.keys()) - set(ALLOWED)
299 | miss_keys = set(REQUIRED) - set(definition.keys())
300 | if bad_keys:
301 | bad_keys_fmt = ', '.join([f'"{i}"' for i in bad_keys])
302 | msg = (f'Unknown keys in software instance definitions for software '
303 | f'"{name}": {bad_keys_fmt}.')
304 | raise SoftwareInstanceError(msg)
305 | if miss_keys:
306 | miss_keys_fmt = ', '.join([f'"{i}"' for i in miss_keys])
307 | msg = (f'Software instance definitions for software "{name}" are missing '
308 | f'keys: {miss_keys_fmt}.')
309 | raise SoftwareInstanceError(msg)
310 |
311 | # Merge instance defaults with instance definition:
312 | inst_defs = definition.get('instance_defaults', {})
313 | all_name_instances = []
314 | for inst in definition['instances']:
315 |
316 | inst = dict(inst)
317 | inst_merged = dict(copy.deepcopy(inst_defs))
318 |
319 | for key, val in inst.items():
320 | if key not in INST_DICT_KEYS:
321 | inst_merged.update({key: val})
322 |
323 | # Merge values of any `INST_DICT_KEYS` individually.
324 | for key in INST_DICT_KEYS:
325 | if key in inst:
326 | if key not in inst_merged:
327 | inst_merged.update({key: {}})
328 | for subkey in inst[key]:
329 | inst_merged[key].update({subkey: inst[key][subkey]})
330 |
331 | bad_keys = set(inst_merged.keys()) - set(INST_ALLOWED)
332 | miss_keys = set(INST_REQUIRED) - set(inst_merged.keys())
333 |
334 | if bad_keys:
335 | bad_keys_fmt = ', '.join([f'"{i}"' for i in bad_keys])
336 | msg = (f'Unknown keys in software instance definitions for software '
337 | f'"{name}": {bad_keys_fmt}.')
338 | raise SoftwareInstanceError(msg)
339 | if miss_keys:
340 | miss_keys_fmt = ', '.join([f'"{i}"' for i in miss_keys])
341 | msg = (f'Software instance definitions for software "{name}" are '
342 | f'missing keys: {miss_keys_fmt}.')
343 | raise SoftwareInstanceError(msg)
344 |
345 | inst_merged['software'] = name_friendly
346 | num_cores = inst_merged.pop('num_cores', None)
347 | cores_min = 1
348 | cores_max = 1
349 | cores_step = 1
350 | if num_cores is not None:
351 | if isinstance(num_cores, (list, tuple)):
352 | if len(num_cores) == 2:
353 | cores_min, cores_max = num_cores
354 | elif len(num_cores) == 3:
355 | cores_min, cores_max, cores_step = num_cores
356 | else:
357 | msg = (f'`num_cores` value not understood in software '
358 | f'instance definition for software "{name}".')
359 | raise SoftwareInstanceError(msg)
360 | else:
361 | cores_min = num_cores
362 | cores_max = num_cores
363 | cores_step = num_cores
364 |
365 | inst_merged.update({
366 | 'cores_min': cores_min,
367 | 'cores_max': cores_max,
368 | 'cores_step': cores_step,
369 | })
370 |
371 | soft_inst = cls(**inst_merged)
372 | soft_inst.set_machine()
373 | all_name_instances.append(soft_inst)
374 |
375 | all_instances.update({name: all_name_instances})
376 |
377 | return all_instances
378 |
379 | @property
380 | def requires_sources(self):
381 | if (
382 | (
383 | self.sources_preparation and
84 | '<<sources_dir>>' in self.sources_preparation.commands
85 | ) or
86 | (self.executable and '<<sources_dir>>' in self.executable)
387 | ):
388 | return True
389 | else:
390 | return False
391 |
392 | @property
393 | def source_variables(self):
394 | if not self.requires_sources:
395 | return []
396 | else:
397 | source_vars = []
398 | if self.sources_preparation:
399 | source_vars += extract_variable_names(
400 | self.sources_preparation.commands,
401 | ['<<', '>>']
402 | )
403 | if self.executable:
404 | source_vars += extract_variable_names(self.executable, ['<<', '>>'])
405 |
406 | return list(set(source_vars) - set(['sources_dir']))
407 |
408 | @property
409 | def software(self):
410 | return self.get_software_safe(self.software_friendly)
411 |
412 | @staticmethod
413 | def get_software_safe(software_name):
414 | return software_name.lower().replace(' ', '_')
415 |
416 | @property
417 | def software_friendly(self):
418 | return self._software_friendly
419 |
420 | @property
421 | def label(self):
422 | return self._label
423 |
424 | @property
425 | def env(self):
426 | return self._env
427 |
428 | @property
429 | def task_preparation(self):
430 | return self._task_preparation
431 |
432 | @property
433 | def task_processing(self):
434 | return self._task_processing
435 |
436 | @property
437 | def cores_min(self):
438 | return self._cores_min
439 |
440 | @property
441 | def cores_max(self):
442 | return self._cores_max
443 |
444 | @property
445 | def cores_step(self):
446 | return self._cores_step
447 |
448 | @property
449 | def cores_range(self):
450 | return range(self.cores_min, self.cores_max + 1, self.cores_step)
451 |
452 | @property
453 | def sources_preparation(self):
454 | return self._sources_preparation
455 |
456 | @property
457 | def executable(self):
458 | return self._executable
459 |
460 | @property
461 | def options(self):
462 | return self._options
463 |
464 | @property
465 | def required_scheduler_options(self):
466 | return self._required_scheduler_options
467 |
468 | @property
469 | def version_info(self):
470 | return self._version_info
471 |
472 | @property
473 | def machine(self):
474 | return self._machine
475 |
476 | @machine.setter
477 | def machine(self, machine):
478 | if self._machine:
479 | raise ValueError('`machine` is already set.')
480 | self._machine = machine
481 |
482 | def set_machine(self):
483 | self.machine = socket.gethostname()
484 |
--------------------------------------------------------------------------------
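A minimal sketch of `SoftwareInstance.load_multiple` with a hypothetical software definition, showing how the scalar and list forms of `num_cores` expand into `cores_min`/`cores_max`/`cores_step`:

from matflow.models.software import SoftwareInstance

software_dict = {
    'Example Software': {
        'instance_defaults': {'env': 'module load example'},
        'instances': [
            {'num_cores': 1},                           # scalar: min = max = step = 1
            {'num_cores': [2, 16, 2], 'label': 'MPI'},  # [min, max, step]
        ],
    },
}
all_instances = SoftwareInstance.load_multiple(software_dict)

# Keys are the "safe" software names:
inst = all_instances['example_software'][1]
print(inst.cores_range)  # range(2, 17, 2)

--------------------------------------------------------------------------------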
/matflow/profile.py:
--------------------------------------------------------------------------------
1 | """`matflow.profile.py`"""
2 |
3 | from pathlib import Path
4 |
5 | from ruamel import yaml
6 |
7 | from matflow.errors import ProfileError
8 | from matflow.config import Config
9 |
10 |
11 | def parse_workflow_profile(profile_path):
12 |
13 | with Path(profile_path).open() as handle:
14 | profile = yaml.safe_load(handle)
15 |
16 | req_keys = ['name', 'tasks']
17 | task_globals = ['run_options', 'stats']
18 | good_keys = req_keys + task_globals + [
19 | 'extends',
20 | 'archive',
21 | 'archives',
22 | 'archive_excludes',
23 | 'figures',
24 | 'metadata',
25 | 'num_iterations',
26 | 'iterate',
27 | 'import',
28 | 'import_list', # equivalent to 'import'; provides a Python-code-safe variant.
29 | ]
30 |
31 | miss_keys = list(set(req_keys) - set(profile.keys()))
32 | bad_keys = list(set(profile.keys()) - set(good_keys))
33 |
34 | if miss_keys:
35 | miss_keys_fmt = ', '.join([f'"{i}"' for i in miss_keys])
36 | raise ProfileError(f'Missing keys in profile: {miss_keys_fmt}.')
37 | if bad_keys:
38 | bad_keys_fmt = ', '.join([f'"{i}"' for i in bad_keys])
39 | raise ProfileError(f'Unknown keys in profile: {bad_keys_fmt}.')
40 |
41 | if 'import' in profile and 'import_list' in profile:
42 | raise ProfileError('Specify at most one of `import` and `import_list`; '
43 | 'these options are functionally equivalent.')
44 |
45 | if 'archive' in profile and 'archives' in profile:
46 | raise ProfileError('Specify either `archive` or `archives` but not both. For '
47 | 'either case, valid values are a string or list of strings.')
48 | elif 'archive' in profile:
49 | profile['archives'] = profile.pop('archive')
50 | elif 'archives' not in profile:
51 | profile['archives'] = []
52 |
53 | if isinstance(profile['archives'], str):
54 | profile['archives'] = [profile['archives']]
55 |
56 | for i in task_globals:
57 | if i in profile:
58 | # Add to each task if it has none:
59 | for idx, task in enumerate(profile['tasks']):
60 | if i not in task:
61 | profile['tasks'][idx][i] = profile[i]
62 |
63 | workflow_dict = {
64 | 'name': profile['name'],
65 | 'tasks': profile['tasks'],
66 | 'archives': profile['archives'],
67 | 'figures': profile.get('figures'),
68 | 'metadata': {**Config.get('default_metadata'), **profile.get('metadata', {})},
69 | 'num_iterations': profile.get('num_iterations'),
70 | 'iterate': profile.get('iterate'),
71 | 'extends': profile.get('extends'),
72 | 'archive_excludes': profile.get('archive_excludes'),
73 | 'import_list': profile.get('import') or profile.get('import_list'),
74 | }
75 |
76 | return workflow_dict
77 |
--------------------------------------------------------------------------------
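A minimal sketch of a workflow profile that exercises the normalisation above; the file name and task names are hypothetical, and MatFlow's `Config` must already be initialised, since it supplies `default_metadata`:

from matflow.profile import parse_workflow_profile

# Contents of a hypothetical "profile.yml":
#
#   name: example_workflow
#   archive: dropbox        # normalised to archives: ['dropbox']
#   run_options:            # task-global: copied into each task that lacks it
#     num_cores: 8
#   tasks:
#     - name: task_one
#     - name: task_two
#       run_options:
#         num_cores: 16     # overrides the global value for this task only
wk_dict = parse_workflow_profile('profile.yml')
print(wk_dict['archives'])  # ['dropbox']

--------------------------------------------------------------------------------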
/matflow/scripting.py:
--------------------------------------------------------------------------------
1 | """Module containing functionality for generating Python scripts as task sources."""
2 |
3 | import re
4 | from textwrap import dedent
5 |
6 | import black
7 | import autopep8
8 |
9 | from pkg_resources import resource_string
10 |
11 |
12 | def main_func(func):
13 | """Decorator used to annotate which function within a snippet is the main function."""
14 | def main_inner(*args, **kwargs):
15 | return func(*args, **kwargs)  # propagate the snippet's return value
16 | return main_inner
17 |
18 |
19 | def get_snippet(package_name, snippet_name, decorator=True):
20 | """Get a Python snippet function (as a string) from the snippets directory."""
21 | out = resource_string(package_name, f'snippets/{snippet_name}').decode()
22 | if not decorator:
23 | # Remove the `@main_func` decorator and import.
24 | remove_lns = ['from matflow.scripting import main_func', '@main_func']
25 | for i in remove_lns:
26 | out = ''.join(out.split(i))
27 | return out
28 |
29 |
30 | def parse_python_func_return(func_str):
31 | """Get a list of the variable names in a Python function return statement.
32 |
33 | The return statement may return a tuple (with parenthesis or not) or a single variable.
34 |
35 | """
36 |
37 | out = []
38 | match = re.search(r'return \(*([\S\s][^\)]+)\)*', func_str)
39 | if match:
40 | match_clean = match.group(1).strip().strip(',')
41 | out = [i.strip() for i in match_clean.split(',')]
42 |
43 | return out
44 |
45 |
46 | def parse_python_func_imports(func_str):
47 | """Get a list of import statement lines from a (string) Python function."""
48 |
49 | import_lines = func_str.split('def ')[0].strip()
50 | match = re.search(r'((?:import|from)[\S\s]*)', import_lines)
51 | out = []
52 | if match:
53 | out = match.group(1).splitlines()
54 |
55 | return out
56 |
57 |
58 | def extract_snippet_main(snippet_str):
59 | """Extract only the snippet main function (plus imports), as annotated by the
60 | `@main_func` decorator."""
61 |
62 | func_start_pat = r'((?:@main_func\n)?def\s(?:.*)\((?:[\s\S]*?)\):)'
63 |
64 | func_split_snip = re.split(func_start_pat, snippet_str)
65 | imports = func_split_snip[0]
66 | main_func_dec_str = '@main_func'
67 |
68 | main_func_str = None
69 | for idx in range(1, len(func_split_snip[1:]), 2):
70 | func_str = func_split_snip[idx] + func_split_snip[idx + 1]
71 | if main_func_dec_str in func_str:
72 | if main_func_str:
73 | msg = (f'`{main_func_dec_str}` should decorate only one function within '
74 | f'the snippet.')
75 | raise ValueError(msg)
76 | else:
77 | main_func_str = func_str[len(main_func_dec_str):].lstrip('\n')
78 |
79 | imports = re.sub(r'from [\w.]+ import main_func\n?', '', imports)  # strip any main_func import
80 |
81 | return imports + '\n' + main_func_str
82 |
83 |
84 | def get_snippet_signature(package_name, script_name):
85 | """Get imports, inputs and outputs of a Python snippet function."""
86 |
87 | snippet_str = get_snippet(package_name, script_name)
88 | snippet_str = extract_snippet_main(snippet_str)
89 |
90 | def_line = re.search(r'def\s(.*)\(([\s\S]*?)\):', snippet_str).groups()
91 | func_name = def_line[0]
92 | func_ins = [i.strip() for i in def_line[1].split(',') if i.strip()]
93 |
94 | if script_name != func_name + '.py':
95 | msg = ('For simplicity, the snippet main function name should be the same as the '
96 | 'snippet file name.')
97 | raise ValueError(msg)
98 |
99 | func_outs = parse_python_func_return(snippet_str)
100 | func_imports = parse_python_func_imports(snippet_str)
101 |
102 | out = {
103 | 'name': func_name,
104 | 'imports': func_imports,
105 | 'inputs': func_ins,
106 | 'outputs': func_outs,
107 | }
108 | return out
109 |
110 |
111 | def get_snippet_call(package_name, script_name):
112 | sig = get_snippet_signature(package_name, script_name)
113 | outs_fmt = ', '.join(sig['outputs'])
114 | ins_fmt = ', '.join(sig['inputs'])
115 | ret = f'{sig["name"]}({ins_fmt})'
116 | if outs_fmt:
117 | ret = f'{outs_fmt} = {ret}'
118 | return ret
119 |
120 |
121 | def get_wrapper_script(package_name, script_name, snippets, outputs):
122 |
123 | ind = ' '
124 | sigs = [get_snippet_signature(package_name, i['name']) for i in snippets]
125 | all_ins = [j for i in sigs for j in i['inputs']]
126 | all_outs = [j for i in sigs for j in i['outputs']]
127 |
128 | # print(f'all_ins: {all_ins}')
129 | # print(f'all_outs: {all_outs}')
130 |
131 | for i in outputs:
132 | if i not in all_outs:
133 | raise ValueError(f'Cannot output "{i}". No functions return this name.')
134 |
135 | # Required inputs are those that are not output by any snippet
136 | req_ins = list(set(all_ins) - set(all_outs))
137 | req_ins_fmt = ', '.join(req_ins)
138 |
139 | # print(f'req_ins: {req_ins}')
140 | # print(f'req_ins_fmt: {req_ins_fmt}')
141 |
142 | main_sig = [f'def main({req_ins_fmt}):']
143 | main_body = [ind + get_snippet_call(package_name, i['name']) for i in snippets]
144 | main_outs = ['\n' + ind + f'return {", ".join(outputs)}']
145 | main_func = main_sig + main_body + main_outs
146 |
147 | req_imports = [
148 | 'import sys',
149 | 'import hickle',
150 | 'from pathlib import Path',
151 | ]
152 | out = req_imports
153 | out += main_func
154 | snippet_funcs = '\n'.join([get_snippet(package_name, i['name'], decorator=False)
155 | for i in snippets])
156 |
157 | out = '\n'.join(out) + '\n' + snippet_funcs + '\n'
158 | out += dedent('''\
159 | if __name__ == '__main__':
160 | inputs = hickle.load(sys.argv[1])
161 | outputs = main(**inputs)
162 | hickle.dump(outputs, 'outputs.hdf5')
163 |
164 | ''')
165 |
166 | # print(f'out 1: \n----------\n{out}\n----------\n')
167 | out = autopep8.fix_code(out)
168 | # print(f'out 2: \n----------\n{out}\n----------\n')
169 | out = black.format_str(out, mode=black.FileMode())
170 | # print(f'out 3: \n----------\n{out}\n----------\n')
171 |
172 | return out
173 |
--------------------------------------------------------------------------------
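The return- and import-parsing helpers above operate on plain strings, so they can be sketched in isolation:

from matflow.scripting import parse_python_func_imports, parse_python_func_return

snippet = '''import numpy as np

def poisson_ratio(E, G):
    nu = E / (2 * G) - 1
    return nu
'''
print(parse_python_func_imports(snippet))  # ['import numpy as np']
print(parse_python_func_return(snippet))   # ['nu']

--------------------------------------------------------------------------------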
/matflow/utils.py:
--------------------------------------------------------------------------------
1 | """`matflow.utils.py`"""
2 |
3 | import os
4 | import sys
5 | import io
6 | import collections.abc
7 | import copy
8 | import itertools
9 | import h5py
10 | import numpy as np
11 | import random
12 | import re
13 | import time
14 | from contextlib import redirect_stdout, contextmanager
15 | from datetime import datetime
16 | from pathlib import Path
17 |
18 | from ruamel.yaml import YAML
19 |
20 |
21 | def parse_times(format_str):
22 | """Parse a string which contain time format code and one or
23 | more `%%r` to represent a random digit from 0 to 9."""
24 |
25 | time_parsed = time.strftime(format_str)
26 | rnd_all = ''
27 | while '%r' in time_parsed:
28 | rnd = str(random.randint(0, 9))
29 | rnd_all += rnd
30 | time_parsed = time_parsed.replace('%r', rnd, 1)
31 |
32 | return time_parsed, rnd_all
33 |
34 |
35 | def zeropad(num, largest):
36 | """Return a zero-padded string of a number, given the largest number.
37 |
38 | TODO: want to support floating-point numbers as well? Or rename function
39 | accordingly.
40 |
41 | Parameters
42 | ----------
43 | num : int
44 | The number to be formatted with zeros padding on the left.
45 | largest : int
46 | The number that determines the number of zeros to pad with.
47 |
48 | Returns
49 | -------
50 | padded : str
51 | The original number, `num`, formatted as a string with zeros added
52 | on the left.
53 |
54 | """
55 |
56 | num_digits = len('{:.0f}'.format(largest))
57 | padded = '{0:0{width}}'.format(num, width=num_digits)
58 |
59 | return padded
60 |
61 |
62 | def combine_list_of_dicts(a):
63 |
64 | a = copy.deepcopy(a)
65 |
66 | for i in range(1, len(a)):
67 | update_dict(a[0], a[i])
68 |
69 | return a[0]
70 |
71 |
72 | def update_dict(base, upd):
73 | """Update an arbitrarily-nested dict."""
74 |
75 | for key, val in upd.items():
76 | if isinstance(base, collections.abc.Mapping):  # `collections.Mapping` was removed in Python 3.10
77 | if isinstance(val, collections.abc.Mapping):
78 | r = update_dict(base.get(key, {}), val)
79 | base[key] = r
80 | else:
81 | base[key] = upd[key]
82 | else:
83 | base = {key: upd[key]}
84 |
85 | return base
86 |
87 |
88 | def nest_lists(my_list):
89 | """
90 | `my_list` is a list of `N` sublists.
91 |
92 | E.g.
93 | my_list = [
94 | [1,2],
95 | [3,4,5],
96 | [6,7]
97 | ]
98 |
99 | Returns a list of lists, each of length `N`, such that all combinations of
100 | elements from the sublists of `my_list` are found.
101 | E.g
102 | out = [
103 | [1, 3, 6],
104 | [1, 3, 7],
105 | [1, 4, 6],
106 | [1, 4, 7],
107 | [1, 5, 6],
108 | [1, 5, 7],
109 | [2, 3, 6],
110 | [2, 3, 7],
111 | [2, 4, 6],
112 | [2, 4, 7],
113 | [2, 5, 6],
114 | [2, 5, 7]
115 | ]
116 |
117 | """
118 |
119 | N = len(my_list)
120 | sub_len = [len(i) for i in my_list]
121 |
122 | products = np.array([1] * (N + 1))
123 | for i in range(len(my_list) - 1, -1, -1):
124 | products[:i + 1] *= len(my_list[i])
125 |
126 | out = [[None for x in range(N)] for y in range(products[0])]
127 |
128 | for row_idx, row in enumerate(out):
129 |
130 | for col_idx, col in enumerate(row):
131 |
132 | num_repeats = products[col_idx + 1]
133 | sub_list_idx = int(row_idx / num_repeats) % len(my_list[col_idx])
134 | out[row_idx][col_idx] = copy.deepcopy(
135 | my_list[col_idx][sub_list_idx])
136 |
137 | return out
138 |
139 |
140 | def repeat(lst, reps):
141 | """Repeat 1D list elements."""
142 | return list(itertools.chain.from_iterable(itertools.repeat(x, reps) for x in lst))
143 |
144 |
145 | def tile(lst, tiles):
146 | """Tile a 1D list."""
147 | return lst * tiles
148 |
149 |
150 | def index(lst, idx):
151 | """Get elements of a list."""
152 | return [lst[i] for i in idx]
153 |
154 |
155 | def arange(size):
156 | """Get 1D list of increasing integers."""
157 | return list(range(size))
158 |
159 |
160 | def extend_index_list(lst, repeats):
161 | """Extend an integer index list by repeating some number of times such that the extra
162 | indices added are new and follow the same ordering as the existing elements.
163 |
164 | Parameters
165 | ----------
166 | lst : list of int
167 | repeats : int
168 |
169 | Returns
170 | -------
171 | new_idx : list of int
172 | Returned list has length `len(lst) * repeats`.
173 |
174 | Examples
175 | --------
176 | >>> extend_index_list([0, 1, 2], 2)
177 | [0, 1, 2, 3, 4, 5]
178 |
179 | >>> extend_index_list([0, 0, 1, 1], 3)
180 | [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5]
181 |
182 | >>> extend_index_list([4, 1, 2], 2)
183 | [4, 1, 2, 8, 5, 6]
184 |
185 | """
186 |
187 | new_idx = []
188 | for i in lst:
189 | if i < 0:
190 | raise ValueError('List elements must be positive or zero.')
191 | new_idx.append(i)
192 |
193 | for _ in range(repeats - 1):
194 | next_avail_idx = max(new_idx) + 1
195 | new_idx.extend([next_avail_idx + i - min(lst) for i in lst])
196 |
197 | return new_idx
198 |
199 |
200 | def flatten_list(lst):
201 | """Flatten a list of lists.
202 |
203 | Parameters
204 | ----------
205 | lst : list of list
206 |
207 | Returns
208 | -------
209 | list
210 |
211 | Examples
212 | --------
213 | >>> flatten_list([[0, 2, 4], [9, 1]])
214 | [0, 2, 4, 9, 1]
215 |
216 | """
217 | return [j for i in lst for j in i]
218 |
219 |
220 | def to_sub_list(lst, sub_list_len):
221 | """Transform a list into a list of sub lists of certain size.
222 |
223 | Parameters
224 | ----------
225 | lst : list
226 | List to transform into a list of sub-lists.
227 | sub_list_len : int
228 | Size of sub-lists. Must be an integer factor of the length of the
229 | original list, `lst`.
230 |
231 | Returns
232 | -------
233 | list of list
234 |
235 | Examples
236 | --------
237 | >>> to_sub_list([0, 1, 2, 3], 2)
238 | [[0, 1], [2, 3]]
239 |
240 | """
241 |
242 | if (sub_list_len <= 0) or (len(lst) % sub_list_len != 0):
243 | raise ValueError('`sub_list_len` must be a positive factor of `len(lst)`.')
244 | out = [lst[(i * sub_list_len):((i * sub_list_len) + sub_list_len)]
245 | for i in range(len(lst) // sub_list_len)]
246 | return out
247 |
248 |
249 | def datetime_to_dict(dt):
250 | return {
251 | 'year': dt.year,
252 | 'month': dt.month,
253 | 'day': dt.day,
254 | 'hour': dt.hour,
255 | 'minute': dt.minute,
256 | 'second': dt.second,
257 | 'microsecond': dt.microsecond,
258 | }
259 |
260 |
261 | def dump_to_yaml_string(data):
262 | yaml = YAML()
263 | yaml.indent(mapping=2, sequence=4, offset=2)
264 | with redirect_stdout(io.StringIO()) as buffer:
265 | yaml.dump(data, sys.stdout)
266 | output = buffer.getvalue()
267 | return output
268 |
269 |
270 | def get_specifier_dict(key, name_key=None, base_key=None, defaults=None,
271 | list_specifiers=None, cast_types=None):
272 | """Resolve a string key with additional specifiers using square-brackets into a dict.
273 |
274 | Parameters
275 | ----------
276 | key : str or dict
277 | name_key : str
278 | base_key : str
279 | defaults : dict
280 | list_specifiers : list of str
281 | Any specifier in this list will be added to the returned dict as a list element.
282 | cast_types : dict
283 | Dict of (key: type) to cast those keys' values to.
284 |
285 | Returns
286 | -------
287 | dict
288 |
289 | Examples
290 | --------
291 | >>> get_specifier_dict(
292 | 'parameter_1[hey, label_2=hi]',
293 | name_key='param_name',
294 | base_key='label_1',
295 | defaults={'a': 1},
296 | )
297 | {
298 | 'param_name': 'parameter_1',
299 | 'label_1': 'hey',
300 | 'label_2': 'hi',
301 | 'a': 1,
302 | }
303 |
304 | """
305 |
306 | list_specifiers = list_specifiers or []
307 | cast_types = cast_types or {}
308 | out = {}
309 |
310 | if isinstance(key, str):
311 |
312 | if name_key is None:
313 | raise TypeError('`name_key` must be specified.')
314 |
315 | match = re.search(r'([\w\-\s]+)(\[(.*?)\])*', key)
316 | name = match.group(1)
317 | out.update({name_key: name})
318 |
319 | specifiers_str = match.group(3)
320 | if specifiers_str:
321 | base_keys = []
322 | for s in specifiers_str.split(','):
323 | if not s:
324 | continue
325 | if '=' in s:
326 | s_key, s_val = [i.strip() for i in s.split('=', 1)]  # allow '=' in the value
327 | if s_key in list_specifiers:
328 | if s_key in out:
329 | out[s_key].append(s_val)
330 | else:
331 | out[s_key] = [s_val]
332 | else:
333 | if s_key in out:
334 | raise ValueError(
335 | f'Specifier "{s_key}" multiply defined. Add this '
336 | f'specifier to `list_specifiers` to add multiple values '
337 | f'to the returned dict (in a list).'
338 | )
339 | out.update({s_key: s_val})
340 | else:
341 | base_keys.append(s.strip())
342 |
343 | if len(base_keys) > 1:
344 | raise ValueError('Only one specifier may be specified without a key.')
345 |
346 | if base_keys:
347 | if base_key is None:
348 | raise ValueError('Base key found but `base_key` name not specified.')
349 | out.update({base_key: base_keys[0]})
350 |
351 | elif isinstance(key, dict):
352 | out.update(key)
353 |
354 | else:
355 | raise TypeError('`key` must be a dict or str to allow specifiers to be resolved.')
356 |
357 | for k, v in (defaults or {}).items():
358 | if k not in out:
359 | out[k] = copy.deepcopy(v)
360 |
361 | for key, cast_type in cast_types.items():
362 | if key in out:
363 | if cast_type is bool:
364 | new_val = cast_bool(out[key])
365 | else:
366 | new_val = cast_type(out[key])
367 | out[key] = new_val
368 |
369 | return out
370 |
371 |
372 | def extract_variable_names(source_str, delimiters):
373 | """Given a specified syntax for embedding variable names within a string,
374 | extract all variable names.
375 |
376 | Parameters
377 | ----------
378 | source_str : str
379 | The string within which to search for variable names.
380 | delimiters : two-tuple of str
381 | The left and right delimiters of a variable name.
382 |
383 | Returns
384 | -------
385 | var_names : list of str
386 | The variable names embedded in the original string.
387 |
388 | """
389 |
390 | delim_esc = [re.escape(i) for i in delimiters]
391 | pattern = delim_esc[0] + r'(.\S+?)' + delim_esc[1]
392 | var_names = re.findall(pattern, source_str)
393 |
394 | return var_names
395 |
396 |
397 | def get_nested_item(obj, address):
398 | out = obj
399 | for i in address:
400 | out = out[i]
401 | return out
402 |
403 |
404 | def get_workflow_paths(base_dir, quiet=True):
405 | base_dir = Path(base_dir)
406 | wkflows = []
407 | for i in base_dir.glob('**/*'):
408 | if i.name == 'workflow.hdf5':
409 | wk_full_path = i
410 | wk_rel_path = wk_full_path.relative_to(base_dir)
411 | wk_disp_path = wk_rel_path.parent
412 | with h5py.File(wk_full_path, 'r') as handle:
413 | try:
414 | try:
415 | handle["/workflow_obj/data/'figures'"]
416 | except KeyError:
417 | if not quiet:
418 | print(f'No "figures" key for workflow: {wk_disp_path}.')
419 | continue
420 | timestamp_path = "/workflow_obj/data/'history'/data/data_0/'timestamp'/data"
421 | timestamp_dict = {k[1:-1]: v['data'][()]
422 | for k, v in handle[timestamp_path].items()}
423 | timestamp = datetime(**timestamp_dict)
424 | wkflows.append({
425 | 'ID': handle.attrs['workflow_id'],
426 | 'full_path': str(wk_full_path),
427 | 'display_path': str(wk_disp_path),
428 | 'timestamp': timestamp,
429 | 'display_timestamp': timestamp.strftime(r'%Y-%m-%d %H:%M:%S'),
430 | })
431 | except Exception:
432 | if not quiet:
433 | print(f'No timestamp for workflow: {wk_disp_path}')
434 | return wkflows
435 |
436 |
437 | def order_workflow_paths_by_date(workflow_paths):
438 | return sorted(workflow_paths, key=lambda x: x['timestamp'])
439 |
440 |
441 | def nested_dict_arrays_to_list(obj):
442 | if isinstance(obj, np.ndarray):
443 | obj = obj.tolist()
444 | elif isinstance(obj, dict):
445 | for key, val in obj.items():
446 | obj[key] = nested_dict_arrays_to_list(val)
447 | return obj
448 |
449 |
450 | def move_element_forward(lst, index, position, return_map=True):
451 | """Move a list element forward in the list to a new index position."""
452 |
453 | if index > position:
454 | raise ValueError('`index` cannot be larger than `position`, since that would '
455 | 'not be a "forward" move!')
456 |
457 | if position > len(lst) - 1:
458 | raise ValueError('`position` must be a valid list index.')
459 |
460 | sub_list_1 = lst[:position + 1]
461 | sub_list_2 = lst[position + 1:]
462 | elem = sub_list_1.pop(index)
463 | out = sub_list_1 + [elem] + sub_list_2
464 |
465 | # Indices to the left of the element that is to be moved do not change:
466 | idx_map_left = {i: i for i in range(0, index)}
467 |
468 | # The index of the moved element changes to `position`
469 | idx_map_element = {index: position}
470 |
471 | # Indices to the right of the element up to the new position are decremented:
472 | idx_map_middle = {i: i - 1 for i in range(index + 1, position + 1)}
473 |
474 | # Indices to the right of the new position do not change:
475 | idx_map_right = {i: i for i in range(position + 1, len(lst))}
476 |
477 | idx_map = {
478 | **idx_map_left,
479 | **idx_map_element,
480 | **idx_map_middle,
481 | **idx_map_right
482 | }
483 |
484 | if return_map:
485 | return out, idx_map
486 | else:
487 | return out
488 |
489 |
490 | def cast_bool(bool_str):
491 | if isinstance(bool_str, bool):
492 | return bool_str
493 | elif bool_str.lower() == 'true':
494 | return True
495 | elif bool_str.lower() == 'false':
496 | return False
497 | else:
498 | raise ValueError(f'"{bool_str}" cannot be cast to True or False.')
499 |
500 |
501 | @contextmanager
502 | def working_directory(path):
503 | """Change to a working directory and return to previous working directory on exit."""
504 | prev_cwd = Path.cwd()
505 | os.chdir(path)
506 | try:
507 | yield
508 | finally:
509 | os.chdir(prev_cwd)
510 |
--------------------------------------------------------------------------------
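Minimal sketches of a few of the pure helpers above:

from matflow.utils import (zeropad, nest_lists, extract_variable_names,
                           move_element_forward)

print(zeropad(7, largest=1000))      # '0007' (width set by the largest number)
print(nest_lists([[1, 2], [3, 4]]))  # [[1, 3], [1, 4], [2, 3], [2, 4]]
print(extract_variable_names('run <<exe>> --n <<cores>>', ['<<', '>>']))
# ['exe', 'cores']

# Move the element at index 1 forward to index 3; the map records old -> new:
out, idx_map = move_element_forward([10, 11, 12, 13], index=1, position=3)
print(out)      # [10, 12, 13, 11]
print(idx_map)  # {0: 0, 1: 3, 2: 1, 3: 2}

--------------------------------------------------------------------------------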
/matflow/validation.py:
--------------------------------------------------------------------------------
1 |
2 | import inspect
3 |
4 | from matflow.errors import UnsatisfiedSchemaError
5 |
6 |
7 | def validate_input_mapper_func(func, task_inputs):
8 | """Using `inspect`, validate an input mapper callable from a Matflow extension.
9 |
10 | Parameters
11 | ----------
12 | func : callable
13 | task_inputs : list of str
14 | List of the input name aliases associated with the task schema.
15 |
16 | Notes
17 | -----
18 | Checks performed on `func`:
19 | - check the first argument is named "path"; raise `TypeError` if not;
20 | - check for one or more additional arguments which are named according to
21 | a subset of task parameters (passed in `task_inputs`).
22 |
23 | """
24 |
25 | func_params = inspect.signature(func).parameters
26 |
27 | # Check first argument must be "path":
28 | first_arg = list(func_params.items())[0]
29 | if first_arg[0] != 'path':
30 | msg = (f'The first parameter of an input mapper function must be "path" '
31 | f'but for {func.__name__} is actually "{first_arg[0]}".')
32 | raise TypeError(msg)
33 | else:
34 | # Remove "path" from argument list, for further analysis:
35 | func_params = dict(func_params)
36 | del func_params[first_arg[0]]
37 |
38 | bad_params = list(set(func_params) - set(task_inputs))
39 | if bad_params:
40 | bad_params_fmt = ', '.join([f'"{i}"' for i in bad_params])
41 | msg = (f'The following arguments to the input mapper function "{func.__name__}" '
42 | f'are not known by the schema: {bad_params_fmt}.')
43 | raise TypeError(msg)
44 |
45 |
46 | def validate_output_mapper_func(func, num_file_paths, option_names, input_names):
47 | """Using `inspect`, validate an output mapper callable from a Matflow extension.
48 |
49 | Parameters
50 | ----------
51 | func : callable
52 | num_file_paths : int
53 | Number of output files specified in the schema's output map.
54 | option_names : list of str
55 | List of the names of output map options.
56 | input_names : list of str
57 | List of the names of output map inputs.
58 |
59 | Notes
60 | -----
61 | Checks performed on `func`:
62 | - After the first `num_file_paths` arguments, check that the remaining argument
63 | names coincide exactly with `option_names` + `input_names`.
64 |
65 | """
66 |
67 | func_params = inspect.signature(func).parameters
68 |
69 | # Check num args first
70 | exp_num_params = num_file_paths + len(option_names) + len(input_names)
71 | if len(func_params) != exp_num_params:
72 | msg = (
73 | f'The output mapper function "{func.__name__}" does not have the expected '
74 | f'number of arguments: found {len(func_params)} but expected '
75 | f'{exp_num_params} ({num_file_paths} file path(s) + {len(option_names)} '
76 | f'options parameters + {len(input_names)} inputs).'
77 | )
78 | raise TypeError(msg)
79 |
80 | # Check option names:
81 | params = list(func_params.items())[num_file_paths:]
82 | params_func = [i[0] for i in params]
83 |
84 | miss_params = list(set(option_names + input_names) - set(params_func))
85 | bad_params = list(set(params_func) - set(option_names + input_names))
86 |
87 | if bad_params:
88 | bad_params_fmt = ', '.join([f'"{i}"' for i in bad_params])
89 | msg = (f'The following arguments in the output mapper function "{func.__name__}" '
90 | f'are not output map options or inputs: {bad_params_fmt}.')
91 | raise TypeError(msg)
92 |
93 | if miss_params:
94 | miss_params_fmt = ', '.join([f'"{i}"' for i in miss_params])
95 | msg = (f'The following output mapper options and/or inputs are missing from the '
96 | f'signature of the output mapper function "{func.__name__}": '
97 | f'{miss_params_fmt}.')
98 | raise TypeError(msg)
99 |
100 |
101 | def validate_func_mapper_func(func, task_inputs):
102 | """Using `inspect`, validate an input mapper callable from a Matflow extension.
103 |
104 | Parameters
105 | ----------
106 | func : callable
107 | task_inputs : list of str
108 | List of the input name aliases associated with the task schema.
109 |
110 | Notes
111 | -----
112 | Checks performed on `func`:
113 | - check function arguments are named according to all task parameters (passed in
114 | `task_inputs`).
115 |
116 | """
117 |
118 | func_params = inspect.signature(func).parameters
119 |
120 | bad_params = list(set(func_params) - set(task_inputs))
121 | miss_params = list(set(task_inputs) - set(func_params))
122 |
123 | if bad_params:
124 | bad_params_fmt = ', '.join([f'"{i}"' for i in bad_params])
125 | msg = (f'The function mapper function "{func.__name__}" contains the following '
126 | f'arguments that are not consistent with the schema: {bad_params_fmt}.')
127 | raise TypeError(msg)
128 |
129 | if miss_params:
130 | miss_params_fmt = ', '.join([f'"{i}"' for i in miss_params])
131 | msg = (f'The following task inputs are missing from the signature of the '
132 | f'function mapper function "{func.__name__}": {miss_params_fmt}.')
133 | raise TypeError(msg)
134 |
135 |
136 | def validate_task_schemas(task_schemas, task_input_map, task_output_map, task_func_map):
137 | """
138 | Determine whether each task schema is valid.
139 |
140 | Parameters
141 | ----------
142 | task_schemas : dict of (tuple : TaskSchema)
143 | Dict keys are (task_name, task_method, software).
144 | task_input_map : dict of (tuple : dict of (str : callable))
145 | Outer dict keys are (task_name, task_method, software); inner dicts map a string
146 | input file name to a MatFlow extension callable which writes that input file.
147 | task_output_map : dict of (tuple : dict of (str : callable))
148 | Outer dict keys are (task_name, task_method, software); inner dicts map a string
149 | output name to a MatFlow extension callable which returns that output.
150 | task_func_map : dict of (tuple : callable)
151 | Dict keys are (task_name, task_method, software); values are MatFlow extension
152 | callables.
153 |
154 | Returns
155 | -------
156 | schema_is_valid : dict of (tuple : tuple of (bool, str))
157 | Dict keys are (task_name, task_method, software); values are tuples whose first
158 | values are boolean values indicating if a given schema is valid. If False, this
159 | indicates that one of the extension functions (input map, output map or function
160 | map) is missing. Note that this function does not raise an exception in this
161 | case; the task schema is simply noted as invalid. The second value of the
162 | tuple is a string description of the reason why the schema is invalid.
163 |
164 | Raises
165 | ------
166 | UnsatisfiedSchemaError
167 | Raised if any of the extension callables (input/output/func maps) are not
168 | consistent with their associated task schema.
169 |
170 | """
171 |
172 | schema_is_valid = {}
173 |
174 | for key, schema in task_schemas.items():
175 |
176 | schema_is_valid.update({key: (True, '')})
177 |
178 | key_msg = (f'Unresolved task schema for task "{schema.name}" with method '
179 | f'"{schema.method}" and software "{schema.implementation}".')
180 |
181 | for inp_map in schema.input_map:
182 |
183 | extension_inp_maps = task_input_map.get(key)
184 | msg = (
185 | f'{key_msg} No matching extension function found for the input '
186 | f'map that generates the input file "{inp_map["file"]}".'
187 | )
188 |
189 | if not extension_inp_maps:
190 | reason = (f'No input map function found for input map that generates file'
191 | f' "{inp_map["file"]}". ')
192 | schema_is_valid[key] = (False, schema_is_valid[key][1] + reason)
193 | continue
194 | else:
195 | inp_map_func = extension_inp_maps.get(inp_map['file'])
196 | if not inp_map_func:
197 | raise UnsatisfiedSchemaError(msg)
198 |
199 | # Validate signature of input map function:
200 | try:
201 | validate_input_mapper_func(inp_map_func, inp_map['inputs'])
202 | except TypeError as err:
203 | raise UnsatisfiedSchemaError(key_msg + ' ' + str(err)) from None
204 |
205 | for out_map in schema.output_map:
206 |
207 | extension_out_maps = task_output_map.get(key)
208 | msg = (
209 | f'{key_msg} No matching extension function found for the output '
210 | f'map that generates the output "{out_map["output"]}".'
211 | )
212 |
213 | if not extension_out_maps:
214 | reason = (f'No output map function found for output map that generates '
215 | f'output "{out_map["output"]}". ')
216 | schema_is_valid[key] = (False, schema_is_valid[key][1] + reason)
217 | continue
218 | else:
219 | out_map_func = extension_out_maps.get(out_map['output'])
220 | if not out_map_func:
221 | raise UnsatisfiedSchemaError(msg)
222 |
223 | # Validate signature of output map function:
224 | try:
225 | validate_output_mapper_func(
226 | func=out_map_func,
227 | num_file_paths=len(out_map['files']),
228 | option_names=[i['name'] for i in out_map.get('options', [])],
229 | input_names=[i['name'] for i in out_map.get('inputs', [])],
230 | )
231 | except TypeError as err:
232 | raise UnsatisfiedSchemaError(key_msg + ' ' + str(err)) from None
233 |
234 | if schema.is_func:
235 |
236 | func = task_func_map.get(key)
237 | if not func:
238 | reason = 'No function mapper function found. '
239 | schema_is_valid[key] = (False, schema_is_valid[key][1] + reason)
240 | continue
241 |
242 | # Validate signature of func mapper function:
243 | try:
244 | validate_func_mapper_func(func, schema.input_aliases)
245 | except TypeError as err:
246 | raise UnsatisfiedSchemaError(key_msg + ' ' + str(err)) from None
247 |
248 | return schema_is_valid
249 |
--------------------------------------------------------------------------------
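A minimal sketch of the input-mapper signature check above, using hypothetical mapper functions:

from matflow.validation import validate_input_mapper_func

def write_input_file(path, grain_size, temperature):
    """A hypothetical extension input mapper."""

def bad_mapper(filename, grain_size):
    """First parameter is not named "path"."""

# Passes: first argument is "path"; the rest are a subset of the schema inputs:
validate_input_mapper_func(write_input_file, ['grain_size', 'temperature', 'strain'])

# Raises TypeError: the first parameter must be named "path":
validate_input_mapper_func(bad_mapper, ['grain_size'])

--------------------------------------------------------------------------------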
/requirements.txt:
--------------------------------------------------------------------------------
1 | pylint
2 | ipykernel
3 | rope
4 | autopep8
5 | twine
6 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """Pip installation script for `matflow`."""
2 |
3 | import os
4 | import re
5 | from setuptools import find_packages, setup
6 |
7 |
8 | def get_version():
9 |
10 | ver_file = 'matflow/_version.py'
11 | with open(ver_file) as handle:
12 | ver_str_line = handle.read()
13 |
14 | ver_pattern = r'^__version__ = [\'"]([^\'"]*)[\'"]'
15 | match = re.search(ver_pattern, ver_str_line, re.M)
16 | if match:
17 | ver_str = match.group(1)
18 | else:
19 | msg = 'Unable to find version string in "{}"'.format(ver_file)
20 | raise RuntimeError(msg)
21 |
22 | return ver_str
23 |
24 |
25 | def get_long_description():
26 |
27 | readme_file = 'README.md'
28 | with open(readme_file, encoding='utf-8') as handle:
29 | contents = handle.read()
30 |
31 | return contents
32 |
33 |
34 | package_data = [
35 | os.path.join(*os.path.join(root, f).split(os.path.sep)[1:])
36 | for root, dirs, files in os.walk(os.path.join('matflow', 'data'))
37 | for f in files
38 | ]
39 |
40 | setup(
41 | name='matflow',
42 | version=get_version(),
43 | description=('Computational workflow management for materials science.'),
44 | long_description=get_long_description(),
45 | long_description_content_type='text/markdown',
46 | author='Adam J. Plowman',
47 | author_email='adam.plowman@manchester.ac.uk',
48 | packages=find_packages(),
49 | package_data={
50 | 'matflow': package_data,
51 | },
52 | install_requires=[
53 | 'matflow-demo-extension',
54 | 'hpcflow>=0.1.16',
55 | 'click>7.0',
56 | 'hickle==4.0.4',
57 | 'h5py==2.10.0',
58 | 'numpy<1.24',
59 | 'sqlalchemy<2',
60 | 'ruamel.yaml==0.16.10',
61 | 'pyperclip',
62 | 'black',
63 | 'autopep8',
64 | ],
65 | project_urls={
66 | 'GitHub': 'https://github.com/LightForm-group/matflow',
67 | },
68 | classifiers=[
69 | 'Development Status :: 3 - Alpha',
70 | 'Intended Audience :: Science/Research',
71 | 'Topic :: Scientific/Engineering',
72 | 'Programming Language :: Python :: 3.7',
73 | 'Programming Language :: Python :: 3.8',
74 | 'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)',
75 | 'Operating System :: OS Independent',
76 | ],
77 | entry_points="""
78 | [console_scripts]
79 | matflow=matflow.cli:cli
80 | """
81 | )
82 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LightForm-group/matflow/4a40bd27a5c97778bdf902f1a7f47a882c5fb889/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_hicklable.py:
--------------------------------------------------------------------------------
1 | """Module containing unit tests on the `hicklable.to_hicklable` function."""
2 |
3 | import unittest
4 | from tempfile import TemporaryFile
5 |
6 | import numpy as np
7 | import hickle
8 |
9 | from matflow.hicklable import to_hicklable
10 |
11 |
12 | class ConversionTestCase(unittest.TestCase):
13 | """Tests on `to_hicklable`."""
14 |
15 | def test_built_ins(self):
16 | """Test expected output for some built-in types."""
17 |
18 | obj = {
19 | 'a': 1,
20 | 'b': 2.0,
21 | 'c': [3, 4, 5.0],
22 | 'd': (6, 7, 8),
23 | 'e': {9, 10, 11},
24 | 'f': {'f1': 1, 'f2': 2},
25 | 'g': 'hello',
26 | }
27 | obj_expected = {
28 | 'a': 1,
29 | 'b': 2.0,
30 | 'c': [3, 4, 5.0],
31 | 'd': (6, 7, 8),
32 | 'e': {9, 10, 11},
33 | 'f': {'f1': 1, 'f2': 2},
34 | 'g': 'hello',
35 | }
36 | obj_valid = to_hicklable(obj)
37 | self.assertTrue(obj_valid == obj_expected)
38 |
39 | def test_arrays(self):
40 | """Test expected output for some arrays."""
41 |
42 | obj = {
43 | 'int_array': np.array([1, 2, 3]),
44 | 'float_array': np.array([3.3, 2.5, -2.1]),
45 | 'bool_array': np.array([1, 0, 0, 1]).astype(bool),
46 | }
47 | obj_valid = to_hicklable(obj)
48 | self.assertTrue(obj_valid == obj)
49 |
50 | def test_object_dict(self):
51 | """Test expected output for an object with a __dict__ attribute."""
52 |
53 | class myClassObject(object):
54 | def __init__(self, a=1): self.a = a
55 |
56 | my_class_obj = myClassObject(a=3.5)
57 |
58 | obj = {'my_class_obj': my_class_obj}
59 | expected_obj = {'my_class_obj': {'a': 3.5}}
60 | obj_valid = to_hicklable(obj)
61 | self.assertTrue(obj_valid == expected_obj)
62 |
63 | def test_object_slots(self):
64 | """Test expected output for an object with a __slots__ attribute."""
65 |
66 | class myClassObject(object):
67 | __slots__ = ['a']
68 | def __init__(self, a=1): self.a = a
69 |
70 | my_class_obj = myClassObject(a=3.5)
71 |
72 | obj = {'my_class_obj': my_class_obj}
73 | expected_obj = {'my_class_obj': {'a': 3.5}}
74 | obj_valid = to_hicklable(obj)
75 | self.assertTrue(obj_valid == expected_obj)
76 |
77 | def test_object_dict_slots(self):
78 | """Test expected output for an object with __dict__ and __slots__ attributes."""
79 |
80 | class myClassObject(object):
81 | __slots__ = ['a', '__dict__']
82 | def __init__(self, a=1): self.a = a
83 |
84 | my_class_obj = myClassObject(a=3.5)
85 | my_class_obj.b = 2
86 |
87 | obj = {'my_class_obj': my_class_obj}
88 | expected_obj = {'my_class_obj': {'a': 3.5, 'b': 2}}
89 | obj_valid = to_hicklable(obj)
90 | self.assertTrue(obj_valid == expected_obj)
91 |
--------------------------------------------------------------------------------
/tests/test_task.py:
--------------------------------------------------------------------------------
1 | """Module containing unit tests on Task logic."""
2 |
3 | import copy
4 | import unittest
5 |
6 | from matflow.models import TaskSchema
7 | from matflow.models.construction import normalise_local_inputs, get_local_inputs
8 | from matflow.errors import (
9 | IncompatibleSequence,
10 | TaskSchemaError,
11 | TaskParameterError,
12 | SequenceError,
13 | )
14 |
15 | # TODO: add test that warn is issued when an input is in base but also has a sequence.
16 |
17 |
18 | class TaskSchemaTestCase(unittest.TestCase):
19 | """Tests on TaskSchema"""
20 |
21 | def test_raise_on_input_is_output(self):
22 | with self.assertRaises(TaskSchemaError):
23 | TaskSchema('schema_1', inputs=['parameter_1'], outputs=['parameter_1'])
24 |
25 | def test_raise_on_input_map_bad_inputs(self):
26 | """Check inputs defined in the schema input map are in the schema inputs list."""
27 |
28 | with self.assertRaises(TaskSchemaError):
29 | TaskSchema(
30 | 'schema_1',
31 | inputs=['parameter_7', 'parameter_9'],
32 | outputs=['parameter_8'],
33 | input_map=[
34 | {
35 | 'inputs': [
36 | # "parameter_10" is not in the inputs list.
37 | 'parameter_10',
38 | ],
39 | 'file': 'input_file_1',
40 | }
41 | ]
42 | )
43 |
44 | def test_raise_on_output_map_bad_outputs(self):
45 | """Check outputs defined in the schema output map are in the schema outputs list."""
46 |
47 | with self.assertRaises(TaskSchemaError):
48 | TaskSchema(
49 | 'schema_1',
50 | inputs=['parameter_7', 'parameter_9'],
51 | outputs=['parameter_8'],
52 | output_map=[
53 | {
54 | 'files': [
55 | 'output_file_1',
56 | ],
57 | # "parameter_10" is not in the outputs list.
58 | 'output': 'parameter_10',
59 | }
60 | ]
61 | )
62 |
63 |
64 | class TaskParameterTestCase(unittest.TestCase):
65 | """Tests of correct behaviour when defining tasks."""
66 |
67 | def test_raise_on_unknown_input(self):
68 | with self.assertRaises(TaskParameterError):
69 | schema = TaskSchema(
70 | 'schema_1',
71 | inputs=['parameter_1'],
72 | outputs=['parameter_2'],
73 | )
74 | schema.check_surplus_inputs(['parameter_3'])
75 |
76 | def test_raise_on_missing_input(self):
77 | with self.assertRaises(TaskParameterError):
78 | schema = TaskSchema(
79 | 'schema1',
80 | inputs=['parameter_1', 'parameter_2'],
81 | outputs=['parameter_3'],
82 | )
83 | schema.check_missing_inputs(['parameter_2'])
84 |
85 |
86 | class NormaliseLocalTestCase(unittest.TestCase):
87 | """Testing `normalise_local_inputs`."""
88 |
89 | def test_raise_on_bad_nest_idx_float(self):
90 | """Check raises on non-integer (float) nest index for any sequence."""
91 | sequences = [{'name': 'p1', 'nest_idx': 1.0, 'vals': [101, 102]}]
92 | with self.assertRaises(SequenceError):
93 | normalise_local_inputs(sequences=sequences)
94 |
95 | def test_raise_on_bad_nest_idx_string(self):
96 | """Check raises on non-integer (str) nest index for any sequence."""
97 | sequences = [{'name': 'p1', 'nest_idx': '0', 'vals': [101, 102]}]
98 | with self.assertRaises(SequenceError):
99 | normalise_local_inputs(sequences=sequences)
100 |
101 | def test_raise_on_bad_nest_idx_list(self):
102 | """Check raises on non-integer (list) nest index for any sequence."""
103 | sequences = [{'name': 'p1', 'nest_idx': [1, 0], 'vals': [101, 102]}]
104 | with self.assertRaises(SequenceError):
105 | normalise_local_inputs(sequences=sequences)
106 |
107 |
108 | class GetLocalInputsExceptionTestCase(unittest.TestCase):
109 | """Testing exceptions and warnings from `get_local_inputs`."""
110 |
111 | def test_raise_on_missing_nest_idx(self):
112 |         """Check raises when there is more than one sequence but `nest_idx` is
113 |         missing from any of them."""
114 | sequences = [
115 | {'name': 'p2', 'vals': [201, 202], 'nest_idx': 0},
116 | {'name': 'p3', 'vals': [301, 302]},
117 | ]
118 | with self.assertRaises(SequenceError):
119 | get_local_inputs([], sequences=sequences)
120 |
121 | def test_raise_on_bad_sequence_vals_type_str(self):
122 | """Test raises when sequence vals is a string."""
123 | sequences = [{'name': 'p1', 'vals': '120'}]
124 | with self.assertRaises(SequenceError):
125 | get_local_inputs([], sequences=sequences)
126 |
127 | def test_raise_on_bad_sequence_vals_type_number(self):
128 | """Test raises when sequence vals is a number."""
129 | sequences = [{'name': 'p1', 'vals': 120}]
130 | with self.assertRaises(SequenceError):
131 | get_local_inputs([], sequences=sequences)
132 |
133 | def test_raise_on_bad_sequences_type(self):
134 | """Test raises when sequences is not a list."""
135 | sequences = {'name': 'p1', 'vals': [1, 2]}
136 | with self.assertRaises(SequenceError):
137 | get_local_inputs([], sequences=sequences)
138 |
139 | def test_warn_on_unrequired_nest_idx(self):
140 |         """Test a warning is issued when `nest_idx` is given but not required."""
141 | sequences = [{'name': 'p1', 'vals': [101, 102], 'nest_idx': 0}]
142 | with self.assertWarns(Warning):
143 | get_local_inputs([], sequences=sequences)
144 |
145 | def test_raise_on_bad_sequence_keys(self):
146 | """Test raises when a sequence has unknown keys."""
147 | sequences = [{'name': 'p1', 'vals': [101, 102], 'bad_key': 4}]
148 | with self.assertRaises(SequenceError):
149 | get_local_inputs([], sequences=sequences)
150 |
151 | def test_raise_on_missing_sequence_keys(self):
152 | """Test raises when a sequence has missing keys."""
153 | sequences = [{'vals': [101, 102]}]
154 | with self.assertRaises(SequenceError):
155 | get_local_inputs([], sequences=sequences)
156 |
157 | def test_raise_on_incompatible_nesting(self):
158 | """Test error raised on logically inconsistent Task sequence."""
159 | sequences = [
160 | {'name': 'p1', 'nest_idx': 0, 'vals': [101, 102]},
161 | {'name': 'p2', 'nest_idx': 0, 'vals': [201]},
162 | ]
163 | with self.assertRaises(IncompatibleSequence):
164 | get_local_inputs([], sequences=sequences)
165 |
166 |
167 | class GetLocalInputsInputsTestCase(unittest.TestCase):
168 | """Tests on the `inputs` dict generated by `get_local_inputs`."""
169 |
170 | def test_base_only(self):
171 | """Check expected output for no sequences."""
172 | base = {'p1': 101}
173 | local_ins = get_local_inputs([], base=base)['inputs']
174 | local_ins_exp = {'p1': {'vals': [101], 'vals_idx': [0]}}
175 | self.assertTrue(local_ins == local_ins_exp)
176 |
177 | def test_base_and_sequence(self):
178 | """Check expected output for base and one sequence."""
179 | base = {'p1': 101}
180 | sequences = [{'name': 'p2', 'vals': [201, 202]}]
181 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs']
182 | local_ins_exp = {
183 | 'p1': {'vals': [101], 'vals_idx': [0, 0]},
184 | 'p2': {'vals': [201, 202], 'vals_idx': [0, 1]},
185 | }
186 | self.assertTrue(local_ins == local_ins_exp)
187 |
188 | def test_base_and_multi_nested_sequences(self):
189 | """Check expected output for base and two nested sequences."""
190 | base = {'p1': 101}
191 | sequences = [
192 | {'name': 'p2', 'vals': [201, 202], 'nest_idx': 0},
193 | {'name': 'p3', 'vals': [301, 302, 303], 'nest_idx': 1},
194 | ]
195 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs']
196 | local_ins_exp = {
197 | 'p1': {'vals': [101], 'vals_idx': [0, 0, 0, 0, 0, 0]},
198 | 'p2': {'vals': [201, 202], 'vals_idx': [0, 0, 0, 1, 1, 1]},
199 | 'p3': {'vals': [301, 302, 303], 'vals_idx': [0, 1, 2, 0, 1, 2]},
200 | }
201 | self.assertTrue(local_ins == local_ins_exp)
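    # Editor's illustration (hypothetical helper, not in the original suite):
    # the nested vals_idx patterns expected above are exactly the cross
    # product of the per-sequence value indices.
    def _illustrate_nested_vals_idx(self):
        from itertools import product
        pairs = list(product(range(2), range(3)))  # len(p2 vals) = 2, len(p3 vals) = 3
        p2_idx = [i for i, _ in pairs]  # [0, 0, 0, 1, 1, 1]
        p3_idx = [j for _, j in pairs]  # [0, 1, 2, 0, 1, 2]
        return p2_idx, p3_idx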
202 |
203 | def test_base_and_multi_merged_sequences(self):
204 | """Check expected output for base and two merged sequences."""
205 | base = {'p1': 101}
206 | sequences = [
207 | {'name': 'p2', 'vals': [201, 202], 'nest_idx': 0},
208 | {'name': 'p3', 'vals': [301, 302], 'nest_idx': 0},
209 | ]
210 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs']
211 | local_ins_exp = {
212 | 'p1': {'vals': [101], 'vals_idx': [0, 0]},
213 | 'p2': {'vals': [201, 202], 'vals_idx': [0, 1]},
214 | 'p3': {'vals': [301, 302], 'vals_idx': [0, 1]},
215 | }
216 | self.assertTrue(local_ins == local_ins_exp)
217 |
218 | def test_base_and_merged_and_nested_sequences(self):
219 |         """Check expected output for base, two merged sequences, and a nested sequence."""
220 | base = {'p1': 101}
221 | sequences = [
222 | {'name': 'p2', 'vals': [201, 202], 'nest_idx': 0},
223 | {'name': 'p3', 'vals': [301, 302], 'nest_idx': 0},
224 | {'name': 'p4', 'vals': [401, 402, 403], 'nest_idx': 1},
225 | ]
226 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs']
227 | local_ins_exp = {
228 | 'p1': {'vals': [101], 'vals_idx': [0, 0, 0, 0, 0, 0]},
229 | 'p2': {'vals': [201, 202], 'vals_idx': [0, 0, 0, 1, 1, 1]},
230 | 'p3': {'vals': [301, 302], 'vals_idx': [0, 0, 0, 1, 1, 1]},
231 | 'p4': {'vals': [401, 402, 403], 'vals_idx': [0, 1, 2, 0, 1, 2]},
232 | }
233 | self.assertTrue(local_ins == local_ins_exp)
234 |
235 | def test_equivalent_relative_nesting_idx(self):
236 |         """Check only the relative order of `nest_idx` values matters, not their absolute values."""
237 | sequences_1 = [
238 | {'name': 'p1', 'nest_idx': 0, 'vals': [101, 102, 103]},
239 | {'name': 'p2', 'nest_idx': 1, 'vals': [201, 202]},
240 | ]
241 | sequences_2 = copy.deepcopy(sequences_1)
242 | sequences_2[0]['nest_idx'] = 105
243 | sequences_2[1]['nest_idx'] = 2721
244 |
245 | local_ins_1 = get_local_inputs([], sequences=sequences_1)['inputs']
246 | local_ins_2 = get_local_inputs([], sequences=sequences_2)['inputs']
247 |
248 | self.assertTrue(local_ins_1 == local_ins_2)
249 |
250 | def test_correct_number_of_local_inputs_all_nesting(self):
251 | """Check the correct number of elements for a given input."""
252 | sequences = [
253 | {'name': 'p1', 'nest_idx': 0, 'vals': [101, 102, 103]},
254 | {'name': 'p2', 'nest_idx': 1, 'vals': [201, 202]},
255 | ]
256 | local_ins = get_local_inputs([], sequences=sequences)['inputs']
257 | self.assertTrue(len(local_ins['p1']['vals_idx']) == 6)
258 |
259 | def test_all_inputs_local_inputs_size(self):
260 | """Check all inputs have the same number of elements."""
261 | sequences = [
262 | {'name': 'p1', 'nest_idx': 0, 'vals': [101, 102, 103]},
263 | {'name': 'p2', 'nest_idx': 1, 'vals': [201, 202]},
264 | ]
265 | local_ins = get_local_inputs([], sequences=sequences)['inputs']
266 | self.assertTrue(
267 | len(local_ins['p1']['vals_idx']) == len(local_ins['p2']['vals_idx'])
268 | )
269 |
270 | def test_correct_number_of_local_inputs_all_merge(self):
271 | """Check the correct number of local inputs for merging three sequences."""
272 | sequences = [
273 | {'name': 'p1', 'nest_idx': 3, 'vals': [101, 102]},
274 | {'name': 'p2', 'nest_idx': 3, 'vals': [201, 202]},
275 | {'name': 'p3', 'nest_idx': 3, 'vals': [301, 302]},
276 | ]
277 | local_ins = get_local_inputs([], sequences=sequences)['inputs']
278 | self.assertTrue(
279 | len(local_ins['p1']['vals_idx']) ==
280 | len(local_ins['p2']['vals_idx']) ==
281 | len(local_ins['p3']['vals_idx']) == 2
282 | )
283 |
284 | def test_correct_number_of_local_inputs_one_merge(self):
285 | """Check the correct number of local inputs for merging/nesting three sequences."""
286 | sequences = [
287 | {'name': 'p1', 'nest_idx': 3, 'vals': [101, 102]},
288 | {'name': 'p2', 'nest_idx': 4, 'vals': [201, 202]},
289 | {'name': 'p3', 'nest_idx': 4, 'vals': [301, 302]},
290 | ]
291 | local_ins = get_local_inputs([], sequences=sequences)['inputs']
292 | self.assertTrue(
293 | len(local_ins['p1']['vals_idx']) ==
294 | len(local_ins['p2']['vals_idx']) ==
295 | len(local_ins['p3']['vals_idx']) == 4
296 | )
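    # In general, as the two tests above suggest: sequences sharing a nest_idx
    # must be equal-length and merge element-wise, while distinct nest_idx
    # values combine multiplicatively (here 2 x 2 = 4 elements).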
297 |
298 | def test_base_is_merged_into_sequence(self):
299 | """Check the base dict is merged into a sequence."""
300 | base = {'p1': 101}
301 | sequences = [{'name': 'p2', 'nest_idx': 0, 'vals': [201, 202]}]
302 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs']
303 | self.assertTrue(
304 | local_ins['p1']['vals_idx'] == [0, 0] and
305 | local_ins['p2']['vals_idx'] == [0, 1]
306 | )
307 |
308 | def test_unit_length_sequence(self):
309 |         """Check a sequence of length one has the same effect as specifying the
310 |         parameter in the base dict."""
311 | base = {'p1': 101}
312 | sequences = [{'name': 'p1', 'nest_idx': 0, 'vals': [101]}]
313 | local_ins_1 = get_local_inputs([], sequences=sequences)['inputs']
314 | local_ins_2 = get_local_inputs([], base=base)['inputs']
315 | self.assertTrue(local_ins_1 == local_ins_2)
316 |
317 |
318 | class GetLocalInputsFullTestCase(unittest.TestCase):
319 | """Explicit checks on the full outputs of `get_local_inputs`."""
320 |
321 |     def full_test_1(self):  # placeholder; not collected by unittest (name lacks the `test_` prefix)
322 |         pass
323 |
--------------------------------------------------------------------------------
/tests/test_workflow.py:
--------------------------------------------------------------------------------
1 | """Module containing unit tests on Workflow initialisation."""
2 |
3 | import unittest
4 |
5 | from matflow.errors import IncompatibleWorkflow
6 | from matflow.models import TaskSchema
7 | from matflow.models.construction import get_dependency_idx
8 |
9 | """
10 | tests for inputs/outputs_idx:
11 | - for a variety of scenarios, check all parameters from the same task have the same number of elements_idx.
12 | - for a few scenarios, check expected elements_idx and task_idx.
13 | - check all keys of output (i.e. `task_idx`) are exactly the set of task_idx values in downstream + upstream tasks.
14 | - check works when no upstream tasks.
15 |
16 | tests for resolve_task_num_elements:
17 | - check works when no upstream tasks
18 |
19 | """
20 |
21 |
22 | def init_schemas(task_lst):
23 | """Construct TaskSchema objects for TaskDependencyTestCase tests."""
24 | for idx, i in enumerate(task_lst):
25 | task_lst[idx]['schema'] = TaskSchema(**i['schema'])
26 | return task_lst
27 |
28 |
29 | class TaskDependencyTestCase(unittest.TestCase):
30 | """Tests on `get_dependency_idx`"""
31 |
32 | def test_single_dependency(self):
33 | """Test correct dependency index for a single task dependency."""
34 | task_lst = [
35 | {
36 | 'context': '',
37 | 'schema': {
38 | 'name': 'one',
39 | 'inputs': [
40 | {'name': 'p1', 'context': None},
41 | {'name': 'p2', 'context': None},
42 | ],
43 | 'outputs': ['p3'],
44 | },
45 | },
46 | {
47 | 'context': '',
48 | 'schema': {
49 |                 'name': 'two',
50 | 'inputs': [
51 | {'name': 'p3', 'context': None},
52 | {'name': 'p4', 'context': None},
53 | ],
54 | 'outputs': ['p5'],
55 | },
56 | },
57 | ]
58 | dep_idx = get_dependency_idx(init_schemas(task_lst))
59 | dep_idx_exp = [[], [0]]
60 | self.assertTrue(dep_idx == dep_idx_exp)
61 |
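    # A hypothetical base-case companion to the test above, covering the
    # "no upstream tasks" situation; the expected value `[[]]` is inferred
    # from the dep_idx patterns used in this class, not verified:
    #
    #     def test_no_dependencies(self):
    #         task_lst = [
    #             {
    #                 'context': '',
    #                 'schema': {
    #                     'name': 'one',
    #                     'inputs': [{'name': 'p1', 'context': None}],
    #                     'outputs': ['p2'],
    #                 },
    #             },
    #         ]
    #         dep_idx = get_dependency_idx(init_schemas(task_lst))
    #         self.assertTrue(dep_idx == [[]])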
62 | def test_single_dependency_two_contexts(self):
63 | """Test single dependencies for two parallel contexts."""
64 | task_lst = [
65 | {
66 | 'context': 'context_A',
67 | 'schema': {
68 | 'name': 'one',
69 | 'inputs': [
70 | {'name': 'p1', 'context': None},
71 | {'name': 'p2', 'context': None},
72 | ],
73 | 'outputs': ['p3'],
74 | },
75 | },
76 | {
77 | 'context': 'context_A',
78 | 'schema': {
79 |                 'name': 'two',
80 | 'inputs': [
81 | {'name': 'p3', 'context': None},
82 | {'name': 'p4', 'context': None},
83 | ],
84 | 'outputs': ['p5'],
85 | },
86 | },
87 | {
88 | 'context': 'context_B',
89 | 'schema': {
90 | 'name': 'one',
91 | 'inputs': [
92 | {'name': 'p1', 'context': None},
93 | {'name': 'p2', 'context': None},
94 | ],
95 | 'outputs': ['p3'],
96 | },
97 | },
98 | {
99 | 'context': 'context_B',
100 | 'schema': {
101 |                 'name': 'two',
102 | 'inputs': [
103 | {'name': 'p3', 'context': None},
104 | {'name': 'p4', 'context': None},
105 | ],
106 | 'outputs': ['p5'],
107 | },
108 | },
109 | ]
110 | dep_idx = get_dependency_idx(init_schemas(task_lst))
111 | dep_idx_exp = [[], [0], [], [2]]
112 | self.assertTrue(dep_idx == dep_idx_exp)
113 |
114 | def test_two_dependencies(self):
115 | """Test where a task depends on two tasks."""
116 | task_lst = [
117 | {
118 | 'context': 'contextA',
119 | 'schema': {
120 | 'name': 'one',
121 | 'inputs': [
122 | {'name': 'p1', 'context': None},
123 | {'name': 'p2', 'context': None},
124 | ],
125 | 'outputs': ['p3', 'p4'],
126 | },
127 | },
128 | {
129 | 'context': 'contextB',
130 | 'schema': {
131 | 'name': 'one',
132 | 'inputs': [
133 | {'name': 'p1', 'context': None},
134 | {'name': 'p2', 'context': None},
135 | ],
136 | 'outputs': ['p3', 'p4'],
137 | },
138 | },
139 | {
140 | 'context': '',
141 | 'schema': {
142 |                 'name': 'two',
143 | 'inputs': [
144 | {'name': 'p3', 'context': 'contextA'},
145 | {'name': 'p4', 'context': 'contextB'},
146 | ],
147 | 'outputs': ['p5'],
148 | },
149 | },
150 | ]
151 | dep_idx = get_dependency_idx(init_schemas(task_lst))
152 | dep_idx_exp = [[], [], [0, 1]]
153 | self.assertTrue(dep_idx == dep_idx_exp)
154 |
155 | def test_raise_on_output_non_exclusivity(self):
156 | """Test raises on multiple tasks that include the same output (and context)."""
157 | task_lst = [
158 | {
159 | 'context': '',
160 | 'schema': {
161 | 'name': 'one',
162 | 'inputs': [
163 | {'name': 'p1', 'context': None},
164 | {'name': 'p2', 'context': None},
165 | ],
166 | 'outputs': ['p3'],
167 | },
168 | },
169 | {
170 | 'context': '',
171 | 'schema': {
172 | 'name': 'two',
173 | 'inputs': [
174 | {'name': 'p4', 'context': None},
175 | ],
176 | 'outputs': ['p3'],
177 | },
178 | },
179 | ]
180 | with self.assertRaises(IncompatibleWorkflow):
181 | get_dependency_idx(init_schemas(task_lst))
182 |
183 | def test_raise_on_circular_reference(self):
184 | """Test raises on circularly referential Tasks."""
185 | task_lst = [
186 | {
187 | 'context': '',
188 | 'schema': {
189 | 'name': 'one',
190 | 'inputs': [
191 | {'name': 'p1', 'context': None},
192 | ],
193 | 'outputs': ['p2'],
194 | },
195 | },
196 | {
197 | 'context': '',
198 | 'schema': {
199 | 'name': 'two',
200 | 'inputs': [
201 | {'name': 'p2', 'context': None},
202 | ],
203 | 'outputs': ['p1'],
204 | },
205 | },
206 | ]
207 | with self.assertRaises(IncompatibleWorkflow):
208 | get_dependency_idx(init_schemas(task_lst))
209 |
--------------------------------------------------------------------------------
/workflow_viz.svg:
--------------------------------------------------------------------------------
[SVG markup elided: workflow visualisation graphic, 569 lines]
--------------------------------------------------------------------------------