├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── element_idx.svg
├── environment.yml
├── matflow
│   ├── __init__.py
│   ├── _version.py
│   ├── api.py
│   ├── cli.py
│   ├── config.py
│   ├── errors.py
│   ├── extensions.py
│   ├── hicklable.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── command.py
│   │   ├── construction.py
│   │   ├── element.py
│   │   ├── parameters.py
│   │   ├── software.py
│   │   ├── task.py
│   │   └── workflow.py
│   ├── profile.py
│   ├── scripting.py
│   ├── utils.py
│   └── validation.py
├── requirements.txt
├── setup.py
├── tests
│   ├── __init__.py
│   ├── test_element_idx.py
│   ├── test_hicklable.py
│   ├── test_task.py
│   └── test_workflow.py
└── workflow_viz.svg
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # IPython
79 | profile_default/
80 | ipython_config.py
81 |
82 | # pyenv
83 | .python-version
84 |
85 | # pipenv
86 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
87 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
88 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not
89 | # install all needed dependencies.
90 | #Pipfile.lock
91 |
92 | # celery beat schedule file
93 | celerybeat-schedule
94 |
95 | # SageMath parsed files
96 | *.sage.py
97 |
98 | # Environments
99 | .env
100 | .venv
101 | env/
102 | venv/
103 | ENV/
104 | env.bak/
105 | venv.bak/
106 |
107 | # Spyder project settings
108 | .spyderproject
109 | .spyproject
110 |
111 | # Rope project settings
112 | .ropeproject
113 |
114 | # mkdocs documentation
115 | /site
116 |
117 | # mypy
118 | .mypy_cache/
119 | .dmypy.json
120 | dmypy.json
121 |
122 | # Pyre type checker
123 | .pyre/
124 |
125 | # VS Code
126 | /.vscode
127 | *.code-workspace
128 |
129 | # Intellij IDEs
130 | /.idea
131 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 |
3 | ## [0.2.27] - 2024.06.26
4 |
5 | ### Fixed
6 |
7 | - Fix dependencies
8 |
9 | ## [0.2.26] - 2022.03.18
10 |
11 | ### Fixed
12 |
13 | - Use `traceback` module to print full exception from an output map that generates an exception.
14 | - Bug in `scripting.get_snippet_signature` function that produces code with a syntax error.
15 |
16 | ## [0.2.25] - 2021.12.20
17 |
18 | ### Fixed
19 |
20 | - Fix bug where preparation and processing run options were ignored on workflow load.
21 | - Fix bug where archive options were ignored on workflow load.
22 |
23 | ## [0.2.24] - 2021.10.06
24 |
25 | ### Fixed
26 |
27 | - Fix bug introduced in 0.2.23, where default preparation/processing run options were ignored.
28 |
29 | ## [0.2.23] - 2021.10.06
30 |
31 | ### Fixed
32 |
33 | - Fix inability to override default (preparation/processing) run options with an empty dict
34 |
35 | ## [0.2.22] - 2021.08.14
36 |
37 | ### Added
38 |
39 | - Add support for multiple archives. Fix [#72](https://github.com/LightForm-group/matflow/issues/72).
40 |
41 | ### Fixed
42 |
43 | - Fix error message if an input mapper function has an unknown argument.
44 | - Catch and print error message from output map function failure.
45 | - Fix incorrect import key when importing from a non-trivial context that is not defined in the schema.
46 |
47 | ## [0.2.21] - 2021.06.06
48 |
49 | ### Added
50 |
51 | - Allow passing a subset of the task input parameters to the output mapper function. Resolve [#102](https://github.com/LightForm-group/matflow/issues/102).
52 | - Allow passing all iterations of an input parameter to a function mapper. Resolve [#104](https://github.com/LightForm-group/matflow/issues/104).
53 | - Allow running an on-demand archive to an existing/completed workflow: `matflow archive path/to/workflow/directory ARCHIVE_NAME`. Resolve [#68](https://github.com/LightForm-group/matflow/issues/68).
54 | - Allow specifying `default_metadata` in the `config.yml` file. Keys are merged with `metadata` specified in the workflow spec file. Resolve [#98](https://github.com/LightForm-group/matflow/issues/98).
55 |
56 | ### Fixed
57 |
58 | - Save element resource usage (e.g. run time). Fix [#97](https://github.com/LightForm-group/matflow/issues/97).
59 | - Fix bug when determining the "producing task" in an iteration pathway. Fix [#105](https://github.com/LightForm-group/matflow/issues/105).
60 | - Fix bug when a file input parameter is specified with a `$HOME` tilde: `~/path/to/file`.
61 |
62 | ## [0.2.20] - 2021.05.12
63 |
64 | ### Added
65 |
66 | - Add `Task.cleanup` attribute that can be used to optionally specify a list of glob patterns, representing file names to remove at the end of `Workflow.process_task_element`. Useful for removing very large simulation outputs that are not required after MatFlow has extracted the requested data.
67 | - Add methods to `Element` object: `get_file_lines` and `print_file_lines`, which take a file name and a slice of lines to get or print.
68 |
69 | ### Changed
70 |
71 | - Change working directory to element directory for invoking input/output/function mapper functions. This is required in some cases where a tool or script does not accept a file path as an argument.
72 | - Allow specifying the `task_idx` directly when importing parameters. This overrides any specified `context`.
73 |
74 | ### Fixed
75 |
76 | - Catch `ImportError` and `SyntaxError` when trying to load extensions.
77 | - Import from the highest task index when importing a parameter that has been through a parameter-modifying task - fix [#103](https://github.com/LightForm-group/matflow/issues/103). This can be overridden by specifying a `task_idx` directly.
78 |
79 | ## [0.2.19] - 2021.04.12 (April 2021 - Fix 1)
80 |
81 | ### Fixed
82 |
83 | - Fix type problem when input schema keys are specified "inline" in the task schema (e.g. as `CRC_file_path[file=True,save=False]`), in which the keys remain as type `str`, when they should be `bool`.
84 | - Fix problem when an imported parameter is used in a task that is iterated.
85 |
86 | ## [0.2.18] - 2021.04.10 (April 2021)
87 |
88 | ### Fixed
89 |
90 | - Fix misleading error message when a task parameter specified as a file path does not actually exist as a file.
91 | - Fix bug where if all possible dependency pathways are circularly dependent, this is not caught by MatFlow. Fix [#88](https://github.com/LightForm-group/matflow/issues/88).
92 | - Fix issue with accessing parameter data with dot-notation via their "safe names". Fix [#87](https://github.com/LightForm-group/matflow/issues/87).
93 |
94 | ### Added
95 |
96 | - Add new parameter key `ignore_dependency_from`, which is a list of task names. This allows us to exclude tasks when considering the dependencies of this parameter. Fix [#89](https://github.com/LightForm-group/matflow/issues/89).
97 | - Allow embedding file-path inputs (inputs that are text files) into the HDF5 file. Fix [#86](https://github.com/LightForm-group/matflow/issues/86).
98 | - Add `Task.unique_name` property which adds on the non-trivial `Task.context` to `Task.name`.
99 | - Tasks can be accessed from the task list via dot-notation. Fix [#90](https://github.com/LightForm-group/matflow/issues/90).
100 | - Add `Task.elements_idx` property to retrieve the correct `elements_idx` dict for that task.
101 | - Add new exception type: `ParameterImportError`.
102 | - Add ability to import parameters from existing workflows. Fix [#30](https://github.com/LightForm-group/matflow/issues/30)
103 |
104 | ### Changed
105 |
106 | - Non-trivial task contexts are now part of the task directory name to help distinguish task directories where multiple contexts are used. Fix [#50](https://github.com/LightForm-group/matflow/issues/50).
107 | - Add `context` argument to `Workflow.get_input_tasks` and `Workflow.get_output_tasks`.
108 |
109 | ## [0.2.17] - 2021.02.15
110 |
111 | ### Fixed
112 |
113 | - Fix issue [#82](https://github.com/LightForm-group/matflow/issues/82) where the default group is not defined in the `Workflow.element_idx` for tasks where no local inputs are defined.
114 |
115 | ### Added
116 |
117 | - Add support for flexible positioning of parameter-modifying tasks ([#81](https://github.com/LightForm-group/matflow/issues/81))
118 |
119 | ## [0.2.16] - 2021.02.05
120 |
121 | ### Fixed
122 |
123 | - Bump hpcflow to v0.1.13 to fix #80 and then to v0.1.14 to fix a database locking issue and a bug with choosing the correct working directories.
124 |
125 | ## [0.2.15] - 2021.01.18
126 |
127 | ### Changed
128 |
129 | - Change an Exception to a warning in `Workflow.get_element_data` to allow manually deleting element data without corrupting the workflow.
130 |
131 | ## [0.2.14] - 2021.01.17
132 |
133 | ### Added
134 |
135 | - Add method `Task.get_elements_from_iteration(iteration_idx)`.
136 |
137 | ## [0.2.13] - 2020.12.17
138 |
139 | ### Fixed
140 |
141 | - Fix bug when populating `Workflow.elements_idx` for more than two iterations.
142 |
143 | ## [0.2.12] - 2020.12.16
144 |
145 | ### Added
146 |
147 | - Add `Workflow.figures` attribute for storing associated figure definitions.
148 | - Add `Workflow.metadata` attribute for storing arbitrary metadata (will later be used for Zenodo archiving).
149 | - Add various `Workflow` static methods to help with retrieving information in the viewer without loading the whole workflow via `hickle`.
150 | - Add `get_task_schemas` to API to load the available task schemas without generating a workflow.
151 | - Add `refresh` bool parameter to `Config.set_config`, to force a reload of the configuration.
152 | - Support inputs as dependencies as well as outputs.
153 | - Support "parameter modifying" tasks (a task which outputs a parameter that is also an input to that task).
154 | - Add `iterate_run_options` to Workflow.
155 | - Add new methods for finding dependent and dependency tasks/parameters, and upstream/downstream parameter values associated with a given element.
156 | - Add input option: `include_all_iterations`. If True, inputs from all iterations are passed to input map functions.
157 |
158 | ### Fixed
159 |
160 | - Only save input/output map files if they exist!
161 | - Fix bug so that groups are propagated correctly.
162 | - Fix various code formatting issues.
163 | - Fix failure to raise on invalid schemas.
164 | - Fix bug when the same file is to be saved from multiple output maps.
165 |
166 | ### Changed
167 | - Redo task sorting algorithm such that minimal ordering changes are made.
168 | - Set `stats` bool to False by default.
169 | - Bump hpcflow version to v0.1.12.
170 |
171 | ## [0.2.11] - 2020.09.29
172 |
173 | ### Fixed
174 |
175 | - Resolve `~` in task schema and software file paths specified in the configuration file.
176 |
177 | ## [0.2.10] - 2020.09.29
178 |
179 | ### Fixed
180 |
181 | - Fix failure when a function mapper function does not return anything.
182 |
183 | ## [0.2.9] - 2020.09.17
184 |
185 | ### Added
186 |
187 | - Add scripting module for generating Python source scripts.
188 | - Default run options can be specified in the MatFlow configuration file for task, preparation and processing jobs using both "sticky" and "non-sticky" keys: `default_run_options`, `default_sticky_run_options`, `default_preparation_run_options`, `default_sticky_preparation_run_options`, `default_processing_run_options` and `default_sticky_processing_run_options`. The "sticky" defaults are always applied (but workflow-specified run options take precedence), whereas the "non-sticky" defaults are only applied if a task has no workflow-specified run options.
189 |
190 | ## [0.2.8] - 2020.09.01
191 |
192 | ### Changed
193 | - Add `version_info` to `Software.__repr__` method
194 | - Validate source maps after missing schema check
195 |
196 | ### Fixed
197 | - Remove vestigial and buggy line in `construction.get_element_idx` which would lead to enormous memory usage for large sequences.
198 |
199 | ## [0.2.7] - 2020.08.18
200 |
201 | ### Added
202 | - Default values can be specified for output map options within the schema
203 | - Default values can be specified for task input parameters within the schema
204 | - Depending on the inputs defined, different commands can be run, via "command pathway" definitions in the schema implementations.
205 |
206 | ### Changed
207 |
208 | - Uses `hickle` version 4.
209 | - Group structure in workflow HDF5 file has changed (backwards-incompatible); element data is more conveniently organised for inspecting the HDF5 file manually.
210 |
211 | ### Fixed
212 |
213 | - Fix problem when a task input key includes slashes.
214 |
215 | ## [0.2.6] - 2020.07.08
216 |
217 | ### Added
218 |
219 | - Add alternate scratch feature to allow a given task to be executed within a separate temporary directory.
220 |
221 | ### Fixed
222 |
223 | - Fix bug if specifying `merge_priority` on the default group.
224 |
225 | ### Changed
226 |
227 | - Bump hpcflow to v0.1.10
228 |
229 | ## [0.2.5] - 2020.06.27
230 |
231 | ### Fixed
232 |
233 | - Fix copying of profile file to the workflow directory when the profile file path is not in the current working directory.
234 |
235 | ## [0.2.4] - 2020.06.26
236 |
237 | ### Changed
238 |
239 | - Fix dependency `hickle` version for now, until we can assess requirements for jumping to version 4.
240 |
241 | ## [0.2.3] - 2020.06.26
242 |
243 | ### Changed
244 |
245 | - Files generated by input maps are only saved into the workflow file if explicitly requested with `save: true`.
246 |
247 | ### Fixed
248 |
249 | - Fix bug in `SourcesPreparation.get_formatted_commands` that appears if there are no commands.
250 |
251 | ## [0.2.2] - 2020.06.09
252 |
253 | ### Changed
254 |
255 | - Improved Dropbox authorization flow.
256 | - Bump hpcflow to v0.1.9
257 |
258 | ## [0.2.1] - 2020.06.09
259 |
260 | ### Fixed
261 |
262 | - Fix bug in reading `default_preparation_run_options` and `default_processing_run_options` dicts from the config file.
263 |
264 | ## [0.2.0] - 2020.06.09
265 |
266 | ### Added
267 |
268 | - Add a `Workflow.history` attribute that tracks when the workflow was modified. It also stores pertinent software versions.
269 | - Add a CLI command `matflow validate` that runs through the task schema and extension validation.
270 | - Add a CLI command `matflow kill`, which kills all executing and pending tasks.
271 | - Added configuration option `prepare_process_scheduler_options` to specify scheduler options for the prepare and process tasks.
272 | - matflow profile is stored as a `dict` in addition to a string representation of the profile file (both in the `Workflow.profile` attribute).
273 |
274 | ### Changed
275 |
276 | - Module and function `jsonable.py` and `to_jsonable` renamed to `hicklable.py` and `to_hicklable`.
277 | - Workflow and Task attributes in the workflow HDF5 file are now represented without leading underscores.
278 | - Tasks with only a single element use the task directory directly instead of using an element sub-directory.
279 | - Loading extensions and configuration files has been moved from the root `__init__` to separate modules.
280 | - `make_workflow`, `submit_workflow`, `load_workflow`, `append_schema_source`, `prepend_schema_source` and `validate` can now be imported from the root level: `from matflow import make_workflow` etc.
281 | - There are no longer unsightly global variables for `TASK_INPUT_MAP` etc. This functionality has been subsumed into the global `Config` class. This is tidier and provides a better place for some validation.
282 | - Software key `sources` has been replaced by `environment`.
283 | - hpcflow configuration directory is generated within the matflow configuration directory.
284 | - Jobscript names refer to the task that they prepare/execute/process.
285 | - hpcflow profile is passed as a `dict` to hpcflow. For information, the hpcflow profile is still dumped to a file.
286 |
287 | ## [0.1.3] - 2020.05.27
288 |
289 | - New release for Zenodo archive.
290 |
291 | ## [0.1.2] - 2020.05.12
292 |
293 | - Latest dev branch merged...
294 |
295 | ## [0.1.1] - 2020.05.07
296 |
297 | ### Fixed
298 |
299 | - Added missing dependency.
300 |
301 | ## [0.1.0] - 2020.05.07
302 |
303 | Initial release.
304 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Mozilla Public License Version 2.0
2 | ==================================
3 |
4 | 1. Definitions
5 | --------------
6 |
7 | 1.1. "Contributor"
8 | means each individual or legal entity that creates, contributes to
9 | the creation of, or owns Covered Software.
10 |
11 | 1.2. "Contributor Version"
12 | means the combination of the Contributions of others (if any) used
13 | by a Contributor and that particular Contributor's Contribution.
14 |
15 | 1.3. "Contribution"
16 | means Covered Software of a particular Contributor.
17 |
18 | 1.4. "Covered Software"
19 | means Source Code Form to which the initial Contributor has attached
20 | the notice in Exhibit A, the Executable Form of such Source Code
21 | Form, and Modifications of such Source Code Form, in each case
22 | including portions thereof.
23 |
24 | 1.5. "Incompatible With Secondary Licenses"
25 | means
26 |
27 | (a) that the initial Contributor has attached the notice described
28 | in Exhibit B to the Covered Software; or
29 |
30 | (b) that the Covered Software was made available under the terms of
31 | version 1.1 or earlier of the License, but not also under the
32 | terms of a Secondary License.
33 |
34 | 1.6. "Executable Form"
35 | means any form of the work other than Source Code Form.
36 |
37 | 1.7. "Larger Work"
38 | means a work that combines Covered Software with other material, in
39 | a separate file or files, that is not Covered Software.
40 |
41 | 1.8. "License"
42 | means this document.
43 |
44 | 1.9. "Licensable"
45 | means having the right to grant, to the maximum extent possible,
46 | whether at the time of the initial grant or subsequently, any and
47 | all of the rights conveyed by this License.
48 |
49 | 1.10. "Modifications"
50 | means any of the following:
51 |
52 | (a) any file in Source Code Form that results from an addition to,
53 | deletion from, or modification of the contents of Covered
54 | Software; or
55 |
56 | (b) any new file in Source Code Form that contains any Covered
57 | Software.
58 |
59 | 1.11. "Patent Claims" of a Contributor
60 | means any patent claim(s), including without limitation, method,
61 | process, and apparatus claims, in any patent Licensable by such
62 | Contributor that would be infringed, but for the grant of the
63 | License, by the making, using, selling, offering for sale, having
64 | made, import, or transfer of either its Contributions or its
65 | Contributor Version.
66 |
67 | 1.12. "Secondary License"
68 | means either the GNU General Public License, Version 2.0, the GNU
69 | Lesser General Public License, Version 2.1, the GNU Affero General
70 | Public License, Version 3.0, or any later versions of those
71 | licenses.
72 |
73 | 1.13. "Source Code Form"
74 | means the form of the work preferred for making modifications.
75 |
76 | 1.14. "You" (or "Your")
77 | means an individual or a legal entity exercising rights under this
78 | License. For legal entities, "You" includes any entity that
79 | controls, is controlled by, or is under common control with You. For
80 | purposes of this definition, "control" means (a) the power, direct
81 | or indirect, to cause the direction or management of such entity,
82 | whether by contract or otherwise, or (b) ownership of more than
83 | fifty percent (50%) of the outstanding shares or beneficial
84 | ownership of such entity.
85 |
86 | 2. License Grants and Conditions
87 | --------------------------------
88 |
89 | 2.1. Grants
90 |
91 | Each Contributor hereby grants You a world-wide, royalty-free,
92 | non-exclusive license:
93 |
94 | (a) under intellectual property rights (other than patent or trademark)
95 | Licensable by such Contributor to use, reproduce, make available,
96 | modify, display, perform, distribute, and otherwise exploit its
97 | Contributions, either on an unmodified basis, with Modifications, or
98 | as part of a Larger Work; and
99 |
100 | (b) under Patent Claims of such Contributor to make, use, sell, offer
101 | for sale, have made, import, and otherwise transfer either its
102 | Contributions or its Contributor Version.
103 |
104 | 2.2. Effective Date
105 |
106 | The licenses granted in Section 2.1 with respect to any Contribution
107 | become effective for each Contribution on the date the Contributor first
108 | distributes such Contribution.
109 |
110 | 2.3. Limitations on Grant Scope
111 |
112 | The licenses granted in this Section 2 are the only rights granted under
113 | this License. No additional rights or licenses will be implied from the
114 | distribution or licensing of Covered Software under this License.
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a
116 | Contributor:
117 |
118 | (a) for any code that a Contributor has removed from Covered Software;
119 | or
120 |
121 | (b) for infringements caused by: (i) Your and any other third party's
122 | modifications of Covered Software, or (ii) the combination of its
123 | Contributions with other software (except as part of its Contributor
124 | Version); or
125 |
126 | (c) under Patent Claims infringed by Covered Software in the absence of
127 | its Contributions.
128 |
129 | This License does not grant any rights in the trademarks, service marks,
130 | or logos of any Contributor (except as may be necessary to comply with
131 | the notice requirements in Section 3.4).
132 |
133 | 2.4. Subsequent Licenses
134 |
135 | No Contributor makes additional grants as a result of Your choice to
136 | distribute the Covered Software under a subsequent version of this
137 | License (see Section 10.2) or under the terms of a Secondary License (if
138 | permitted under the terms of Section 3.3).
139 |
140 | 2.5. Representation
141 |
142 | Each Contributor represents that the Contributor believes its
143 | Contributions are its original creation(s) or it has sufficient rights
144 | to grant the rights to its Contributions conveyed by this License.
145 |
146 | 2.6. Fair Use
147 |
148 | This License is not intended to limit any rights You have under
149 | applicable copyright doctrines of fair use, fair dealing, or other
150 | equivalents.
151 |
152 | 2.7. Conditions
153 |
154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
155 | in Section 2.1.
156 |
157 | 3. Responsibilities
158 | -------------------
159 |
160 | 3.1. Distribution of Source Form
161 |
162 | All distribution of Covered Software in Source Code Form, including any
163 | Modifications that You create or to which You contribute, must be under
164 | the terms of this License. You must inform recipients that the Source
165 | Code Form of the Covered Software is governed by the terms of this
166 | License, and how they can obtain a copy of this License. You may not
167 | attempt to alter or restrict the recipients' rights in the Source Code
168 | Form.
169 |
170 | 3.2. Distribution of Executable Form
171 |
172 | If You distribute Covered Software in Executable Form then:
173 |
174 | (a) such Covered Software must also be made available in Source Code
175 | Form, as described in Section 3.1, and You must inform recipients of
176 | the Executable Form how they can obtain a copy of such Source Code
177 | Form by reasonable means in a timely manner, at a charge no more
178 | than the cost of distribution to the recipient; and
179 |
180 | (b) You may distribute such Executable Form under the terms of this
181 | License, or sublicense it under different terms, provided that the
182 | license for the Executable Form does not attempt to limit or alter
183 | the recipients' rights in the Source Code Form under this License.
184 |
185 | 3.3. Distribution of a Larger Work
186 |
187 | You may create and distribute a Larger Work under terms of Your choice,
188 | provided that You also comply with the requirements of this License for
189 | the Covered Software. If the Larger Work is a combination of Covered
190 | Software with a work governed by one or more Secondary Licenses, and the
191 | Covered Software is not Incompatible With Secondary Licenses, this
192 | License permits You to additionally distribute such Covered Software
193 | under the terms of such Secondary License(s), so that the recipient of
194 | the Larger Work may, at their option, further distribute the Covered
195 | Software under the terms of either this License or such Secondary
196 | License(s).
197 |
198 | 3.4. Notices
199 |
200 | You may not remove or alter the substance of any license notices
201 | (including copyright notices, patent notices, disclaimers of warranty,
202 | or limitations of liability) contained within the Source Code Form of
203 | the Covered Software, except that You may alter any license notices to
204 | the extent required to remedy known factual inaccuracies.
205 |
206 | 3.5. Application of Additional Terms
207 |
208 | You may choose to offer, and to charge a fee for, warranty, support,
209 | indemnity or liability obligations to one or more recipients of Covered
210 | Software. However, You may do so only on Your own behalf, and not on
211 | behalf of any Contributor. You must make it absolutely clear that any
212 | such warranty, support, indemnity, or liability obligation is offered by
213 | You alone, and You hereby agree to indemnify every Contributor for any
214 | liability incurred by such Contributor as a result of warranty, support,
215 | indemnity or liability terms You offer. You may include additional
216 | disclaimers of warranty and limitations of liability specific to any
217 | jurisdiction.
218 |
219 | 4. Inability to Comply Due to Statute or Regulation
220 | ---------------------------------------------------
221 |
222 | If it is impossible for You to comply with any of the terms of this
223 | License with respect to some or all of the Covered Software due to
224 | statute, judicial order, or regulation then You must: (a) comply with
225 | the terms of this License to the maximum extent possible; and (b)
226 | describe the limitations and the code they affect. Such description must
227 | be placed in a text file included with all distributions of the Covered
228 | Software under this License. Except to the extent prohibited by statute
229 | or regulation, such description must be sufficiently detailed for a
230 | recipient of ordinary skill to be able to understand it.
231 |
232 | 5. Termination
233 | --------------
234 |
235 | 5.1. The rights granted under this License will terminate automatically
236 | if You fail to comply with any of its terms. However, if You become
237 | compliant, then the rights granted under this License from a particular
238 | Contributor are reinstated (a) provisionally, unless and until such
239 | Contributor explicitly and finally terminates Your grants, and (b) on an
240 | ongoing basis, if such Contributor fails to notify You of the
241 | non-compliance by some reasonable means prior to 60 days after You have
242 | come back into compliance. Moreover, Your grants from a particular
243 | Contributor are reinstated on an ongoing basis if such Contributor
244 | notifies You of the non-compliance by some reasonable means, this is the
245 | first time You have received notice of non-compliance with this License
246 | from such Contributor, and You become compliant prior to 30 days after
247 | Your receipt of the notice.
248 |
249 | 5.2. If You initiate litigation against any entity by asserting a patent
250 | infringement claim (excluding declaratory judgment actions,
251 | counter-claims, and cross-claims) alleging that a Contributor Version
252 | directly or indirectly infringes any patent, then the rights granted to
253 | You by any and all Contributors for the Covered Software under Section
254 | 2.1 of this License shall terminate.
255 |
256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
257 | end user license agreements (excluding distributors and resellers) which
258 | have been validly granted by You or Your distributors under this License
259 | prior to termination shall survive termination.
260 |
261 | ************************************************************************
262 | * *
263 | * 6. Disclaimer of Warranty *
264 | * ------------------------- *
265 | * *
266 | * Covered Software is provided under this License on an "as is" *
267 | * basis, without warranty of any kind, either expressed, implied, or *
268 | * statutory, including, without limitation, warranties that the *
269 | * Covered Software is free of defects, merchantable, fit for a *
270 | * particular purpose or non-infringing. The entire risk as to the *
271 | * quality and performance of the Covered Software is with You. *
272 | * Should any Covered Software prove defective in any respect, You *
273 | * (not any Contributor) assume the cost of any necessary servicing, *
274 | * repair, or correction. This disclaimer of warranty constitutes an *
275 | * essential part of this License. No use of any Covered Software is *
276 | * authorized under this License except under this disclaimer. *
277 | * *
278 | ************************************************************************
279 |
280 | ************************************************************************
281 | * *
282 | * 7. Limitation of Liability *
283 | * -------------------------- *
284 | * *
285 | * Under no circumstances and under no legal theory, whether tort *
286 | * (including negligence), contract, or otherwise, shall any *
287 | * Contributor, or anyone who distributes Covered Software as *
288 | * permitted above, be liable to You for any direct, indirect, *
289 | * special, incidental, or consequential damages of any character *
290 | * including, without limitation, damages for lost profits, loss of *
291 | * goodwill, work stoppage, computer failure or malfunction, or any *
292 | * and all other commercial damages or losses, even if such party *
293 | * shall have been informed of the possibility of such damages. This *
294 | * limitation of liability shall not apply to liability for death or *
295 | * personal injury resulting from such party's negligence to the *
296 | * extent applicable law prohibits such limitation. Some *
297 | * jurisdictions do not allow the exclusion or limitation of *
298 | * incidental or consequential damages, so this exclusion and *
299 | * limitation may not apply to You. *
300 | * *
301 | ************************************************************************
302 |
303 | 8. Litigation
304 | -------------
305 |
306 | Any litigation relating to this License may be brought only in the
307 | courts of a jurisdiction where the defendant maintains its principal
308 | place of business and such litigation shall be governed by laws of that
309 | jurisdiction, without reference to its conflict-of-law provisions.
310 | Nothing in this Section shall prevent a party's ability to bring
311 | cross-claims or counter-claims.
312 |
313 | 9. Miscellaneous
314 | ----------------
315 |
316 | This License represents the complete agreement concerning the subject
317 | matter hereof. If any provision of this License is held to be
318 | unenforceable, such provision shall be reformed only to the extent
319 | necessary to make it enforceable. Any law or regulation which provides
320 | that the language of a contract shall be construed against the drafter
321 | shall not be used to construe this License against a Contributor.
322 |
323 | 10. Versions of the License
324 | ---------------------------
325 |
326 | 10.1. New Versions
327 |
328 | Mozilla Foundation is the license steward. Except as provided in Section
329 | 10.3, no one other than the license steward has the right to modify or
330 | publish new versions of this License. Each version will be given a
331 | distinguishing version number.
332 |
333 | 10.2. Effect of New Versions
334 |
335 | You may distribute the Covered Software under the terms of the version
336 | of the License under which You originally received the Covered Software,
337 | or under the terms of any subsequent version published by the license
338 | steward.
339 |
340 | 10.3. Modified Versions
341 |
342 | If you create software not governed by this License, and you want to
343 | create a new license for such software, you may create and use a
344 | modified version of this License if you rename the license and remove
345 | any references to the name of the license steward (except to note that
346 | such modified license differs from this License).
347 |
348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary
349 | Licenses
350 |
351 | If You choose to distribute Source Code Form that is Incompatible With
352 | Secondary Licenses under the terms of this version of the License, the
353 | notice described in Exhibit B of this License must be attached.
354 |
355 | Exhibit A - Source Code Form License Notice
356 | -------------------------------------------
357 |
358 | This Source Code Form is subject to the terms of the Mozilla Public
359 | License, v. 2.0. If a copy of the MPL was not distributed with this
360 | file, You can obtain one at http://mozilla.org/MPL/2.0/.
361 |
362 | If it is not possible or desirable to put the notice in a particular
363 | file, then You may include the notice in a location (such as a LICENSE
364 | file in a relevant directory) where a recipient would be likely to look
365 | for such a notice.
366 |
367 | You may add additional accurate notices of copyright ownership.
368 |
369 | Exhibit B - "Incompatible With Secondary Licenses" Notice
370 | ---------------------------------------------------------
371 |
372 | This Source Code Form is "Incompatible With Secondary Licenses", as
373 | defined by the Mozilla Public License, v. 2.0.
374 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://zenodo.org/badge/latestdoi/219949875) [](https://badge.fury.io/py/matflow)
2 |
3 | ## **This code has been superseded by a new version that can be found here: https://github.com/hpcflow/matflow-new.**
4 |
5 | # MatFlow
6 |
7 | MatFlow is a framework for running reproducible workflows in materials science, developed in the EPSRC programme grant [LightForm](http://lightform.org.uk), a research programme on light alloy formability. It is a Python program that interacts with software (open-source and proprietary) used in materials science via extensions (see the supported extensions below). It is particularly suited to hybrid workflows
8 | (involving both experimental data and computational work), such as HPC model calibration. Outputs, together with details of the workflow, are automatically stored in an open file format for post-processing, which MatFlow can automatically upload to data repositories like [Zenodo](https://zenodo.org/).
9 |
10 | See [this repository](https://github.com/LightForm-group/UoM-CSF-matflow) for information regarding a MatFlow installation.
11 |
12 | ## Extensions
13 |
14 | MatFlow uses extension packages to interact with arbitrary software. Here is a list of current MatFlow extensions.
15 |
16 | ### Released/in-progress extensions
17 | | Software | Description | Status | Version |
18 | | ------ | ------------- | ------- | ------- |
19 | | [DAMASK](https://damask.mpie.de/) | Düsseldorf Advanced Material Simulation Kit (crystal plasticity) | [Released](https://github.com/LightForm-group/matflow-damask) | [](https://pypi.org/project/matflow-damask) |
20 | | [MTEX](https://mtex-toolbox.github.io/) | Matlab toolbox for analyzing and modeling crystallographic textures | [Released](https://github.com/LightForm-group/matflow-mtex) | [](https://pypi.org/project/matflow-mtex) |
21 | | [formable](https://github.com/LightForm-group/formable) | Formability analyses in Python | [Released](https://github.com/LightForm-group/matflow-formable) | [](https://pypi.org/project/matflow-formable) |
22 | | [DefDAP](https://github.com/MechMicroMan/DefDAP) | A python library for correlating EBSD and HRDIC data. | [Released](https://github.com/LightForm-group/matflow-defdap) | [](https://pypi.org/project/matflow-defdap) |
23 | | [Abaqus](https://www.3ds.com/products-services/simulia/products/abaqus/) | Finite element analysis | In-progress | [](https://pypi.org/project/matflow-abaqus) |
24 | | [Neper](http://www.neper.info) | Polycrystal generation and meshing | [Released/In-progress](https://github.com/LightForm-group/matflow-neper) | [](https://pypi.org/project/matflow-neper) |
25 |
26 |
27 | ### Example inputs/outputs
28 | | Label | Attributes | Output from tasks | Input to tasks |
29 | | ----------------------- | ------------------------------------------------------------ | ----------------------------------------- | ------------------------------------------------------------ |
30 | | ODF | crystal_symmetry<br>speciment_symmetry<br>euler_angles<br>euler_angle_labels<br>weights<br>orientation_coordinate_system | get_model_texture<br>estimate_ODF | sample_texture |
31 | | microstructure_seeds | position<br>**orientations**<br>grid_size<br>phase_label | generate_microstructure_seeds | generate_volume_element |
32 | | orientations | euler_angles<br>euler_angle_labels<br>orientation_coordinate_system | sample_texture | generate_volume_element |
33 | | volume_element | grid<br>size<br>origin<br>**orientations**<br>grain_orientation_idx<br>grain_phase_label_idx<br>phase_labels<br>voxel_grain_idx<br>voxel_homogenization_idx | generate_volume_element | visualise_volume_element<br>simulate_volume_element_loading |
34 | | load_case | total_time<br>num_increments<br>def_grad_aim<br>def_grad_rate<br>stress<br>rotation | generate_load_case | simulate_volume_element_loading |
35 | | volume_element_response | ... | simulate_volume_element_loading | |
36 |
37 | ## Specifying default run options
38 |
39 | Default run options (i.e. options passed to the scheduler) can be specified in a few ways. Firstly, within the workflow file, `run_options` specified at the top level will be used for any tasks that do not have their own `run_options`. If a task *does* have a `run_options` key specified, the global `run_options` will not be used at all for that task.
40 |
41 | Additionally, you can specify default run options in the MatFlow configuration file (`config.yml`, by default generated in `~/.matflow`) with the options `default_run_options` and `default_sticky_run_options`. The "sticky" defaults are merged with any run options specified in the workflow file (with workflow-specified options taking precedence), whereas the "non-sticky" defaults are only used if no run options are supplied for a task. If no run options are supplied for a task, then both the "sticky" and "non-sticky" defaults will be used (with the "non-sticky" defaults taking precedence over the "sticky" defaults). Similar keys exist for task preparation and processing run options: `default_preparation_run_options`, `default_sticky_preparation_run_options` and `default_processing_run_options`, `default_sticky_processing_run_options`.
42 |
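43 | For illustration, below is a minimal sketch of how these defaults might look. The scheduler option names used here (`num_cores`, `time_limit`) and the task entry are placeholder assumptions; the available options depend on your scheduler. In `config.yml`:
44 | 
45 | ```yaml
46 | # "Sticky" defaults are merged with any workflow-specified run options
47 | # (workflow-specified options take precedence):
48 | default_sticky_run_options:
49 |   num_cores: 1
50 | # "Non-sticky" defaults apply only to tasks with no run options at all:
51 | default_run_options:
52 |   time_limit: "01:00:00"
53 | ```
54 | 
55 | And within a workflow file, a top-level `run_options` acts as the fallback for tasks that specify none:
56 | 
57 | ```yaml
58 | run_options:             # used by any task without its own `run_options`
59 |   num_cores: 4
60 | tasks:
61 |   - name: example_task   # hypothetical task
62 |     run_options:         # replaces the global `run_options` entirely for this task
63 |       num_cores: 8
64 | ```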
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: matflow_env
2 | dependencies:
3 | - python
4 | - pip
5 | - pylint
6 | - ipykernel
7 | - rope
8 | - autopep8
9 | - twine
10 |
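11 | # Usage note (standard conda commands, stated here as a convenience):
12 | #   conda env create -f environment.yml
13 | #   conda activate matflow_env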
--------------------------------------------------------------------------------
/matflow/__init__.py:
--------------------------------------------------------------------------------
1 | """`matflow.__init__.py`"""
2 |
3 | from matflow._version import __version__
4 | from matflow.api import (
5 | make_workflow,
6 | submit_workflow,
7 | load_workflow,
8 | append_schema_source,
9 | prepend_schema_source,
10 | validate,
11 | get_task_schemas,
12 | )
13 |
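14 | # Usage sketch (illustrative; 'workflow.yml' is a hypothetical profile path):
15 | #
16 | #     from matflow import make_workflow
17 | #     workflow = make_workflow('workflow.yml')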
--------------------------------------------------------------------------------
/matflow/_version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.2.27"
2 |
--------------------------------------------------------------------------------
/matflow/api.py:
--------------------------------------------------------------------------------
1 | """`matflow.api.py`
2 |
3 | This module contains the application programming interface (API) to `matflow`,
4 | and includes functions that are called by the command line interface (CLI; in
5 | `matflow.cli.py`).
6 |
7 | """
8 |
9 | import copy
10 | from pathlib import Path
11 |
12 | import pyperclip
13 | from hpcflow import kill as hpcflow_kill
14 | from hpcflow import cloud_connect as hpcflow_cloud_connect
15 |
16 | from matflow.config import Config
17 | from matflow.extensions import load_extensions
18 | from matflow.profile import parse_workflow_profile
19 | from matflow.models.workflow import Workflow
20 |
21 |
22 | def make_workflow(profile_path, directory=None, write_dirs=True):
23 | """Generate a new Workflow from a profile file.
24 |
25 | Parameters
26 | ----------
27 |     profile_path : str or Path
28 | Path to the profile file.
29 | directory : str or Path, optional
30 | The directory in which the Workflow will be generated. By default, this
31 | is the working (i.e. invoking) directory.
32 |
33 | Returns
34 | -------
35 | workflow : Workflow
36 |
37 | """
38 |
39 | load_extensions()
40 |
41 | profile_path = Path(profile_path)
42 | workflow_dict = parse_workflow_profile(profile_path)
43 |
44 | with profile_path.open('r') as handle:
45 | profile_str = handle.read()
46 |
47 | profile = {'file': profile_str, 'parsed': copy.deepcopy(workflow_dict)}
48 |
49 | iterate_run_opts = {
50 | **Config.get('default_sticky_iterate_run_options'),
51 | **Config.get('default_iterate_run_options'),
52 | }
53 | workflow_dict.update({'iterate_run_options': iterate_run_opts})
54 |
55 | workflow = Workflow(**workflow_dict, stage_directory=directory, profile=profile)
56 | workflow.set_ids()
57 |
58 | if write_dirs:
59 | workflow.write_HDF5_file()
60 | workflow.write_directories()
61 | workflow.prepare_iteration(iteration_idx=0)
62 | workflow.dump_hpcflow_workflow_file('hpcflow_workflow.yml')
63 |
64 | # Copy profile to workflow directory:
65 | workflow.path.joinpath(profile_path.name).write_bytes(profile_path.read_bytes())
66 |
67 | # Copy workflow human_id to clipboard, if supported:
68 | try:
69 | pyperclip.copy(workflow.human_id)
70 |     except Exception:  # clipboard may be unavailable on some systems
71 | pass
72 |
73 | return workflow
74 |
75 |
76 | def submit_workflow(workflow_path, directory=None):
77 | """Generate and submit a new workflow from a profile file.
78 |
79 | Parameters
80 | ----------
81 | workflow_path : str or Path
82 | Path to either a profile file or a workflow project directory that contains a
83 | previously generated workflow HDF5 file.
84 | directory : str or Path, optional
85 | Applicable if `workflow_path` points to a profile file. The directory in which the
86 | Workflow will be generated. By default, this is the working (i.e. invoking)
87 | directory.
88 |
89 | Returns
90 | -------
91 | None
92 |
93 | """
94 |
95 | if Path(workflow_path).is_file():
96 | workflow = make_workflow(workflow_path, directory=directory, write_dirs=True)
97 | else:
98 | load_extensions()
99 | workflow = load_workflow(workflow_path)
100 |
101 | workflow.submit()
102 |
103 |
104 | def load_workflow(directory, full_path=False):
105 | Config.set_config()
106 | path = Path(directory or '').resolve()
107 | workflow = Workflow.load_HDF5_file(path, full_path)
108 |
109 | return workflow
110 |
111 |
112 | def prepare_task(task_idx, iteration_idx, directory, is_array=False):
113 | """Prepare a task (iteration) for execution by setting inputs and running input
114 | maps."""
115 |
116 | load_extensions()
117 | workflow = load_workflow(directory)
118 | workflow.prepare_task(task_idx, iteration_idx, is_array=is_array)
119 |
120 |
121 | def prepare_task_element(task_idx, element_idx, directory, is_array=False):
122 | """Prepare a task element for execution by setting inputs and running input maps."""
123 | load_extensions()
124 | workflow = load_workflow(directory)
125 | workflow.prepare_task_element(task_idx, element_idx, is_array=is_array)
126 |
127 |
128 | def process_task(task_idx, iteration_idx, directory, is_array=False):
129 | """Process a completed task (iteration) by running the output map."""
130 | load_extensions()
131 | workflow = load_workflow(directory)
132 | workflow.process_task(task_idx, iteration_idx, is_array=is_array)
133 |
134 |
135 | def process_task_element(task_idx, element_idx, directory, is_array=False):
136 | """Process a task element for execution by running output maps and saving outputs."""
137 | load_extensions()
138 | workflow = load_workflow(directory)
139 | workflow.process_task_element(task_idx, element_idx, is_array=is_array)
140 |
141 |
142 | def run_python_task(task_idx, element_idx, directory):
143 | """Run a (commandless) Python task."""
144 | load_extensions()
145 | workflow = load_workflow(directory)
146 | workflow.run_python_task(task_idx, element_idx)
147 |
148 |
149 | def prepare_sources(task_idx, iteration_idx, directory):
150 | """Prepare source files."""
151 | load_extensions()
152 | workflow = load_workflow(directory)
153 | workflow.prepare_sources(task_idx, iteration_idx)
154 |
155 |
156 | def append_schema_source(schema_source_path):
157 | """Add a task schema source file to the end of the schema source list."""
158 | Config.append_schema_source(schema_source_path)
159 |
160 |
161 | def prepend_schema_source(schema_source_path):
162 | """Add a task schema source file to the front of the schema source list."""
163 | Config.prepend_schema_source(schema_source_path)
164 |
165 |
166 | def validate():
167 | load_extensions()
168 |
169 |
170 | def kill(directory):
171 | Config.set_config()
172 | hpcflow_kill(dir_path=directory, config_dir=Config.get('hpcflow_config_dir'))
173 |
174 |
175 | def cloud_connect(provider):
176 | Config.set_config()
177 | hpcflow_cloud_connect(provider, config_dir=Config.get('hpcflow_config_dir'))
178 |
179 |
180 | def write_element_directories(iteration_idx, directory):
181 |     """Generate element directories for a given iteration."""
182 | load_extensions()
183 | workflow = load_workflow(directory)
184 | if workflow.iterate:
185 | num_iters = workflow.iterate['num_iterations']
186 | else:
187 | num_iters = workflow.num_iterations
188 | if iteration_idx < num_iters:
189 | workflow.write_element_directories(iteration_idx)
190 | workflow.prepare_iteration(iteration_idx)
191 |
192 |
193 | def archive(directory, archive):
194 | """Perform an on-demand archive of an existing workflow."""
195 | workflow = load_workflow(directory)
196 | workflow.do_archive(archive)
197 |
198 |
199 | def get_task_schemas():
200 | Config.set_config()
201 | return Config.get('task_schemas')
202 |
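203 | # Usage sketch (illustrative; paths are hypothetical):
204 | #
205 | #     from matflow import api
206 | #     api.submit_workflow('workflow.yml')               # generate and submit a new workflow
207 | #     wf = api.load_workflow('/path/to/workflow_dir')   # load an existing workflow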
--------------------------------------------------------------------------------
/matflow/cli.py:
--------------------------------------------------------------------------------
1 | """`matflow.cli.py`
2 |
3 | Module that exposes a command line interface for `matflow`.
4 |
5 | """
6 | import click
7 |
8 | from matflow import __version__
9 | from matflow import api
10 |
11 |
12 | @click.group()
13 | @click.version_option(version=__version__)
14 | def cli():
15 | pass
16 |
17 |
18 | @cli.command()
19 | @click.option('--directory', '-d')
20 | @click.argument('profile', type=click.Path(exists=True))
21 | def make(profile, directory=None):
22 | """Generate a new Workflow."""
23 | print('matflow.cli.make', flush=True)
24 | api.make_workflow(profile_path=profile, directory=directory)
25 |
26 |
27 | @cli.command()
28 | @click.option('--directory', '-d')
29 | @click.argument('workflow_path', type=click.Path(exists=True))
30 | def go(workflow_path, directory=None):
31 | """Generate and submit a new Workflow."""
32 | print('matflow.cli.go', flush=True)
33 | api.submit_workflow(workflow_path, directory=directory)
34 |
35 |
36 | @cli.command()
37 | @click.option('--task-idx', '-t', type=click.INT, required=True)
38 | @click.option('--iteration-idx', '-i', type=click.INT, required=True)
39 | @click.option('--directory', '-d', type=click.Path(exists=True))
40 | @click.option('--array', is_flag=True)
41 | def prepare_task(task_idx, iteration_idx, directory=None, array=False):
42 | print('matflow.cli.prepare_task', flush=True)
43 | api.prepare_task(task_idx, iteration_idx, directory, is_array=array)
44 |
45 |
46 | @cli.command()
47 | @click.option('--task-idx', '-t', type=click.INT, required=True)
48 | @click.option('--element-idx', '-e', type=click.INT, required=True)
49 | @click.option('--directory', '-d', type=click.Path(exists=True))
50 | @click.option('--array', is_flag=True)
51 | def prepare_task_element(task_idx, element_idx, directory=None, array=False):
52 | print('matflow.cli.prepare_task_element', flush=True)
53 | api.prepare_task_element(task_idx, element_idx, directory, is_array=array)
54 |
55 |
56 | @cli.command()
57 | @click.option('--task-idx', '-t', type=click.INT, required=True)
58 | @click.option('--iteration-idx', '-i', type=click.INT, required=True)
59 | @click.option('--directory', '-d', type=click.Path(exists=True))
60 | @click.option('--array', is_flag=True)
61 | def process_task(task_idx, iteration_idx, directory=None, array=False):
62 | print('matflow.cli.process_task', flush=True)
63 | api.process_task(task_idx, iteration_idx, directory, is_array=array)
64 |
65 |
66 | @cli.command()
67 | @click.option('--task-idx', '-t', type=click.INT, required=True)
68 | @click.option('--element-idx', '-e', type=click.INT, required=True)
69 | @click.option('--directory', '-d', type=click.Path(exists=True))
70 | @click.option('--array', is_flag=True)
71 | def process_task_element(task_idx, element_idx, directory=None, array=False):
72 | print('matflow.cli.process_task_element', flush=True)
73 | api.process_task_element(task_idx, element_idx, directory, is_array=array)
74 |
75 |
76 | @cli.command()
77 | @click.option('--task-idx', '-t', type=click.INT, required=True)
78 | @click.option('--element-idx', '-e', type=click.INT, required=True)
79 | @click.option('--directory', '-d', type=click.Path(exists=True))
80 | def run_python_task(task_idx, element_idx, directory=None):
81 | print('matflow.cli.run_python_task', flush=True)
82 | api.run_python_task(task_idx, element_idx, directory)
83 |
84 |
85 | @cli.command()
86 | @click.option('--task-idx', '-t', type=click.INT, required=True)
87 | @click.option('--iteration-idx', '-i', type=click.INT, required=True)
88 | @click.option('--directory', '-d', type=click.Path(exists=True))
89 | def prepare_sources(task_idx, iteration_idx, directory=None):
90 | print('matflow.cli.prepare_sources', flush=True)
91 | api.prepare_sources(task_idx, iteration_idx, directory)
92 |
93 |
94 | @cli.command()
95 | @click.argument('schema_source_path', type=click.Path(exists=True))
96 | def append_schema_source(schema_source_path):
97 | api.append_schema_source(schema_source_path)
98 |
99 |
100 | @cli.command()
101 | @click.argument('schema_source_path', type=click.Path(exists=True))
102 | def prepend_schema_source(schema_source_path):
103 | api.prepend_schema_source(schema_source_path)
104 |
105 |
106 | @cli.command()
107 | def validate():
108 | """Load and validate task schemas against available extensions."""
109 | api.validate()
110 |
111 |
112 | @cli.command()
113 | @click.option('--provider', '-p', required=True)
114 | def cloud_connect(provider):
115 | api.cloud_connect(provider)
116 |
117 |
118 | @cli.command()
119 | @click.argument('directory', type=click.Path(exists=True))
120 | def kill(directory):
121 | """Kill all pending and executing tasks."""
122 | api.kill(directory)
123 |
124 |
125 | @cli.command()
126 | @click.option('--iteration-idx', '-i', type=click.INT, required=True)
127 | @click.option('--directory', '-d', type=click.Path(exists=True))
128 | def write_element_directories(iteration_idx, directory=None):
129 | api.write_element_directories(iteration_idx, directory)
130 |
131 |
132 | @cli.command()
133 | @click.argument('directory', type=click.Path(exists=True))
134 | @click.argument('archive')
135 | def archive(directory, archive):
136 | api.archive(directory, archive)
137 |
138 |
139 | if __name__ == '__main__':
140 | cli()
141 |
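142 | # Typical invocations (illustrative; 'workflow.yml' is a hypothetical profile path):
143 | #
144 | #     matflow make --directory /path/to/stage workflow.yml
145 | #     matflow go workflow.yml
146 | #     matflow validate
147 | #     matflow kill /path/to/workflow/directory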
--------------------------------------------------------------------------------
/matflow/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 | from warnings import warn
4 |
5 | from ruamel.yaml import YAML, safe_load
6 |
7 |
8 | from matflow.errors import ConfigurationError, MatflowExtensionError
9 | from matflow.models.task import TaskSchema
10 | from matflow.models.software import SoftwareInstance
11 |
12 |
13 | class Config(object):
14 |
15 | __ALLOWED_CONFIG = [
16 | 'task_schema_sources',
17 | 'software_sources',
18 | 'default_run_options',
19 | 'default_preparation_run_options',
20 | 'default_processing_run_options',
21 | 'default_iterate_run_options',
22 | 'default_sticky_run_options',
23 | 'default_sticky_preparation_run_options',
24 | 'default_sticky_processing_run_options',
25 | 'default_sticky_iterate_run_options',
26 | 'parallel_modes',
27 | 'archive_locations',
28 | 'default_metadata',
29 | ]
30 |
31 | __conf = {}
32 |
33 | _is_set = False
34 | _is_extension_locked = True
35 |
36 | @staticmethod
37 | def append_schema_source(schema_source_path, config_dir=None):
38 | yaml = YAML(typ='rt')
39 | config_dat, config_file = Config.get_config_file(config_dir=config_dir)
40 | config_dat['task_schema_sources'].append(str(schema_source_path))
41 | yaml.dump(config_dat, config_file)
42 |
43 | @staticmethod
44 | def prepend_schema_source(schema_source_path, config_dir=None):
45 | yaml = YAML(typ='rt')
46 | config_dat, config_file = Config.get_config_file(config_dir=config_dir)
47 | config_dat['task_schema_sources'] = (
48 |             [str(schema_source_path)] + config_dat['task_schema_sources']
49 | )
50 | yaml.dump(config_dat, config_file)
51 |
52 | @staticmethod
53 | def resolve_config_dir(config_dir=None):
54 |
55 | if not config_dir:
56 | config_dir = Path(os.getenv('MATFLOW_CONFIG_DIR', '~/.matflow')).expanduser()
57 | else:
58 | config_dir = Path(config_dir)
59 |
60 | if Config._is_set:
61 | if config_dir != Config.get('config_dir'):
62 | warn(f'Config is already set, but `config_dir` changed from '
63 | f'"{Config.get("config_dir")}" to "{config_dir}".')
64 |
65 | if not config_dir.is_dir():
66 | print('Configuration directory does not exist. Generating.')
67 | config_dir.mkdir()
68 |
69 | return config_dir
70 |
71 | @staticmethod
72 | def get_config_file(config_dir):
73 |
74 | yaml = YAML()
75 | config_file = config_dir.joinpath('config.yml')
76 | def_schema_file = config_dir.joinpath('task_schemas.yml')
77 | def_software_file = config_dir.joinpath('software.yml')
78 | if not config_file.is_file():
79 | print('No config.yml found. Generating a config.yml file.')
80 | def_config = {
81 | 'task_schema_sources': [str(def_schema_file)],
82 | 'software_sources': [str(def_software_file)],
83 | 'parallel_modes': {
84 | 'MPI': {'command': 'mpirun -np <>'},
85 | 'OpenMP': {'env': 'export OMP_NUM_THREADS=<>'},
86 | }
87 | }
88 | yaml.dump(def_config, config_file)
89 |
90 | if not def_schema_file.is_file():
91 | print('Generating a default task schema file.')
92 | yaml.dump([], def_schema_file)
93 |
94 | if not def_software_file.is_file():
95 | print('Generating a default software file.')
96 | yaml.dump({}, def_software_file)
97 |
98 | print(f'Loading matflow config from {config_file}')
99 | with config_file.open() as handle:
100 | config_dat = safe_load(handle)
101 | bad_keys = list(set(config_dat.keys()) - set(Config.__ALLOWED_CONFIG))
102 | if bad_keys:
103 | bad_keys_fmt = ', '.join([f'"{i}"' for i in bad_keys])
104 | raise ConfigurationError(f'Unknown configuration options: {bad_keys_fmt}.')
105 |
106 | if 'task_schema_sources' not in config_dat:
107 | msg = (f'Missing `task_schema_sources` from configuration file: '
108 | f'{config_file}.')
109 | raise ConfigurationError(msg)
110 |
111 | if 'software_sources' not in config_dat:
112 |             msg = f'Missing `software_sources` from configuration file: {config_file}.'
113 | raise ConfigurationError(msg)
114 |
115 | return config_dat, config_file
116 |
117 | @staticmethod
118 | def set_config(config_dir=None, raise_on_set=False, refresh=False):
119 | """Load configuration from a YAML file."""
120 |
121 | config_dir = Config.resolve_config_dir(config_dir)
122 |
123 | if Config._is_set:
124 | if raise_on_set:
125 | raise ConfigurationError('Configuration is already set.')
126 | elif not refresh:
127 | return
128 |
129 | config_dat, _ = Config.get_config_file(config_dir)
130 | schema_sources = [Path(i).expanduser() for i in config_dat['task_schema_sources']]
131 | software_sources = [Path(i).expanduser() for i in config_dat['software_sources']]
132 |
133 | # Validate parallel_modes:
134 | ALLOWED_PARA_MODES = ['MPI', 'OpenMP']
135 | ALLOWED_PARA_MODES_FMT = ', '.join([f'{i!r}' for i in ALLOWED_PARA_MODES])
136 | ALLOWED_PARA_CONFIGS = ['env', 'command']
137 | ALLOWED_PARA_CONFIGS_FMT = ', '.join([f'{i!r}' for i in ALLOWED_PARA_CONFIGS])
138 | para_modes = {}
139 | for name, mode_config in config_dat.get('parallel_modes', {}).items():
140 | if name.lower() not in [i.lower() for i in ALLOWED_PARA_MODES]:
141 | msg = (f'Parallel mode "{name}" not known. Allowed parallel modes are '
142 | f'{ALLOWED_PARA_MODES_FMT}.')
143 | raise ConfigurationError(msg)
144 | if not mode_config:
145 | msg = (f'Specify at least one of {ALLOWED_PARA_CONFIGS_FMT} for parallel '
146 | f'mode configuration: "{name}".')
147 | raise ConfigurationError(msg)
148 | bad_keys = set(mode_config.keys()) - set(ALLOWED_PARA_CONFIGS)
149 | if bad_keys:
150 | bad_keys_fmt = ', '.join([f'{i!r}' for i in bad_keys])
151 | msg = (f'Unknown parallel mode configuration keys: {bad_keys_fmt} for '
152 | f'mode "{name}".')
153 | raise ConfigurationError(msg)
154 |
155 | if 'env' in mode_config:
156 | # Split into list of lines:
157 | mode_config['env'] = mode_config['env'].splitlines()
158 |
159 | # Update to be lowercase:
160 | para_modes.update({name.lower(): mode_config})
161 |
162 | # Load task_schemas list from all specified task schema files:
163 | task_schema_dicts = {}
164 | yaml = YAML(typ='safe')
165 | for task_schema_file in schema_sources[::-1]:
166 | if not task_schema_file.is_file():
167 | msg = f'Task schema source is not a file: "{task_schema_file}".'
168 | raise ConfigurationError(msg)
169 |             for i in (yaml.load(task_schema_file) or []):  # Tolerate empty files.
170 | if 'name' not in i:
171 | raise ValueError('Task schema definition is missing a "name" key.')
172 | # Overwrite any task schema with the same name (hence we order files in
173 | # reverse so e.g. the first task schema file takes precedence):
174 | task_schema_dicts.update({i['name']: i})
175 |
176 | # Convert to lists:
177 | task_schema_dicts = [v for k, v in task_schema_dicts.items()]
178 |
179 | # Load and validate self-consistency of task schemas:
180 | print(f'Loading task schemas from {len(schema_sources)} file(s)...', end='')
181 | try:
182 | task_schemas = TaskSchema.load_from_hierarchy(task_schema_dicts)
183 | except Exception as err:
184 | print('Failed.')
185 | raise err
186 | print('OK!')
187 |
188 | print(f'Loading software definitions from {len(software_sources)} '
189 | f'file(s)...', end='')
190 | software = {}
191 | for software_file in software_sources:
192 | if not software_file.is_file():
193 | msg = f'Software source is not a file: "{software_file}".'
194 | raise ConfigurationError(msg)
195 | try:
196 | soft_loaded = SoftwareInstance.load_multiple(yaml.load(software_file))
197 | except Exception as err:
198 | print(f'\nFailed to load software definitions from: "{software_file}".')
199 | raise err
200 |
201 | # Combine software instances from multiple software source files:
202 | for soft_name, instances in soft_loaded.items():
203 | if soft_name in software:
204 | software[soft_name].extend(instances)
205 | else:
206 | software.update({soft_name: instances})
207 | print('OK!')
208 |
209 | archive_locs = config_dat.get('archive_locations', {})
210 | for arch_name, arch in archive_locs.items():
211 | ALLOWED_ARCH_KEYS = ['path', 'cloud_provider']
212 | if 'path' not in arch:
213 | msg = f'Missing `path` for archive location "{arch_name}".'
214 | raise ConfigurationError(msg)
215 | bad_keys = set(arch.keys()) - set(ALLOWED_ARCH_KEYS)
216 | if bad_keys:
217 | bad_keys_fmt = ', '.join([f'{i!r}' for i in bad_keys])
218 | msg = (f'Unknown archive location keys for archive "{arch_name}": '
219 | f'{bad_keys_fmt}')
220 | raise ConfigurationError(msg)
221 |
222 | ALLOWED_CLOUD_PROVIDERS = ['dropbox']
223 | cloud_provider = arch.get('cloud_provider')
224 | if cloud_provider and cloud_provider not in ALLOWED_CLOUD_PROVIDERS:
225 | msg = (f'Unsupported cloud provider for archive "{arch_name}": '
226 | f'"{cloud_provider}". Supported cloud providers are: '
227 | f'{ALLOWED_CLOUD_PROVIDERS}.')
228 | raise ConfigurationError(msg)
229 |
230 | Config.__conf['config_dir'] = config_dir
231 |
232 | for i in [
233 | 'default_run_options',
234 | 'default_preparation_run_options',
235 | 'default_processing_run_options',
236 | 'default_iterate_run_options',
237 | 'default_sticky_run_options',
238 | 'default_sticky_preparation_run_options',
239 | 'default_sticky_processing_run_options',
240 | 'default_sticky_iterate_run_options',
241 | 'default_metadata',
242 | ]:
243 | Config.__conf[i] = config_dat.get(i, {})
244 |
245 | hpcflow_config_dir = config_dir.joinpath('.hpcflow')
246 | Config.__conf['hpcflow_config_dir'] = hpcflow_config_dir
247 | Config.__conf['software'] = software
248 | Config.__conf['task_schemas'] = task_schemas
249 | Config.__conf['parallel_modes'] = para_modes
250 | Config.__conf['archive_locations'] = archive_locs
251 |
252 | Config.__conf['input_maps'] = {}
253 | Config.__conf['output_maps'] = {}
254 | Config.__conf['func_maps'] = {}
255 | Config.__conf['CLI_arg_maps'] = {}
256 | Config.__conf['sources_maps'] = {}
257 | Config.__conf['output_file_maps'] = {}
258 | Config.__conf['software_versions'] = {}
259 | Config.__conf['extension_info'] = {}
260 | Config.__conf['schema_validity'] = {}
261 |
262 | Config._is_set = True
263 |
264 | @staticmethod
265 | def get(name):
266 | if not Config._is_set:
267 | raise ConfigurationError('Configuration is not yet set.')
268 | return Config.__conf[name]
269 |
270 | @staticmethod
271 | def lock_extensions():
272 | Config._is_extension_locked = True
273 |
274 | @staticmethod
275 | def unlock_extensions():
276 | Config._is_extension_locked = False
277 |
278 | @staticmethod
279 | def _get_software_safe(software_name):
280 | return SoftwareInstance.get_software_safe(software_name)
281 |
282 | @staticmethod
283 | def _get_key_safe(key):
284 | return key[0], key[1], Config._get_software_safe(key[2])
285 |
286 | @staticmethod
287 | def _validate_extension_setter():
288 | if not Config._is_set:
289 | warn(f'Configuration is not yet set. Matflow extension functions will not '
290 | 'be mapped to task schemas unless matflow is loaded.')
291 | return False
292 | if Config._is_extension_locked:
293 | msg = 'Configuration is locked against modifying extension data.'
294 | raise ConfigurationError(msg)
295 | return True
296 |
297 | @staticmethod
298 | def set_input_map(key, input_file, func):
299 | if Config._validate_extension_setter():
300 | key = Config._get_key_safe(key)
301 | if key not in Config.__conf['input_maps']:
302 | Config.__conf['input_maps'].update({key: {}})
303 | if input_file in Config.__conf['input_maps'][key]:
304 | msg = f'Input file name "{input_file}" already exists in the input map.'
305 | raise MatflowExtensionError(msg)
306 | Config.__conf['input_maps'][key][input_file] = func
307 |
308 | @staticmethod
309 | def set_output_map(key, output_name, func):
310 | if Config._validate_extension_setter():
311 | key = Config._get_key_safe(key)
312 | if key not in Config.__conf['output_maps']:
313 | Config.__conf['output_maps'].update({key: {}})
314 | if output_name in Config.__conf['output_maps'][key]:
315 | msg = f'Output name "{output_name}" already exists in the output map.'
316 | raise MatflowExtensionError(msg)
317 | Config.__conf['output_maps'][key][output_name] = func
318 |
319 | @staticmethod
320 | def set_func_map(key, func):
321 | if Config._validate_extension_setter():
322 | key = Config._get_key_safe(key)
323 | if key in Config.__conf['func_maps']:
324 | msg = f'Function map "{key}" already exists in the function map.'
325 | raise MatflowExtensionError(msg)
326 | Config.__conf['func_maps'][key] = func
327 |
328 | @staticmethod
329 | def set_CLI_arg_map(key, input_name, func):
330 | if Config._validate_extension_setter():
331 | key = Config._get_key_safe(key)
332 | if key not in Config.__conf['CLI_arg_maps']:
333 | Config.__conf['CLI_arg_maps'].update({key: {}})
334 | if input_name in Config.__conf['CLI_arg_maps'][key]:
335 | msg = (f'Input name "{input_name}" already exists in the CLI formatter '
336 | f'map.')
337 | raise MatflowExtensionError(msg)
338 | Config.__conf['CLI_arg_maps'][key][input_name] = func
339 |
340 | @staticmethod
341 | def set_source_map(key, func, **sources_dict):
342 | if Config._validate_extension_setter():
343 | key = Config._get_key_safe(key)
344 | if key in Config.__conf['sources_maps']:
345 |                 msg = f'Sources map for key {key} already exists.'
346 | raise MatflowExtensionError(msg)
347 | Config.__conf['sources_maps'].update({
348 | key: {'func': func, 'sources': sources_dict}
349 | })
350 |
351 | @staticmethod
352 | def set_software_version_func(software, func):
353 | if Config._validate_extension_setter():
354 | software = Config._get_software_safe(software)
355 | if software in Config.__conf['software_versions']:
356 | msg = (f'Software "{software}" has already registered a '
357 | f'`software_versions` function.')
358 | raise MatflowExtensionError(msg)
359 | Config.__conf['software_versions'][software] = func
360 |
361 | @staticmethod
362 | def set_output_file_map(key, file_reference, file_name):
363 | if Config._validate_extension_setter():
364 | key = Config._get_key_safe(key)
365 | if key not in Config.__conf['output_file_maps']:
366 | Config.__conf['output_file_maps'].update({key: {}})
367 | file_ref_full = '__file__' + file_reference
368 | if file_ref_full in Config.__conf['output_file_maps'][key]:
369 | msg = f'File name "{file_name}" already exists in the output files map.'
370 | raise MatflowExtensionError(msg)
371 | Config.__conf['output_file_maps'][key].update({file_ref_full: file_name})
372 |
373 | @staticmethod
374 | def set_extension_info(name, info):
375 | if Config._validate_extension_setter():
376 | if name in Config.__conf['extension_info']:
377 | msg = f'Extension with name "{name}" already loaded.'
378 | raise MatflowExtensionError(msg)
379 | Config.__conf['extension_info'][name] = info
380 |
381 | @staticmethod
382 | def set_schema_validities(validities):
383 | if Config._validate_extension_setter():
384 | Config.__conf['schema_validity'].update(validities)
385 |
386 | @staticmethod
387 | def unload_extension(software_name):
388 |
389 | name = Config._get_software_safe(software_name)
390 |
391 | in_map = [k for k in Config.__conf['input_maps'] if k[2] == name]
392 | for k in in_map:
393 | del Config.__conf['input_maps'][k]
394 |
395 | out_map = [k for k in Config.__conf['output_maps'] if k[2] == name]
396 | for k in out_map:
397 | del Config.__conf['output_maps'][k]
398 |
399 | func_map = [k for k in Config.__conf['func_maps'] if k[2] == name]
400 | for k in func_map:
401 | del Config.__conf['func_maps'][k]
402 |
403 | CLI_map = [k for k in Config.__conf['CLI_arg_maps'] if k[2] == name]
404 | for k in CLI_map:
405 | del Config.__conf['CLI_arg_maps'][k]
406 |
407 | out_file_map = [k for k in Config.__conf['output_file_maps'] if k[2] == name]
408 | for k in out_file_map:
409 | del Config.__conf['output_file_maps'][k]
410 |
411 | soft_vers = [k for k in Config.__conf['software_versions'] if k == name]
412 | for k in soft_vers:
413 | del Config.__conf['software_versions'][k]
414 |
415 | ext_info = [k for k in Config.__conf['extension_info'] if k == name]
416 | for k in ext_info:
417 | del Config.__conf['extension_info'][k]
418 |
419 | schema_valid = [k for k in Config.__conf['schema_validity'] if k[2] == name]
420 | for k in schema_valid:
421 | del Config.__conf['schema_validity'][k]
422 |
423 | source_map = [k for k in Config.__conf['sources_maps'] if k[2] == name]
424 | for k in source_map:
425 | del Config.__conf['sources_maps'][k]
426 |
--------------------------------------------------------------------------------
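A sketch of typical `Config` usage, assuming `MATFLOW_CONFIG_DIR` is unset so the default `~/.matflow` directory is used (and generated on first call):

from matflow.config import Config

# First call generates config.yml (plus default task schema and software
# files) if missing, then loads and validates everything:
Config.set_config()

# Subsequent calls return immediately unless a refresh is forced:
Config.set_config(refresh=True)

print(Config.get('config_dir'))
print(Config.get('parallel_modes'))  # Keys are lowercased, e.g. 'mpi'.
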
/matflow/errors.py:
--------------------------------------------------------------------------------
1 | class IncompatibleWorkflow(Exception):
2 | pass
3 |
4 |
5 | class IncompatibleTaskNesting(IncompatibleWorkflow):
6 | pass
7 |
8 |
9 | class MissingMergePriority(IncompatibleTaskNesting):
10 | pass
11 |
12 |
13 | class IncompatibleSequence(Exception):
14 | """For task sequence definitions that are not logically consistent."""
15 |
16 |
17 | class SequenceError(Exception):
18 | """For malformed sequence definitions."""
19 |
20 |
21 | class TaskError(Exception):
22 | """For malformed task definitions."""
23 |
24 |
25 | class TaskSchemaError(Exception):
26 | """For nonsensical task schema definitions."""
27 |
28 |
29 | class TaskParameterError(Exception):
30 | """For incorrectly parametrised tasks."""
31 |
32 |
33 | class ProfileError(Exception):
34 | """For malformed profile file data."""
35 |
36 |
37 | class MissingSoftware(Exception):
38 | """For specified software that cannot be satisfied."""
39 |
40 |
41 | class WorkflowPersistenceError(Exception):
42 | """For problems related to saving and loading the persistent HDF5 files."""
43 |
44 |
45 | class UnsatisfiedGroupParameter(Exception):
46 | """For when an input has a group, but that group does not exist in the Workflow."""
47 |
48 |
49 | class MatflowExtensionError(Exception):
50 | """For problems when loading extensions."""
51 |
52 |
53 | class MissingSchemaError(Exception):
54 | """For when a suitable schema does not exist."""
55 |
56 |
57 | class UnsatisfiedSchemaError(Exception):
58 | """For when a suitable extension function cannot be found for a task schema."""
59 |
60 |
61 | class TaskElementExecutionError(Exception):
62 |     """For when the execution of a task element fails."""
63 |
64 |
65 | class ConfigurationError(Exception):
66 | """For malformed configuration files."""
67 |
68 |
69 | class SoftwareInstanceError(Exception):
70 | """For malformed SoftwareInstance definitions."""
71 | pass
72 |
73 |
74 | class MissingSoftwareSourcesError(Exception):
75 | """For when a software instance requires source variables, but none are forthcoming."""
76 |
77 |
78 | class UnexpectedSourceMapReturnError(Exception):
79 | """For when a source map function does not return the expected dict."""
80 |
81 |
82 | class CommandError(Exception):
83 | """For problems with command groups and commands."""
84 |
85 |
86 | class WorkflowIterationError(Exception):
87 | """For issues with resolving requested iterations."""
88 |
89 |
90 | class ParameterImportError(Exception):
91 | """For issues with importing parameters from pre-existing workflows."""
92 |
--------------------------------------------------------------------------------
/matflow/extensions.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import pkg_resources
3 | import warnings
4 |
5 | from matflow.config import Config
6 | from matflow.validation import validate_task_schemas
7 |
8 |
9 | def load_extensions():
10 |
11 | Config.set_config(raise_on_set=True)
12 | Config.unlock_extensions()
13 |
14 |     extensions_entries = list(pkg_resources.iter_entry_points('matflow.extension'))
15 | if extensions_entries:
16 | print('Loading extensions...')
17 | for entry_point in extensions_entries:
18 |
19 | print(f' "{entry_point.name}"...', end='', flush=True)
20 |
21 | try:
22 | loaded = entry_point.load()
23 | except (ImportError, SyntaxError) as ex:
24 | print(f'Failed: {ex!r}', flush=True)
25 | continue
26 |
27 | unload = False
28 |
29 | if not hasattr(loaded, 'SOFTWARE'):
30 | print('Failed.', flush=True)
31 | warnings.warn(f'Matflow extension "{entry_point.module_name}" has no '
32 | f'`SOFTWARE` attribute. This extension will not be loaded.')
33 |                 continue  # No software name available, so nothing to unload.
34 |
35 | if not hasattr(loaded, '__version__'):
36 | print('Failed.', flush=True)
37 | warnings.warn(f'Matflow extension "{entry_point.module_name}" has no '
38 | f'`__version__` attribute. This extension will not be '
39 | f'loaded.')
40 | unload = True
41 |
42 | software_safe = Config._get_software_safe(loaded.SOFTWARE)
43 |
44 | if (
45 | not unload and
46 | Config.get('software_versions').get(software_safe) is None
47 | ):
48 |
49 | # Every defined SoftwareInstance must have a specified version_info:
50 | version_defined = True
51 | soft_instances = Config.get('software').get(software_safe)
52 | if not soft_instances:
53 | version_defined = False
54 | else:
55 | for i in soft_instances:
56 | if i.version_info is None:
57 | version_defined = False
58 | break
59 |
60 | if not version_defined:
61 | print('Failed.', flush=True)
62 | msg = (f'Matflow extension "{entry_point.module_name}" does not '
63 | f'register a function for getting software versions and one '
64 | f'or more of its software instance definitions do not '
65 | f'specify `version_info`. This extension will not be loaded.')
66 | warnings.warn(msg)
67 | unload = True
68 |
69 | if unload:
70 | Config.unload_extension(software_safe)
71 | continue
72 |
73 | Config.set_extension_info(
74 | entry_point.name,
75 | {'module_name': entry_point.module_name, 'version': loaded.__version__},
76 | )
77 | print(f'(software: "{software_safe}") from '
78 | f'{entry_point.module_name} (version {loaded.__version__})', flush=True)
79 |
80 | # Validate task schemas against loaded extensions:
81 | print('Validating task schemas against loaded extensions...', end='')
82 | try:
83 | Config.set_schema_validities(
84 | validate_task_schemas(
85 | Config.get('task_schemas'),
86 | Config.get('input_maps'),
87 | Config.get('output_maps'),
88 | Config.get('func_maps'),
89 | )
90 | )
91 | except Exception as err:
92 | print('Failed.', flush=True)
93 | raise err
94 |
95 | schema_validity = Config.get('schema_validity')
96 | schema_invalids = [(k, v[1]) for k, v in schema_validity.items() if not v[0]]
97 | num_valid = sum([i[0] for i in schema_validity.values()])
98 | num_total = len(schema_validity)
99 | print(f'OK! {num_valid}/{num_total} schemas are valid.', flush=True)
100 | if schema_invalids:
101 | sch_invalids_fmt = '\n '.join([f'{i[0]}: {i[1]}' for i in schema_invalids])
102 | msg = f'The following schemas are invalid:\n {sch_invalids_fmt}\n'
103 | print(msg, flush=True)
104 |
105 | else:
106 | print('No extensions found.')
107 |
108 | Config.lock_extensions()
109 |
110 |
111 | def input_mapper(input_file, task, method, software):
112 | """Function decorator for adding input maps from extensions."""
113 | def _input_mapper(func):
114 | @functools.wraps(func)
115 | def func_wrap(*args, **kwargs):
116 | return func(*args, **kwargs)
117 | key = (task, method, software)
118 | Config.set_input_map(key, input_file, func_wrap)
119 | return func_wrap
120 | return _input_mapper
121 |
122 |
123 | def output_mapper(output_name, task, method, software):
124 | """Function decorator for adding output maps from extensions."""
125 | def _output_mapper(func):
126 | @functools.wraps(func)
127 | def func_wrap(*args, **kwargs):
128 | return func(*args, **kwargs)
129 | key = (task, method, software)
130 | Config.set_output_map(key, output_name, func_wrap)
131 | return func_wrap
132 | return _output_mapper
133 |
134 |
135 | def func_mapper(task, method, software):
136 | """Function decorator for adding function maps from extensions."""
137 | def _func_mapper(func):
138 | @functools.wraps(func)
139 | def func_wrap(*args, **kwargs):
140 | return func(*args, **kwargs)
141 | key = (task, method, software)
142 | Config.set_func_map(key, func_wrap)
143 | return func_wrap
144 | return _func_mapper
145 |
146 |
147 | def cli_format_mapper(input_name, task, method, software):
148 | """Function decorator for adding CLI arg formatter functions from extensions."""
149 | def _cli_format_mapper(func):
150 | @functools.wraps(func)
151 | def func_wrap(*args, **kwargs):
152 | return func(*args, **kwargs)
153 | key = (task, method, software)
154 | Config.set_CLI_arg_map(key, input_name, func_wrap)
155 | return func_wrap
156 | return _cli_format_mapper
157 |
158 |
159 | def software_versions(software):
160 | """Function decorator to register an extension function as the function that returns
161 | a dict of pertinent software versions for that extension."""
162 | def _software_versions(func):
163 | @functools.wraps(func)
164 | def func_wrap(*args, **kwargs):
165 | return func(*args, **kwargs)
166 | Config.set_software_version_func(software, func_wrap)
167 | return func_wrap
168 | return _software_versions
169 |
170 |
171 | def sources_mapper(task, method, software, **sources_dict):
172 |     """Function decorator to register an extension function that generates task source
173 | files."""
174 | def _sources_mapper(func):
175 | @functools.wraps(func)
176 | def func_wrap(*args, **kwargs):
177 | return func(*args, **kwargs)
178 | key = (task, method, software)
179 | Config.set_source_map(key, func_wrap, **sources_dict)
180 | return func_wrap
181 | return _sources_mapper
182 |
183 |
184 | def register_output_file(file_reference, file_name, task, method, software):
185 | key = (task, method, software)
186 | Config.set_output_file_map(key, file_reference, file_name)
187 |
--------------------------------------------------------------------------------
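For illustration, a minimal sketch of an extension module that would be registered on the `matflow.extension` entry point. Only the decorators and the module-level `SOFTWARE` and `__version__` attributes are required by `load_extensions`; the task/method/software names and the mapper function signatures are hypothetical:

from matflow.extensions import input_mapper, output_mapper, software_versions

SOFTWARE = 'my_solver'  # Checked for by `load_extensions`.
__version__ = '0.1.0'   # Likewise required.


@input_mapper(input_file='input.dat', task='simulate', method='FEM',
              software='my_solver')
def write_input_dat(path, temperature):
    # Hypothetical input map: write a resolved input value to the input file.
    with open(path, 'w') as handle:
        handle.write(f'{temperature}\n')


@output_mapper(output_name='final_energy', task='simulate', method='FEM',
               software='my_solver')
def read_final_energy(path):
    # Hypothetical output map: parse an output file into a parameter value.
    with open(path) as handle:
        return float(handle.read())


@software_versions(software='my_solver')
def get_versions():
    # Hypothetical: return a dict of pertinent version strings.
    return {'my_solver': '1.0.0'}
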
/matflow/hicklable.py:
--------------------------------------------------------------------------------
1 | """`matflow.hicklable.py`"""
2 |
3 | import numpy as np
4 |
5 | HICKLABLE_PRIMITIVES = (
6 | int,
7 | float,
8 | str,
9 | np.ndarray,
10 | np.int32,
11 | np.int64,
12 | type(None),
13 | )
14 |
15 |
16 | def to_hicklable(obj):
17 | """Get an object representation that can be saved to an HDF5 file using `hickle`.
18 |
19 | Parameters
20 | ----------
21 | obj : object
22 | Object whose hicklable representation is to be returned.
23 |
24 | """
25 |
26 | if isinstance(obj, (list, tuple, set)):
27 | obj_valid = []
28 | for item in obj:
29 | obj_valid.append(to_hicklable(item))
30 | if isinstance(obj, tuple):
31 | obj_valid = tuple(obj_valid)
32 | elif isinstance(obj, set):
33 | obj_valid = set(obj_valid)
34 |
35 | elif isinstance(obj, dict):
36 | obj_valid = {}
37 | for dct_key, dct_val in obj.items():
38 | obj_valid.update({dct_key: to_hicklable(dct_val)})
39 |
40 | elif isinstance(obj, HICKLABLE_PRIMITIVES):
41 | obj_valid = obj
42 |
43 | else:
44 | all_attrs = {}
45 | if hasattr(obj, '__dict__'):
46 | all_attrs.update(getattr(obj, '__dict__'))
47 | if hasattr(obj, '__slots__'):
48 | all_attrs.update({k: getattr(obj, k) for k in getattr(obj, '__slots__')
49 | if k != '__dict__'})
50 | if not hasattr(obj, '__dict__') and not hasattr(obj, '__slots__'):
51 | raise ValueError(f'Object not understood: {obj}.')
52 |
53 | obj_valid = {}
54 | for attr, value in all_attrs.items():
55 | obj_valid.update({attr: to_hicklable(value)})
56 |
57 | return obj_valid
58 |
--------------------------------------------------------------------------------
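A short usage sketch: `to_hicklable` recurses through containers and falls back to `__dict__`/`__slots__` attributes for arbitrary objects:

import numpy as np

from matflow.hicklable import to_hicklable


class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y


data = {'points': [Point(0, 1), Point(2, 3)], 'arr': np.arange(3)}
print(to_hicklable(data))
# {'points': [{'x': 0, 'y': 1}, {'x': 2, 'y': 3}], 'arr': array([0, 1, 2])}
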
/matflow/models/__init__.py:
--------------------------------------------------------------------------------
1 | """`matflow.models.__init__.py`"""
2 |
3 | from matflow.models.command import Command, CommandGroup
4 | from matflow.models.task import Task, TaskSchema
5 |
--------------------------------------------------------------------------------
/matflow/models/command.py:
--------------------------------------------------------------------------------
1 | """`matflow.models.command.py`
2 |
3 | Module containing functionality for executing commands.
4 |
5 | """
6 |
7 | import copy
8 |
9 | import numpy as np
10 |
11 | from matflow.errors import CommandError
12 | from matflow.utils import dump_to_yaml_string
13 | from matflow.hicklable import to_hicklable
14 |
15 |
16 | def list_formatter(lst):
17 | return ' '.join([f'{i}' for i in lst])
18 |
19 |
20 | DEFAULT_FORMATTERS = {
21 | str: lambda x: x,
22 | int: lambda number: str(number),
23 | float: lambda number: f'{number:.6f}',
24 | list: list_formatter,
25 | set: list_formatter,
26 | tuple: list_formatter,
27 | }
28 |
29 |
30 | class CommandGroup(object):
31 | """Class to represent a group of commands."""
32 |
33 | def __init__(self, commands, command_files=None, command_pathways=None):
34 | """
35 | Parameters
36 | ----------
37 |         commands : list of dict
38 | command_files : dict, optional
39 | command_pathways : list of dict, optional
40 |
41 | """
42 |
43 | self.commands = [Command(**i) for i in commands]
44 | self.command_files = command_files or {}
45 | self.command_pathways = command_pathways or []
46 |
47 | self._validate_command_pathways()
48 | self.resolve_command_pathways()
49 |
50 | @property
51 | def all_commands(self):
52 | return self.commands
53 |
54 | def __repr__(self):
55 | out = f'{self.__class__.__name__}(commands=['
56 | out += ', '.join([f'{i!r}' for i in self.all_commands]) + ']'
57 | out += ')'
58 | return out
59 |
60 | def __str__(self):
61 | return dump_to_yaml_string(self.as_dict())
62 |
63 | def as_dict(self):
64 | return to_hicklable(self)
65 |
66 | def check_pathway_conditions(self, inputs_list):
67 | """Check the command pathway conditions are compatible with a list of schema
68 | inputs.
69 |
70 | Parameters
71 | ----------
72 | inputs_list : list of str
73 |
74 | """
75 |
76 | for cmd_pth_idx, cmd_pth in enumerate(self.command_pathways):
77 | condition = cmd_pth.get('condition')
78 | if condition:
79 | bad_keys = set(condition) - set(inputs_list)
80 | if bad_keys:
81 | bad_keys_fmt = ', '.join(['"{}"'.format(i) for i in bad_keys])
82 | msg = ((f'Unknown command pathway condition inputs for command '
83 | f'pathway index {cmd_pth_idx}: {bad_keys_fmt}.'))
84 | raise CommandError(msg)
85 |
86 | def _validate_command_pathways(self):
87 |
88 | if not self.command_pathways:
89 | self.command_pathways = [
90 | {'commands_idx': list(range(len(self.all_commands)))}
91 | ]
92 |
93 | req_keys = ['commands_idx']
94 | allowed_keys = req_keys + ['condition', 'commands']
95 |
96 |         # Condition input labels are validated against the schema inputs in `check_pathway_conditions`.
97 | no_condition_count = 0
98 | for cmd_pth_idx, cmd_pth in enumerate(self.command_pathways):
99 |
100 | bad_keys = set(cmd_pth) - set(allowed_keys)
101 | miss_keys = set(req_keys) - set(cmd_pth)
102 |
103 | if bad_keys:
104 | bad_keys_fmt = ', '.join(['"{}"'.format(i) for i in bad_keys])
105 | msg = ((f'Unknown command pathway keys for command pathway index '
106 | f'{cmd_pth_idx}: {bad_keys_fmt}.'))
107 | raise CommandError(msg)
108 |
109 | if miss_keys:
110 | miss_keys_fmt = ', '.join(['"{}"'.format(i) for i in miss_keys])
111 | msg = (f'Missing required command pathway keys for command pathway '
112 | f'index {cmd_pth_idx}: {miss_keys_fmt}.')
113 | raise CommandError(msg)
114 |
115 | if 'condition' not in cmd_pth:
116 | no_condition_count += 1
117 |
118 | cmds_idx = cmd_pth['commands_idx']
119 | if (
120 | not isinstance(cmds_idx, list) or
121 | not all([i in range(len(self.all_commands)) for i in cmds_idx])
122 | ):
123 | msg = (f'`commands_idx` must be a list of integer indices into '
124 | f'`all_commands`.')
125 | raise CommandError(msg)
126 |
127 | if no_condition_count > 1:
128 | msg = (f'Only one command pathway may be specified without a `condition` key '
129 | f'(the default command pathway).')
130 | raise CommandError(msg)
131 |
132 | def resolve_command_pathways(self):
133 | """Add a `commands` list to each `commands_pathway`, according to its
134 | `commands_idx`."""
135 |
136 | for cmd_pth_idx, cmd_pth in enumerate(self.command_pathways):
137 | commands = [copy.deepcopy(self.all_commands[i])
138 | for i in cmd_pth['commands_idx']]
139 | cmd_pth.update({'commands': commands})
140 | self.resolve_command_files(cmd_pth_idx)
141 |
142 | def resolve_command_files(self, cmd_pathway_idx):
143 |
144 | # Validate command_files dict first:
145 | for cmd_fn_label, cmd_fn in self.command_files.items():
146 | if not isinstance(cmd_fn, str) or '<>' not in cmd_fn:
147 | msg = ('`command_files` must be a dict that maps a command file label to '
148 | 'a file name template that must include the substring "<>", '
149 | 'which is substituted by increasing integers.')
150 | raise CommandError(msg)
151 |
152 | file_names = self.get_command_file_names(cmd_pathway_idx)
153 |
154 | for cmd_idx, command in enumerate(self.get_commands(cmd_pathway_idx)):
155 |
156 | for opt_idx, opt in enumerate(command.options):
157 | for opt_token_idx, opt_token in enumerate(opt):
158 | options_files = file_names['all_commands'][cmd_idx]['options']
159 | for cmd_fn_label, cmd_fn in options_files.items():
160 | if f'<<{cmd_fn_label}>>' in opt_token:
161 | new_fmt_opt = opt_token.replace(f'<<{cmd_fn_label}>>', cmd_fn)
162 | command.options[opt_idx][opt_token_idx] = new_fmt_opt
163 |
164 | for param_idx, param in enumerate(command.parameters):
165 | params_files = file_names['all_commands'][cmd_idx]['parameters']
166 | for cmd_fn_label, cmd_fn in params_files.items():
167 | if f'<<{cmd_fn_label}>>' in param:
168 | new_param = param.replace(f'<<{cmd_fn_label}>>', cmd_fn)
169 | command.parameters[param_idx] = new_param
170 |
171 | if command.stdin:
172 | stdin_files = file_names['all_commands'][cmd_idx]['stdin']
173 | for cmd_fn_label, cmd_fn in stdin_files.items():
174 | if f'<<{cmd_fn_label}>>' in command.stdin:
175 | new_stdin = command.stdin.replace(f'<<{cmd_fn_label}>>', cmd_fn)
176 | command.stdin = new_stdin
177 |
178 | if command.stdout:
179 | new_stdout = command.stdout
180 | stdout_files = file_names['all_commands'][cmd_idx]['stdout']
181 | for cmd_fn_label, cmd_fn in stdout_files.items():
182 | if f'<<{cmd_fn_label}>>' in command.stdout:
183 | new_stdout = command.stdout.replace(f'<<{cmd_fn_label}>>', cmd_fn)
184 | command.stdout = new_stdout
185 |
186 | if command.stderr:
187 | stderr_files = file_names['all_commands'][cmd_idx]['stderr']
188 | for cmd_fn_label, cmd_fn in stderr_files.items():
189 | if f'<<{cmd_fn_label}>>' in command.stderr:
190 | new_stderr = command.stderr.replace(f'<<{cmd_fn_label}>>', cmd_fn)
191 | command.stderr = new_stderr
192 |
193 | def get_commands(self, cmd_pathway_idx):
194 | return self.command_pathways[cmd_pathway_idx]['commands']
195 |
196 | def select_command_pathway(self, inputs):
197 |         """Get the correct command pathway index, given a set of input names and values.
198 |
199 | Parameters
200 | ----------
201 | inputs : dict of (str: list)
202 | Dict whose keys are input names and whose values are lists of input values
203 | (i.e. one element for each task sequence item).
204 |
205 | Returns
206 | -------
207 | cmd_pathway_idx : int
208 |
209 | """
210 |
211 | # Consider an input defined if any of its values (in the sequence) are not `None`:
212 | inputs_defined = [k for k, v in inputs.items() if any([i is not None for i in v])]
213 |
214 | # Sort pathways by most-specific first:
215 | order_idx = np.argsort([len(i.get('condition', []))
216 | for i in self.command_pathways])[::-1]
217 |
218 | cmd_pathway_idx = None
219 | for cmd_pth_idx in order_idx:
220 | cmd_pth = self.command_pathways[cmd_pth_idx]
221 | condition = cmd_pth.get('condition', [])
222 | if not (set(condition) - set(inputs_defined)):
223 | # All inputs named in condition are defined
224 | cmd_pathway_idx = cmd_pth_idx
225 | break
226 |
227 | if cmd_pathway_idx is None:
228 | raise CommandError('Could not find suitable command pathway.')
229 |
230 | return cmd_pathway_idx
231 |
232 | def get_command_file_names(self, cmd_pathway_idx):
233 |
234 | out = {
235 | 'input_map': {},
236 | 'output_map': {},
237 | 'all_commands': [],
238 | }
239 |
240 | file_name_increments = {k: 0 for k in self.command_files.keys()}
241 |
242 | # Input map should use the first increment:
243 | for cmd_fn_label in self.command_files.keys():
244 | new_fn = self.command_files[cmd_fn_label].replace(
245 | '<>',
246 | str(file_name_increments[cmd_fn_label]),
247 | )
248 | out['input_map'].update({cmd_fn_label: new_fn})
249 |
250 | for command in self.get_commands(cmd_pathway_idx):
251 |
252 | file_names_i = {
253 | 'stdin': {},
254 | 'options': {},
255 | 'parameters': {},
256 | 'stdout': {},
257 | 'stderr': {},
258 | }
259 |
260 | cmd_fn_is_incremented = {k: False for k in self.command_files.keys()}
261 | for cmd_fn_label in self.command_files.keys():
262 |
263 | for opt in command.options_raw:
264 | fmt_opt = list(opt)
265 | for opt_token in fmt_opt:
266 | if f'<<{cmd_fn_label}>>' in opt_token:
267 | new_fn = self.command_files[cmd_fn_label].replace(
268 | '<>',
269 | str(file_name_increments[cmd_fn_label]),
270 | )
271 |                             file_names_i['options'].update({cmd_fn_label: new_fn})
272 |
273 | for param in command.parameters_raw:
274 | if f'<<{cmd_fn_label}>>' in param:
275 | new_fn = self.command_files[cmd_fn_label].replace(
276 | '<>',
277 | str(file_name_increments[cmd_fn_label]),
278 | )
279 | file_names_i['parameters'].update({cmd_fn_label: new_fn})
280 |
281 | if command.stdin_raw:
282 | if f'<<{cmd_fn_label}>>' in command.stdin_raw:
283 | new_fn = self.command_files[cmd_fn_label].replace(
284 | '<>',
285 | str(file_name_increments[cmd_fn_label]),
286 | )
287 | file_names_i['stdin'].update({cmd_fn_label: new_fn})
288 |
289 | if command.stdout_raw:
290 | if f'<<{cmd_fn_label}>>' in command.stdout_raw:
291 | file_name_increments[cmd_fn_label] += 1
292 | cmd_fn_is_incremented[cmd_fn_label] = True
293 | new_fn = self.command_files[cmd_fn_label].replace(
294 | '<>',
295 | str(file_name_increments[cmd_fn_label]),
296 | )
297 | file_names_i['stdout'].update({cmd_fn_label: new_fn})
298 |
299 |                 if command.stderr_raw:
300 |                     if f'<<{cmd_fn_label}>>' in command.stderr_raw:
301 |                         # Increment only if stdout did not already do so
302 |                         # for this command:
303 |                         if not cmd_fn_is_incremented[cmd_fn_label]:
304 |                             file_name_increments[cmd_fn_label] += 1
305 |                             cmd_fn_is_incremented[cmd_fn_label] = True
306 |                         new_fn = self.command_files[cmd_fn_label].replace(
307 |                             '<>',
308 |                             str(file_name_increments[cmd_fn_label]),
309 |                         )
310 |                         file_names_i['stderr'].update({cmd_fn_label: new_fn})
311 |
312 | out['all_commands'].append(file_names_i)
313 |
314 | # Output map should use the final increment:
315 | for cmd_fn_label in self.command_files.keys():
316 | new_fn = self.command_files[cmd_fn_label].replace(
317 | '<>',
318 | str(file_name_increments[cmd_fn_label]),
319 | )
320 | out['output_map'].update({cmd_fn_label: new_fn})
321 |
322 | return out
323 |
324 | def get_formatted_commands(self, inputs_list, num_cores, cmd_pathway_idx):
325 | """Format commands into strings with hpcflow variable substitutions where
326 | required.
327 |
328 | Parameters
329 | ----------
330 | inputs_list : list of str
331 | List of input names from which a subset of hpcflow variables may be defined.
332 | num_cores : int
333 | Number of CPU cores to use for this task. This is required to determine
334 | whether a "parallel_mode" should be included in the formatted commands.
335 | cmd_pathway_idx : int
336 | Which command pathway should be returned.
337 |
338 | Returns
339 | -------
340 | tuple of (fmt_commands, var_names)
341 | fmt_commands : list of dict
342 | Each list item is a dict that contains keys corresponding to an individual
343 | command to be run.
344 | var_names : dict of (str, str)
345 | A dict that maps a parameter name to an hpcflow variable name.
346 |
347 | """
348 |
349 | fmt_commands = []
350 |
351 | var_names = {}
352 | for command in self.get_commands(cmd_pathway_idx):
353 |
354 | fmt_opts = []
355 | for opt in command.options:
356 | fmt_opt = list(opt)
357 | for opt_token_idx, opt_token in enumerate(fmt_opt):
358 | if opt_token in inputs_list:
359 | # Replace with an `hpcflow` variable:
360 | var_name = 'matflow_input_{}'.format(opt_token)
361 | fmt_opt[opt_token_idx] = '<<{}>>'.format(var_name)
362 | if opt_token not in var_names:
363 | var_names.update({opt_token: var_name})
364 |
365 | fmt_opt_joined = ' '.join(fmt_opt)
366 | fmt_opts.append(fmt_opt_joined)
367 |
368 | fmt_params = []
369 | for param in command.parameters:
370 |
371 | fmt_param = param
372 | if param in inputs_list:
373 | # Replace with an `hpcflow` variable:
374 | var_name = 'matflow_input_{}'.format(param)
375 | fmt_param = '<<{}>>'.format(var_name)
376 |
377 | if param not in var_names:
378 | var_names.update({param: var_name})
379 |
380 | fmt_params.append(fmt_param)
381 |
382 | cmd_fmt = ' '.join([command.command] + fmt_opts + fmt_params)
383 |
384 | if command.stdin:
385 | cmd_fmt += ' < {}'.format(command.stdin)
386 |
387 | if command.stdout:
388 | cmd_fmt += ' >> {}'.format(command.stdout)
389 |
390 | if command.stderr:
391 | if command.stderr == command.stdout:
392 | cmd_fmt += ' 2>&1'
393 | else:
394 | cmd_fmt += ' 2>> {}'.format(command.stderr)
395 |
396 | cmd_dict = {'line': cmd_fmt}
397 | if command.parallel_mode and num_cores > 1:
398 | cmd_dict.update({'parallel_mode': command.parallel_mode})
399 |
400 | fmt_commands.append(cmd_dict)
401 |
402 | return fmt_commands, var_names
403 |
404 |
405 | class Command(object):
406 | """Class to represent a command to be executed by a shell."""
407 |
408 | def __init__(self, command, options=None, parameters=None, stdin=None, stdout=None,
409 | stderr=None, parallel_mode=None):
410 |
411 | self.command = command
412 | self.parallel_mode = parallel_mode
413 |
414 | # Raw versions may include command file name variables:
415 | self.options_raw = options or []
416 | self.parameters_raw = parameters or []
417 | self.stdin_raw = stdin
418 | self.stdout_raw = stdout
419 | self.stderr_raw = stderr
420 |
421 | # Non-raw versions modified by the parent CommandGroup to include any resolved
422 | # command file name:
423 | self.options = copy.deepcopy(self.options_raw)
424 | self.parameters = copy.deepcopy(self.parameters_raw)
425 | self.stdin = self.stdin_raw
426 | self.stdout = self.stdout_raw
427 | self.stderr = self.stderr_raw
428 |
429 | def __repr__(self):
430 | out = f'{self.__class__.__name__}({self.command!r}'
431 | if self.options:
432 | out += f', options={self.options!r}'
433 | if self.parameters:
434 | out += f', parameters={self.parameters!r}'
435 | if self.stdin:
436 | out += f', stdin={self.stdin!r}'
437 | if self.stdout:
438 | out += f', stdout={self.stdout!r}'
439 | if self.stderr:
440 | out += f', stderr={self.stderr!r}'
441 | out += ')'
442 | return out
443 |
444 | def __str__(self):
445 |
446 | cmd_fmt = ' '.join(
447 | [self.command] +
448 | [' '.join(i) for i in self.options] +
449 | self.parameters
450 | )
451 |
452 | if self.stdin:
453 | cmd_fmt += ' < {}'.format(self.stdin)
454 | if self.stdout:
455 | cmd_fmt += ' > {}'.format(self.stdout)
456 | if self.stderr:
457 | if self.stderr == self.stdout:
458 | cmd_fmt += ' 2>&1'
459 | else:
460 | cmd_fmt += ' 2> {}'.format(self.stderr)
461 |
462 | return cmd_fmt
463 |
--------------------------------------------------------------------------------
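A sketch of pathway selection and command formatting; the solver name and input names are hypothetical:

from matflow.models.command import CommandGroup

cmd_group = CommandGroup(
    commands=[
        {'command': 'solver', 'parameters': ['input_file']},
        {'command': 'solver', 'options': [['--damping', 'damping']],
         'parameters': ['input_file']},
    ],
    command_pathways=[
        {'commands_idx': [0]},                            # default pathway
        {'commands_idx': [1], 'condition': ['damping']},  # if `damping` defined
    ],
)

# `damping` has a non-None value, so the more specific pathway is selected:
pathway_idx = cmd_group.select_command_pathway(
    {'damping': [0.1], 'input_file': ['in.dat']})

fmt_commands, var_names = cmd_group.get_formatted_commands(
    ['damping', 'input_file'], num_cores=1, cmd_pathway_idx=pathway_idx)
print(fmt_commands[0]['line'])
# solver --damping <<matflow_input_damping>> <<matflow_input_input_file>>
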
/matflow/models/element.py:
--------------------------------------------------------------------------------
1 | """matflow.models.element.py"""
2 |
3 | import copy
4 |
5 | import hickle
6 | import h5py
7 |
8 | from matflow.models.parameters import Parameters, Files
9 |
10 |
11 | class Element(object):
12 |
13 | __slots__ = [
14 | '_task',
15 | '_element_idx',
16 | '_inputs',
17 | '_outputs',
18 | '_files',
19 | '_resource_usage',
20 | ]
21 |
22 | def __init__(self, task, element_idx, inputs_data_idx=None, outputs_data_idx=None,
23 | files_data_idx=None, resource_usage=None):
24 |
25 | self._task = task
26 | self._element_idx = element_idx
27 | self._resource_usage = resource_usage
28 |
29 | self._inputs = Parameters(self, inputs_data_idx)
30 | self._outputs = Parameters(self, outputs_data_idx)
31 | self._files = Files(self, files_data_idx)
32 |
33 | def __repr__(self):
34 | out = (
35 | f'{self.__class__.__name__}('
36 | f'inputs={self.inputs!r}, '
37 | f'outputs={self.outputs!r}, '
38 | f'files={self.files!r}'
39 | f')'
40 | )
41 | return out
42 |
43 | @property
44 | def task(self):
45 | return self._task
46 |
47 | @property
48 | def element_idx(self):
49 | return self._element_idx
50 |
51 | @property
52 | def resource_usage(self):
53 | return self._resource_usage
54 |
55 | def as_dict(self):
56 |         """Return an attributes dict with leading underscores removed from keys."""
57 | self_dict = {k.lstrip('_'): getattr(self, k) for k in self.__slots__}
58 | self_dict.pop('task')
59 | self_dict['inputs_data_idx'] = self_dict.pop('inputs').as_dict()
60 | self_dict['outputs_data_idx'] = self_dict.pop('outputs').as_dict()
61 | self_dict['files_data_idx'] = self_dict.pop('files').as_dict()
62 | return self_dict
63 |
64 | def get_parameter_data_idx(self, parameter_name):
65 | try:
66 | out = self.outputs.get_data_idx(parameter_name)
67 | except KeyError:
68 | out = self.inputs.get_data_idx(parameter_name)
69 |
70 | return out
71 |
72 | def get_input_data_idx(self, input_name, safe_name=False):
73 | return self.inputs.get_data_idx(input_name, safe_name)
74 |
75 | def get_output_data_idx(self, output_name, safe_name=False):
76 | return self.outputs.get_data_idx(output_name, safe_name)
77 |
78 | def get_file_data_idx(self, file_name, safe_name=False):
79 | return self.files.get_data_idx(file_name, safe_name)
80 |
81 | def get_input(self, input_name, safe_name=False):
82 | if not safe_name:
83 | input_name = self.inputs.get_name_map()[input_name]
84 | return getattr(self.inputs, input_name)
85 |
86 | def get_output(self, output_name, safe_name=False):
87 | if not safe_name:
88 | output_name = self.outputs.get_name_map()[output_name]
89 | return getattr(self.outputs, output_name)
90 |
91 | def get_file(self, file_name, safe_name=False):
92 | if not safe_name:
93 | file_name = self.files.get_name_map()[file_name]
94 | return getattr(self.files, file_name)
95 |
96 | def get_file_lines(self, file_name, lines_slice=(10,), safe_name=False):
97 | return self.files.get_lines(file_name, lines_slice, safe_name)
98 |
99 | def print_file_lines(self, file_name, lines_slice=(10,), safe_name=False):
100 | self.files.print_lines(file_name, lines_slice, safe_name)
101 |
102 | @property
103 | def inputs(self):
104 | return self._inputs
105 |
106 | @property
107 | def outputs(self):
108 | return self._outputs
109 |
110 | @property
111 | def files(self):
112 | return self._files
113 |
114 | @property
115 | def HDF5_path(self):
116 | return self.task.HDF5_path + f'/\'elements\'/data/data_{self.element_idx}'
117 |
118 | def add_input(self, input_name, value=None, data_idx=None):
119 | return self.inputs.add_parameter(input_name, 'inputs', value, data_idx)
120 |
121 | def add_output(self, output_name, value=None, data_idx=None):
122 | return self.outputs.add_parameter(output_name, 'outputs', value, data_idx)
123 |
124 | def add_file(self, file_name, value=None, data_idx=None):
125 | return self.files.add_parameter(file_name, 'files', value, data_idx)
126 |
127 | def add_resource_usage(self, resource_usage):
128 |
129 | with h5py.File(self.task.workflow.loaded_path, 'r+') as handle:
130 |
131 | # Load and save attributes of parameter index dict:
132 | path = self.HDF5_path + "/'resource_usage'"
133 | attributes = dict(handle[path].attrs)
134 | del handle[path]
135 |
136 | # Dump resource usage:
137 | hickle.dump(resource_usage, handle, path=path)
138 |
139 | # Update dict attributes to maintain /workflow_obj loadability
140 | for k, v in attributes.items():
141 | handle[path].attrs[k] = v
142 |
143 | def get_element_dependencies(self, recurse=False):
144 | """Get the task/element indices of elements that a given element depends on.
145 |
146 | Parameters
147 | ----------
148 | recurse : bool, optional
149 | If False, only include task/element indices that are direct dependencies of
150 |             the given element. If True, also include task/element indices that are
151 |             indirect dependencies of the given element.
152 |
153 | Returns
154 | -------
155 | dict of (int : list)
156 | Dict whose keys are task indices and whose values are lists of element indices
157 | for a given task.
158 |
159 | Notes
160 | -----
161 | For the inverse, see `get_dependent_elements`.
162 |
163 | """
164 |
165 | task = self.task
166 | workflow = task.workflow
167 | elem_deps = {}
168 | for inp_alias, ins in workflow.elements_idx[task.task_idx]['inputs'].items():
169 | if ins['task_idx'][self.element_idx] is not None:
170 | dep_elem_idx = ins['element_idx'][self.element_idx]
171 |                 # Initialise the list for this task index if not yet present:
172 | if ins['task_idx'][self.element_idx] not in elem_deps:
173 | elem_deps.update({ins['task_idx'][self.element_idx]: []})
174 | elem_deps[ins['task_idx'][self.element_idx]].extend(dep_elem_idx)
175 |
176 | if recurse:
177 | new_elem_deps = copy.deepcopy(elem_deps)
178 | for task_idx, element_idx in elem_deps.items():
179 | for element_idx_i in element_idx:
180 | element_i = workflow.tasks[task_idx].elements[element_idx_i]
181 | add_elem_deps = element_i.get_element_dependencies(recurse=True)
182 | for k, v in add_elem_deps.items():
183 | if k not in new_elem_deps:
184 | new_elem_deps.update({k: []})
185 | new_elem_deps[k].extend(v)
186 |
187 | elem_deps = new_elem_deps
188 |
189 | # Remove repeats:
190 | for k, v in elem_deps.items():
191 | elem_deps[k] = list(set(v))
192 |
193 | return elem_deps
194 |
195 | def get_dependent_elements(self, recurse=False):
196 | """Get the task/element indices of elements that depend on a given element.
197 |
198 | Parameters
199 | ----------
200 | recurse : bool, optional
201 | If False, only include task/element indices that depend directly on the given
202 | element. If True, also include task/element indices that depend indirectly on
203 | the given element.
204 |
205 | Returns
206 | -------
207 | dict of (int : list)
208 | Dict whose keys are task indices and whose values are lists of element indices
209 | for a given task.
210 |
211 | Notes
212 | -----
213 | For the inverse, see `get_element_dependencies`.
214 |
215 | """
216 |
217 | task = self.task
218 | workflow = task.workflow
219 | dep_elems = {}
220 |
221 | for task_idx, elems_idx in enumerate(workflow.elements_idx):
222 | for inp_alias, ins in elems_idx['inputs'].items():
223 | if ins.get('task_idx') == task.task_idx:
224 | for element_idx, i in enumerate(ins['element_idx']):
225 | if self.element_idx in i:
226 | if task_idx not in dep_elems:
227 | dep_elems.update({task_idx: []})
228 | dep_elems[task_idx].append(element_idx)
229 |
230 | if recurse:
231 | new_dep_elems = copy.deepcopy(dep_elems)
232 | for task_idx, element_idx in dep_elems.items():
233 | for element_idx_i in element_idx:
234 | element_i = workflow.tasks[task_idx].elements[element_idx_i]
235 | add_elem_deps = element_i.get_dependent_elements(recurse=True)
236 | for k, v in add_elem_deps.items():
237 | if k not in new_dep_elems:
238 | new_dep_elems.update({k: []})
239 | new_dep_elems[k].extend(v)
240 |
241 | dep_elems = new_dep_elems
242 |
243 | # Remove repeats:
244 | for k, v in dep_elems.items():
245 | dep_elems[k] = list(set(v))
246 |
247 | return dep_elems
248 |
249 | def get_parameter_dependency_value(self, parameter_dependency_name):
250 |
251 | workflow = self.task.workflow
252 |
253 | in_tasks = workflow.get_input_tasks(parameter_dependency_name)
254 | out_tasks = workflow.get_output_tasks(parameter_dependency_name)
255 | elem_deps = self.get_element_dependencies(recurse=True)
256 |
257 | if parameter_dependency_name in self.task.schema.input_names:
258 | param_vals = [self.get_input(parameter_dependency_name)]
259 |
260 | elif out_tasks:
261 | elems = []
262 | out_tasks_valid = set(out_tasks) & set(elem_deps)
263 | if not out_tasks_valid:
264 | msg = (f'Parameter "{parameter_dependency_name}" is not a dependency of '
265 | f'given element of task "{self.task.name}".')
266 | raise ValueError(msg)
267 | for task_idx in out_tasks_valid:
268 | for i in elem_deps[task_idx]:
269 | elems.append(workflow.tasks[task_idx].elements[i])
270 | param_vals = [elem.get_output(parameter_dependency_name) for elem in elems]
271 |
272 | elif in_tasks:
273 | elems = []
274 | in_tasks_valid = set(in_tasks) & set(elem_deps)
275 | if not in_tasks_valid:
276 | msg = (f'Parameter "{parameter_dependency_name}" is not a dependency of '
277 | f'given element of task "{self.task.name}".')
278 | raise ValueError(msg)
279 | for task_idx in in_tasks_valid:
280 | for i in elem_deps[task_idx]:
281 | elems.append(workflow.tasks[task_idx].elements[i])
282 | param_vals = [elem.get_input(parameter_dependency_name) for elem in elems]
283 | else:
284 | msg = (f'Parameter "{parameter_dependency_name}" is not an input or output '
285 | f'parameter for any workflow task.')
286 | raise ValueError(msg)
287 |
288 | if len(param_vals) == 1:
289 | param_vals = param_vals[0]
290 |
291 | return param_vals
292 |
293 | def get_dependent_parameter_value(self, dependent_parameter_name):
294 |
295 | workflow = self.task.workflow
296 |
297 | out_tasks = workflow.get_output_tasks(dependent_parameter_name)
298 | dep_elems = self.get_dependent_elements(recurse=True)
299 |
300 | if dependent_parameter_name in self.task.schema.outputs:
301 | param_vals = [self.get_output(dependent_parameter_name)]
302 |
303 | elif out_tasks:
304 | elems = []
305 | out_tasks_valid = set(out_tasks) & set(dep_elems)
306 | if not out_tasks_valid:
307 | msg = (f'Parameter "{dependent_parameter_name}" does not depend on the '
308 | f'given element of task "{self.task.name}".')
309 | raise ValueError(msg)
310 | for task_idx in out_tasks_valid:
311 | for i in dep_elems[task_idx]:
312 | elems.append(workflow.tasks[task_idx].elements[i])
313 | param_vals = [elem.get_output(dependent_parameter_name) for elem in elems]
314 | else:
315 | msg = (f'Parameter "{dependent_parameter_name}" is not an output parameter '
316 | f'for any workflow task.')
317 | raise ValueError(msg)
318 |
319 | if len(param_vals) == 1:
320 | param_vals = param_vals[0]
321 |
322 | return param_vals
323 |
--------------------------------------------------------------------------------
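The two dependency queries are inverses of one another. An illustration of their return shapes, assuming `workflow` is a loaded `Workflow` in which task 1 consumes outputs of task 0, and task 2 consumes outputs of task 1 (all hypothetical):

elem = workflow.tasks[2].elements[0]
elem.get_element_dependencies()              # e.g. {1: [0]}
elem.get_element_dependencies(recurse=True)  # e.g. {1: [0], 0: [0]}

# The inverse query, from the element of the first task:
workflow.tasks[0].elements[0].get_dependent_elements(recurse=True)
# e.g. {1: [0], 2: [0]}
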
/matflow/models/parameters.py:
--------------------------------------------------------------------------------
1 | """matflow.models.parameters.py"""
2 |
3 | import re
4 | import keyword
5 |
6 | import h5py
7 | import hickle
8 |
9 | from matflow.utils import zeropad
10 |
11 |
12 | class Parameters(object):
13 |
14 | """
15 | Attributes
16 | ----------
17 | _element : Element
18 | _parameters : dict
19 | Dict mapping the safe names of the parameters to their data indices within the
20 | HDF5 element_idx group.
21 | _name_map : dict
22 | Dict mapping the non-safe names of the parameters to their safe names. A safe name
23 | refers to a name that can be used as a variable name within Python. For example,
24 | spaces and dots are removed from non-safe names to become safe names. The reason
25 | for doing this is to allow the use of dot-notation to access element data/files.
26 |
27 | """
28 |
29 | def __init__(self, element, parameters):
30 |
31 | self._element = element
32 | self._parameters, self._name_map = self._normalise_params_dict(parameters)
33 |
34 | def __getattr__(self, safe_name):
35 | if safe_name in self._parameters:
36 | wkflow = self._element.task.workflow
37 | names_inv = {safe: non_safe for non_safe, safe in self._name_map.items()}
38 | name = names_inv[safe_name]
39 | data_idx = self.get_data_idx(name)
40 | return wkflow.get_element_data(data_idx)
41 | else:
42 | msg = f'{self.__class__.__name__!r} object has no attribute {safe_name!r}.'
43 | raise AttributeError(msg)
44 |
45 | def __setattr__(self, name, value):
46 | if name in ['_element', '_parameters', '_name_map']:
47 | super().__setattr__(name, value)
48 | else:
49 | raise AttributeError
50 |
51 | def __dir__(self):
52 | return super().__dir__() + list(self._parameters.keys())
53 |
54 | def __repr__(self):
55 | names_fmt = ', '.join([f'{i!r}' for i in self._parameters.keys()])
56 | out = f'{self.__class__.__name__}({names_fmt})'
57 | return out
58 |
59 | def _normalise_params_dict(self, parameters):
60 |
61 | normed_data_idx = {}
62 | name_map = {}
63 | for name, v in (parameters or {}).items():
64 | safe_name = self._normalise_param_name(name, normed_data_idx.keys())
65 | normed_data_idx.update({safe_name: v})
66 | name_map.update({name: safe_name})
67 |
68 | return normed_data_idx, name_map
69 |
70 | @staticmethod
71 | def get_element_data_key(element_idx, param_name):
72 | return f'{zeropad(element_idx, 1000)}_{param_name}'
73 |
74 | @staticmethod
75 | def _normalise_param_name(param_name, existing_names):
76 | """Transform a string so that it is a valid Python variable name."""
77 | param_name_old = param_name
78 | safe_name = param_name.replace('.', '_dot_').replace(' ', '_').replace('-', '_')
79 | if (
80 | re.match(r'\d', safe_name) or
81 | safe_name in dir(Parameters) or
82 | keyword.iskeyword(safe_name) or
83 | safe_name in existing_names
84 | ):
85 | safe_name = 'param_' + safe_name
86 |
87 | if re.search(r'[^a-zA-Z0-9_]', safe_name) or not safe_name:
88 | raise ValueError(f'Invalid parameter name: "{param_name_old}".')
89 |
90 | return safe_name
91 |
92 | def as_dict(self):
93 | return self.get_parameters(safe_names=False)
94 |
95 | def get_parameters(self, safe_names=True):
96 | if not safe_names:
97 | names_inv = {safe: non_safe for non_safe, safe in self._name_map.items()}
98 | return {names_inv[safe_name]: v for safe_name, v in self._parameters.items()}
99 | return self._parameters
100 |
101 | def get(self, name, safe_name=False):
102 | if not safe_name:
103 | name = self._name_map[name]
104 | return getattr(self, name)
105 |
106 | def get_all(self, safe_names=False):
107 | return {
108 | k: self.get(k, safe_names)
109 | for k in (self._parameters if safe_names else self._name_map).keys()
110 | }
111 |
112 | def get_element(self):
113 | """Not a property to reduce chance of attribute collisions."""
114 | return self._element
115 |
116 | def get_name_map(self):
117 | """Not a property to reduce chance of attribute collisions."""
118 | return self._name_map
119 |
120 | def get_data_idx(self, name, safe_name=False):
121 | if not safe_name:
122 | name = self._name_map[name]
123 | out = self._parameters[name]
124 | if isinstance(out, list):
125 | out = tuple(out)
126 | return out
127 |
128 | def add_parameter(self, name, param_type, value=None, data_idx=None):
129 |
130 | if name in self._name_map:
131 | raise ValueError(f'Parameter "{name}" already exists.')
132 |
133 | safe_name = self._normalise_param_name(name, self._parameters.keys())
134 | loaded_path = self._element.task.workflow.loaded_path
135 |
136 | with h5py.File(loaded_path, 'r+') as handle:
137 |
138 | if data_idx is None:
139 | # Add data to the `element_data` group if required:
140 | path = '/element_data'
141 | next_idx = len(handle[path])
142 | element_data_key = self.get_element_data_key(next_idx, name)
143 | new_group = handle[path].create_group(element_data_key)
144 | hickle.dump(value, handle, path=new_group.name)
145 | data_idx = next_idx
146 |
147 | # Load and save attributes of parameter index dict:
148 | path = self._element.HDF5_path + f"/'{param_type}_data_idx'"
149 | attributes = dict(handle[path].attrs)
150 | param_index = hickle.load(handle, path=path)
151 | del handle[path]
152 |
153 | # Update and re-dump parameter index dict:
154 | param_index.update({name: data_idx})
155 | hickle.dump(param_index, handle, path=path)
156 |
157 | # Update parameter index dict attributes to maintain /workflow_obj loadability
158 | for k, v in attributes.items():
159 | handle[path].attrs[k] = v
160 |
161 | self._name_map.update({name: safe_name})
162 | self._parameters.update({safe_name: data_idx})
163 |
164 | return data_idx
165 |
166 |
167 | class Files(Parameters):
168 |
169 | def get_lines(self, file_name, lines_slice=(1, 10), safe_name=False):
170 |
171 | if not safe_name:
172 | file_name = self.get_name_map()[file_name]
173 |
174 | if not isinstance(lines_slice, slice):
175 | if isinstance(lines_slice, int):
176 | lines_slice = (lines_slice,)
177 | lines_slice = slice(*lines_slice)
178 |
179 | return getattr(self, file_name).split('\n')[lines_slice]
180 |
181 | def print_lines(self, file_name, lines_slice=(1, 10), safe_name=False):
182 |
183 | lns = self.get_lines(file_name, lines_slice, safe_name)
184 | print('\n'.join(lns))
185 |
--------------------------------------------------------------------------------
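A minimal sketch of the safe-name normalisation used by `Parameters` above. `_normalise_param_name` is a private static helper, so the import path and direct calls are for illustration only:

from matflow.models.parameters import Parameters

# Dots, spaces and hyphens are substituted:
Parameters._normalise_param_name('grain size', existing_names=[])  # 'grain_size'

# Names that start with a digit, clash with a Python keyword, with an existing
# attribute of `Parameters`, or with a name already in use gain a 'param_' prefix:
Parameters._normalise_param_name('2theta', existing_names=[])      # 'param_2theta'
Parameters._normalise_param_name('lambda', existing_names=[])      # 'param_lambda'

--------------------------------------------------------------------------------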
/matflow/models/software.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import socket
3 |
4 | from matflow.errors import SoftwareInstanceError, MissingSoftwareSourcesError
5 | from matflow.utils import extract_variable_names
6 |
7 |
8 | class SourcesPreparation(object):
9 |
10 | __slots__ = ['_commands', '_env']
11 |
12 | def __init__(self, commands=None, env=None):
13 | self._commands = commands
14 | self._env = EnvironmentSpec(env)
15 |
16 | def __repr__(self):
17 | return f'{self.__class__.__name__}(commands={self.commands!r}, env={self.env!r})'
18 |
19 | def __bool__(self):
20 | return bool(self.commands)
21 |
22 | @property
23 | def commands(self):
24 | return self._commands
25 |
26 | def get_formatted_commands(self, source_vars, sources_dir, task_idx):
27 | out = [{
28 | 'line': (f'matflow prepare-sources '
29 | f'--task-idx={task_idx} '
30 | f'--iteration-idx=$ITER_IDX')
31 | }]
32 | if self.commands:
33 | for new_cmd in self.commands.splitlines():
34 |                 new_cmd = new_cmd.replace('<<sources_dir>>', sources_dir)
35 | for src_var_name, src_name in source_vars.items():
36 | new_cmd = new_cmd.replace(f'<<{src_var_name}>>', src_name)
37 | out.append({'line': new_cmd})
38 | return out
39 |
40 | @property
41 | def commands_fmt(self):
42 | return [{'line': i} for i in (self._commands or '').splitlines()]
43 |
44 | @property
45 | def env(self):
46 | return self._env
47 |
48 | def as_dict(self):
49 | return {'commands': self.commands, 'env': self.env.value}
50 |
51 |
52 | class AuxiliaryTaskSpec(object):
53 |
54 | __slots__ = ['_env']
55 |
56 | def __init__(self, env=None):
57 | self._env = EnvironmentSpec(env)
58 |
59 | def __repr__(self):
60 | return f'{self.__class__.__name__}(env={self.env!r})'
61 |
62 | @property
63 | def env(self):
64 | return self._env
65 |
66 | def as_dict(self):
67 | return {'env': self.env.value}
68 |
69 |
70 | class EnvironmentSpec(object):
71 |
72 | __slots__ = ['_value']
73 |
74 | def __init__(self, value=None):
75 | self._value = value
76 |
77 | def __repr__(self):
78 | return f'{self.__class__.__name__}(value={self.value!r})'
79 |
80 | @property
81 | def value(self):
82 | return self._value
83 |
84 | def as_str(self):
85 | return self.value or ''
86 |
87 | def as_list(self):
88 | return self.as_str().splitlines()
89 |
90 |
91 | class SoftwareInstance(object):
92 |
93 | __slots__ = [
94 | '_machine',
95 | '_software_friendly',
96 | '_label',
97 | '_env',
98 | '_cores_min',
99 | '_cores_max',
100 | '_cores_step',
101 | '_executable',
102 | '_sources_preparation',
103 | '_options',
104 | '_required_scheduler_options',
105 | '_version_info',
106 | '_task_preparation',
107 | '_task_processing',
108 | ]
109 |
110 | def __init__(self, software, label=None, env=None, cores_min=1, cores_max=1,
111 | cores_step=1, executable=None, sources_preparation=None, options=None,
112 | required_scheduler_options=None, version_info=None,
113 | task_preparation=None, task_processing=None):
114 | """Initialise a SoftwareInstance object.
115 |
116 | Parameters
117 | ----------
118 | software : str
119 | Name of the software. This is the name that will be exposed as the `SOFTWARE`
120 | attribute of a Matflow extension package.
121 | label : str, optional
122 | Label used to distinguish software instances for the same `software`. For
123 | example, this could be a version string.
124 | env : str, optional
125 | Multi-line string containing commands to be executed by the shell that are
126 | necessary to set up the environment for running this software.
127 | executable : str, optional
128 | The command that represents the executable for running this software.
129 | cores_min : int, optional
130 | Specifies the minimum number (inclusive) of cores this software instance
131 | supports. By default, 1.
132 | cores_max : int, optional
133 | Specifies the maximum number (inclusive) of cores this software instance
134 | supports. By default, 1.
135 | cores_step : int, optional
136 | Specifies the step size from `cores_min` to `cores_max` this software instance
137 | supports. By default, 1.
138 | sources_preparation : dict, optional
139 | Dict containing the following keys:
140 | env : str
141 | Multi-line string containing commands to be executed by the shell that
142 | are necessary to set up the environment for running the preparation
143 | commands.
144 | commands : str
145 | Multi-line string containing commands to be executed within the
146 | preparation `environment` that are necessary to prepare the
147 | executable. For instance, this might contain commands that compile a
148 | source code file into an executable.
149 | options : list of str, optional
150 | Additional software options as string labels that this instance supports. This
151 | can be used to label software instances for which add-ons are loaded.
152 | required_scheduler_options : dict, optional
153 | Scheduler options that are required for using this software instance.
154 | version_info : dict, optional
155 | If an extension does not provide a `software_version` function, then the
156 | version info dict must be specified here. The keys are str names and the
157 | values are dicts that must contain at least a key `version`.
158 | task_preparation : dict, optional
159 | Dict containing the following keys:
160 | env : str
161 | Multi-line string containing commands to be executed by the shell that
162 | are necessary to set up the environment for running
163 | `matflow prepare-task`.
164 | task_processing : dict, optional
165 | Dict containing the following keys:
166 | env : str
167 | Multi-line string containing commands to be executed by the shell that
168 | are necessary to set up the environment for running
169 | `matflow process-task`.
170 |
171 | """
172 |
173 | self._machine = None # Set once by `set_machine`
174 |
175 | self._software_friendly = software
176 | self._label = label
177 | self._env = EnvironmentSpec(env)
178 | self._cores_min = cores_min
179 | self._cores_max = cores_max
180 | self._cores_step = cores_step
181 | self._sources_preparation = SourcesPreparation(**(sources_preparation or {}))
182 | self._executable = executable
183 | self._options = options or []
184 | self._required_scheduler_options = required_scheduler_options or {}
185 | self._version_info = version_info or None
186 | self._task_preparation = AuxiliaryTaskSpec(**(task_preparation or {}))
187 | self._task_processing = AuxiliaryTaskSpec(**(task_processing or {}))
188 |
189 | self._validate_num_cores()
190 | self._validate_version_infos()
191 |
192 | def _validate_num_cores(self):
193 | if self.cores_min < 1:
194 | raise SoftwareInstanceError('`cores_min` must be greater than 0.')
195 | if self.cores_min > self.cores_max:
196 | msg = '`cores_max` must be greater than or equal to `cores_min`.'
197 | raise SoftwareInstanceError(msg)
198 | if self.cores_step < 1:
199 | raise SoftwareInstanceError('`cores_step` must be greater than 0.')
200 |
201 | def _validate_version_infos(self):
202 | if self.version_info:
203 | REQUIRED = ['version']
204 | for k, v in self.version_info.items():
205 | miss_keys = set(REQUIRED) - set(v.keys())
206 | if miss_keys:
207 | miss_keys_fmt = ', '.join([f'"{i}"' for i in miss_keys])
208 | msg = (f'Missing required keys in version info dict for name "{k}" '
209 | f'for software definition "{self.software}": {miss_keys_fmt}.')
210 | raise SoftwareInstanceError(msg)
211 |
212 | def __repr__(self):
213 | return (
214 | f'{self.__class__.__name__}('
215 | f'software={self.software!r}, '
216 | f'label={self.label!r}, '
217 | f'cores_range={self.cores_range!r}, '
218 | f'executable={self.executable!r}, '
219 | f'version_info={self.version_info!r}'
220 | f')'
221 | )
222 |
223 | def as_dict(self):
224 | """Return attributes dict with preceding underscores removed."""
225 | self_dict = {k.lstrip('_'): getattr(self, k) for k in self.__slots__}
226 | self_dict['software'] = self_dict.pop('software_friendly')
227 | self_dict['env'] = self_dict['env'].value
228 | self_dict['sources_preparation'] = self_dict['sources_preparation'].as_dict()
229 | self_dict['task_preparation'] = self_dict['task_preparation'].as_dict()
230 | self_dict['task_processing'] = self_dict['task_processing'].as_dict()
231 | return self_dict
232 |
233 | def validate_source_maps(self, task, method, software, all_sources_maps):
234 | """Check that any sources required in the preparation commands or executable are
235 | available in the sources map."""
236 |
237 | source_vars = self.source_variables
238 | if source_vars:
239 | if (task, method, software) not in all_sources_maps:
240 | msg = (f'No extension defines a sources map for the task "{task}" with '
241 | f'method "{method}" and software "{software}".')
242 | raise MissingSoftwareSourcesError(msg)
243 | else:
244 | sources_map = all_sources_maps[(task, method, software)]
245 |
246 | for i in source_vars:
247 | if i not in sources_map['sources']:
248 | msg = (f'Source variable name "{i}" is not in the sources map for '
249 | f'task "{task}" with method "{method}" and software '
250 | f'"{software}".')
251 | raise MissingSoftwareSourcesError(msg)
252 |
253 | @classmethod
254 | def load_multiple(cls, software_dict=None):
255 | """Load many SoftwareInstance objects from a dict of software instance
256 | definitions.
257 |
258 | Parameters
259 | ----------
260 | software_dict : dict of (str : dict)
261 | Keys are software names and values are dicts with the following keys:
262 | instances : list of dict
263 | Each element is a dict
264 | instance_defaults : dict, optional
265 | Default values to apply to each dict in the `instances` list.
266 |
267 | Returns
268 | -------
269 | all_instances : dict of (str : list of SoftwareInstance)
270 |
271 | """
272 |
273 | software_dict = software_dict or {}
274 | REQUIRED = ['instances']
275 | ALLOWED = REQUIRED + ['instance_defaults']
276 |
277 | INST_REQUIRED = ['num_cores']
278 | INST_DICT_KEYS = [
279 | 'required_scheduler_options',
280 | 'sources_preparation',
281 | ]
282 | INST_ALLOWED = INST_REQUIRED + INST_DICT_KEYS + [
283 | 'label',
284 | 'options',
285 | 'env',
286 | 'executable',
287 | 'version_info',
288 | 'task_preparation',
289 | 'task_processing',
290 | ]
291 |
292 | all_instances = {}
293 | for name, definition in software_dict.items():
294 |
295 | name_friendly = name
296 | name = SoftwareInstance.get_software_safe(name)
297 |
298 | bad_keys = set(definition.keys()) - set(ALLOWED)
299 | miss_keys = set(REQUIRED) - set(definition.keys())
300 | if bad_keys:
301 | bad_keys_fmt = ', '.join([f'"{i}"' for i in bad_keys])
302 | msg = (f'Unknown keys in software instance definitions for software '
303 | f'"{name}": {bad_keys_fmt}.')
304 | raise SoftwareInstanceError(msg)
305 | if miss_keys:
306 | miss_keys_fmt = ', '.join([f'"{i}"' for i in miss_keys])
307 | msg = (f'Software instance definitions for software "{name}" are missing '
308 | f'keys: {miss_keys_fmt}.')
309 | raise SoftwareInstanceError(msg)
310 |
311 | # Merge instance defaults with instance definition:
312 | inst_defs = definition.get('instance_defaults', {})
313 | all_name_instances = []
314 | for inst in definition['instances']:
315 |
316 | inst = dict(inst)
317 | inst_merged = dict(copy.deepcopy(inst_defs))
318 |
319 | for key, val in inst.items():
320 | if key not in INST_DICT_KEYS:
321 | inst_merged.update({key: val})
322 |
323 | # Merge values of any `INST_DICT_KEYS` individually.
324 | for key in INST_DICT_KEYS:
325 | if key in inst:
326 | if key not in inst_merged:
327 | inst_merged.update({key: {}})
328 | for subkey in inst[key]:
329 | inst_merged[key].update({subkey: inst[key][subkey]})
330 |
331 | bad_keys = set(inst_merged.keys()) - set(INST_ALLOWED)
332 | miss_keys = set(INST_REQUIRED) - set(inst_merged.keys())
333 |
334 | if bad_keys:
335 | bad_keys_fmt = ', '.join([f'"{i}"' for i in bad_keys])
336 | msg = (f'Unknown keys in software instance definitions for software '
337 | f'"{name}": {bad_keys_fmt}.')
338 | raise SoftwareInstanceError(msg)
339 | if miss_keys:
340 | miss_keys_fmt = ', '.join([f'"{i}"' for i in miss_keys])
341 | msg = (f'Software instance definitions for software "{name}" are '
342 | f'missing keys: {miss_keys_fmt}.')
343 | raise SoftwareInstanceError(msg)
344 |
345 | inst_merged['software'] = name_friendly
346 | num_cores = inst_merged.pop('num_cores', None)
347 | cores_min = 1
348 | cores_max = 1
349 | cores_step = 1
350 | if num_cores is not None:
351 | if isinstance(num_cores, (list, tuple)):
352 | if len(num_cores) == 2:
353 | cores_min, cores_max = num_cores
354 | elif len(num_cores) == 3:
355 | cores_min, cores_max, cores_step = num_cores
356 | else:
357 | msg = (f'`num_cores` value not understood in software '
358 | f'instance definition for software "{name}".')
359 | raise SoftwareInstanceError(msg)
360 | else:
361 | cores_min = num_cores
362 | cores_max = num_cores
363 | cores_step = num_cores
364 |
365 | inst_merged.update({
366 | 'cores_min': cores_min,
367 | 'cores_max': cores_max,
368 | 'cores_step': cores_step,
369 | })
370 |
371 | soft_inst = cls(**inst_merged)
372 | soft_inst.set_machine()
373 | all_name_instances.append(soft_inst)
374 |
375 | all_instances.update({name: all_name_instances})
376 |
377 | return all_instances
378 |
379 | @property
380 | def requires_sources(self):
381 | if (
382 | (
383 | self.sources_preparation and
84 | '<<sources_dir>>' in self.sources_preparation.commands
85 | ) or
86 | (self.executable and '<<sources_dir>>' in self.executable)
387 | ):
388 | return True
389 | else:
390 | return False
391 |
392 | @property
393 | def source_variables(self):
394 | if not self.requires_sources:
395 | return []
396 | else:
397 | source_vars = []
398 | if self.sources_preparation:
399 | source_vars += extract_variable_names(
400 | self.sources_preparation.commands,
401 | ['<<', '>>']
402 | )
403 | if self.executable:
404 | source_vars += extract_variable_names(self.executable, ['<<', '>>'])
405 |
406 | return list(set(source_vars) - set(['sources_dir']))
407 |
408 | @property
409 | def software(self):
410 | return self.get_software_safe(self.software_friendly)
411 |
412 | @staticmethod
413 | def get_software_safe(software_name):
414 | return software_name.lower().replace(' ', '_')
415 |
416 | @property
417 | def software_friendly(self):
418 | return self._software_friendly
419 |
420 | @property
421 | def label(self):
422 | return self._label
423 |
424 | @property
425 | def env(self):
426 | return self._env
427 |
428 | @property
429 | def task_preparation(self):
430 | return self._task_preparation
431 |
432 | @property
433 | def task_processing(self):
434 | return self._task_processing
435 |
436 | @property
437 | def cores_min(self):
438 | return self._cores_min
439 |
440 | @property
441 | def cores_max(self):
442 | return self._cores_max
443 |
444 | @property
445 | def cores_step(self):
446 | return self._cores_step
447 |
448 | @property
449 | def cores_range(self):
450 | return range(self.cores_min, self.cores_max + 1, self.cores_step)
451 |
452 | @property
453 | def sources_preparation(self):
454 | return self._sources_preparation
455 |
456 | @property
457 | def executable(self):
458 | return self._executable
459 |
460 | @property
461 | def options(self):
462 | return self._options
463 |
464 | @property
465 | def required_scheduler_options(self):
466 | return self._required_scheduler_options
467 |
468 | @property
469 | def version_info(self):
470 | return self._version_info
471 |
472 | @property
473 | def machine(self):
474 | return self._machine
475 |
476 | @machine.setter
477 | def machine(self, machine):
478 | if self._machine:
479 | raise ValueError('`machine` is already set.')
480 | self._machine = machine
481 |
482 | def set_machine(self):
483 | self.machine = socket.gethostname()
484 |
--------------------------------------------------------------------------------
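A minimal sketch of `SoftwareInstance.load_multiple` with a hypothetical software definition, showing how the scalar and list forms of `num_cores` expand into `cores_min`/`cores_max`/`cores_step`:

from matflow.models.software import SoftwareInstance

software_dict = {
    'Example Software': {
        'instance_defaults': {'env': 'module load example'},
        'instances': [
            {'num_cores': 1},                           # scalar: min = max = step = 1
            {'num_cores': [2, 16, 2], 'label': 'MPI'},  # [min, max, step]
        ],
    },
}
all_instances = SoftwareInstance.load_multiple(software_dict)

# Keys are the "safe" software names:
inst = all_instances['example_software'][1]
print(inst.cores_range)  # range(2, 17, 2)

--------------------------------------------------------------------------------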
/matflow/profile.py:
--------------------------------------------------------------------------------
1 | """`matflow.profile.py`"""
2 |
3 | from pathlib import Path
4 |
5 | from ruamel import yaml
6 |
7 | from matflow.errors import ProfileError
8 | from matflow.config import Config
9 |
10 |
11 | def parse_workflow_profile(profile_path):
12 |
13 | with Path(profile_path).open() as handle:
14 | profile = yaml.safe_load(handle)
15 |
16 | req_keys = ['name', 'tasks']
17 | task_globals = ['run_options', 'stats']
18 | good_keys = req_keys + task_globals + [
19 | 'extends',
20 | 'archive',
21 | 'archives',
22 | 'archive_excludes',
23 | 'figures',
24 | 'metadata',
25 | 'num_iterations',
26 | 'iterate',
27 | 'import',
28 | 'import_list', # equivalent to 'import'; provides a Python-code-safe variant.
29 | ]
30 |
31 | miss_keys = list(set(req_keys) - set(profile.keys()))
32 | bad_keys = list(set(profile.keys()) - set(good_keys))
33 |
34 | if miss_keys:
35 | miss_keys_fmt = ', '.join([f'"{i}"' for i in miss_keys])
36 | raise ProfileError(f'Missing keys in profile: {miss_keys_fmt}.')
37 | if bad_keys:
38 | bad_keys_fmt = ', '.join([f'"{i}"' for i in bad_keys])
39 | raise ProfileError(f'Unknown keys in profile: {bad_keys_fmt}.')
40 |
41 | if 'import' in profile and 'import_list' in profile:
42 | raise ProfileError('Specify at most one of `import` and `import_list`; '
43 | 'these options are functionally equivalent.')
44 |
45 | if 'archive' in profile and 'archives' in profile:
46 | raise ProfileError('Specify either `archive` or `archives` but not both. For '
47 | 'either case, valid values are a string or list of strings.')
48 | elif 'archive' in profile:
49 | profile['archives'] = profile.pop('archive')
50 | elif 'archives' not in profile:
51 | profile['archives'] = []
52 |
53 | if isinstance(profile['archives'], str):
54 | profile['archives'] = [profile['archives']]
55 |
56 | for i in task_globals:
57 | if i in profile:
58 | # Add to each task if it has none:
59 | for idx, task in enumerate(profile['tasks']):
60 | if i not in task:
61 | profile['tasks'][idx][i] = profile[i]
62 |
63 | workflow_dict = {
64 | 'name': profile['name'],
65 | 'tasks': profile['tasks'],
66 | 'archives': profile['archives'],
67 | 'figures': profile.get('figures'),
68 | 'metadata': {**Config.get('default_metadata'), **profile.get('metadata', {})},
69 | 'num_iterations': profile.get('num_iterations'),
70 | 'iterate': profile.get('iterate'),
71 | 'extends': profile.get('extends'),
72 | 'archive_excludes': profile.get('archive_excludes'),
73 | 'import_list': profile.get('import') or profile.get('import_list'),
74 | }
75 |
76 | return workflow_dict
77 |
--------------------------------------------------------------------------------
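A minimal sketch of a workflow profile that exercises the normalisation above; the file name and task names are hypothetical, and MatFlow's `Config` must already be initialised, since it supplies `default_metadata`:

from matflow.profile import parse_workflow_profile

# Contents of a hypothetical "profile.yml":
#
#   name: example_workflow
#   archive: dropbox        # normalised to archives: ['dropbox']
#   run_options:            # task-global: copied into each task that lacks it
#     num_cores: 8
#   tasks:
#     - name: task_one
#     - name: task_two
#       run_options:
#         num_cores: 16     # overrides the global value for this task only
wk_dict = parse_workflow_profile('profile.yml')
print(wk_dict['archives'])  # ['dropbox']

--------------------------------------------------------------------------------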
/matflow/scripting.py:
--------------------------------------------------------------------------------
1 | """Module containing functionality for generating Python scripts as task sources."""
2 |
3 | import re
4 | from textwrap import dedent
5 |
6 | import black
7 | import autopep8
8 |
9 | from pkg_resources import resource_string
10 |
11 |
12 | def main_func(func):
13 | """Decorator used to annotate which function within a snippet is the main function."""
14 | def main_inner(*args, **kwargs):
15 | return func(*args, **kwargs)  # propagate the snippet's return value
16 | return main_inner
17 |
18 |
19 | def get_snippet(package_name, snippet_name, decorator=True):
20 | """Get a Python snippet function (as a string) from the snippets directory."""
21 | out = resource_string(package_name, f'snippets/{snippet_name}').decode()
22 | if not decorator:
23 | # Remove the `@main_func` decorator and import.
24 | remove_lns = ['from matflow.scripting import main_func', '@main_func']
25 | for i in remove_lns:
26 | out = ''.join(out.split(i))
27 | return out
28 |
29 |
30 | def parse_python_func_return(func_str):
31 | """Get a list of the variable names in a Python function return statement.
32 |
33 | The return statement may return a tuple (with parenthesis or not) or a single variable.
34 |
35 | """
36 |
37 | out = []
38 | match = re.search(r'return \(*([\S\s][^\)]+)\)*', func_str)
39 | if match:
40 | match_clean = match.group(1).strip().strip(',')
41 | out = [i.strip() for i in match_clean.split(',')]
42 |
43 | return out
44 |
45 |
46 | def parse_python_func_imports(func_str):
47 | """Get a list of import statement lines from a (string) Python function."""
48 |
49 | import_lines = func_str.split('def ')[0].strip()
50 | match = re.search(r'((?:import|from)[\S\s]*)', import_lines)
51 | out = []
52 | if match:
53 | out = match.group(1).splitlines()
54 |
55 | return out
56 |
57 |
58 | def extract_snippet_main(snippet_str):
59 | """Extract only the snippet main function (plus imports), as annotated by the
60 | `@main_func` decorator."""
61 |
62 | func_start_pat = r'((?:@main_func\n)?def\s(?:.*)\((?:[\s\S]*?)\):)'
63 |
64 | func_split_snip = re.split(func_start_pat, snippet_str)
65 | imports = func_split_snip[0]
66 | main_func_dec_str = '@main_func'
67 |
68 | main_func_str = None
69 | for idx in range(1, len(func_split_snip[1:]), 2):
70 | func_str = func_split_snip[idx] + func_split_snip[idx + 1]
71 | if main_func_dec_str in func_str:
72 | if main_func_str:
73 | msg = (f'`{main_func_dec_str}` should decorate only one function within '
74 | f'the snippet.')
75 | raise ValueError(msg)
76 | else:
77 | main_func_str = func_str[len(main_func_dec_str):].lstrip('\n')
78 |
79 | imports = re.sub(r'from [\w.]+ import main_func\n?', '', imports)  # strip any main_func import
80 |
81 | return imports + '\n' + main_func_str
82 |
83 |
84 | def get_snippet_signature(package_name, script_name):
85 | """Get imports, inputs and outputs of a Python snippet function."""
86 |
87 | snippet_str = get_snippet(package_name, script_name)
88 | snippet_str = extract_snippet_main(snippet_str)
89 |
90 | def_line = re.search(r'def\s(.*)\(([\s\S]*?)\):', snippet_str).groups()
91 | func_name = def_line[0]
92 | func_ins = [i.strip() for i in def_line[1].split(',') if i.strip()]
93 |
94 | if script_name != func_name + '.py':
95 | msg = ('For simplicity, the snippet main function name should be the same as the '
96 | 'snippet file name.')
97 | raise ValueError(msg)
98 |
99 | func_outs = parse_python_func_return(snippet_str)
100 | func_imports = parse_python_func_imports(snippet_str)
101 |
102 | out = {
103 | 'name': func_name,
104 | 'imports': func_imports,
105 | 'inputs': func_ins,
106 | 'outputs': func_outs,
107 | }
108 | return out
109 |
110 |
111 | def get_snippet_call(package_name, script_name):
112 | sig = get_snippet_signature(package_name, script_name)
113 | outs_fmt = ', '.join(sig['outputs'])
114 | ins_fmt = ', '.join(sig['inputs'])
115 | ret = f'{sig["name"]}({ins_fmt})'
116 | if outs_fmt:
117 | ret = f'{outs_fmt} = {ret}'
118 | return ret
119 |
120 |
121 | def get_wrapper_script(package_name, script_name, snippets, outputs):
122 |
123 | ind = ' '
124 | sigs = [get_snippet_signature(package_name, i['name']) for i in snippets]
125 | all_ins = [j for i in sigs for j in i['inputs']]
126 | all_outs = [j for i in sigs for j in i['outputs']]
127 |
128 | # print(f'all_ins: {all_ins}')
129 | # print(f'all_outs: {all_outs}')
130 |
131 | for i in outputs:
132 | if i not in all_outs:
133 | raise ValueError(f'Cannot output "{i}". No functions return this name.')
134 |
135 | # Required inputs are those that are not output by any snippet
136 | req_ins = list(set(all_ins) - set(all_outs))
137 | req_ins_fmt = ', '.join(req_ins)
138 |
139 | # print(f'req_ins: {req_ins}')
140 | # print(f'req_ins_fmt: {req_ins_fmt}')
141 |
142 | main_sig = [f'def main({req_ins_fmt}):']
143 | main_body = [ind + get_snippet_call(package_name, i['name']) for i in snippets]
144 | main_outs = ['\n' + ind + f'return {", ".join(outputs)}']
145 | main_func = main_sig + main_body + main_outs
146 |
147 | req_imports = [
148 | 'import sys',
149 | 'import hickle',
150 | 'from pathlib import Path',
151 | ]
152 | out = req_imports
153 | out += main_func
154 | snippet_funcs = '\n'.join([get_snippet(package_name, i['name'], decorator=False)
155 | for i in snippets])
156 |
157 | out = '\n'.join(out) + '\n' + snippet_funcs + '\n'
158 | out += dedent('''\
159 | if __name__ == '__main__':
160 | inputs = hickle.load(sys.argv[1])
161 | outputs = main(**inputs)
162 | hickle.dump(outputs, 'outputs.hdf5')
163 |
164 | ''')
165 |
166 | # print(f'out 1: \n----------\n{out}\n----------\n')
167 | out = autopep8.fix_code(out)
168 | # print(f'out 2: \n----------\n{out}\n----------\n')
169 | out = black.format_str(out, mode=black.FileMode())
170 | # print(f'out 3: \n----------\n{out}\n----------\n')
171 |
172 | return out
173 |
--------------------------------------------------------------------------------
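The return- and import-parsing helpers above operate on plain strings, so they can be sketched in isolation:

from matflow.scripting import parse_python_func_imports, parse_python_func_return

snippet = '''import numpy as np

def poisson_ratio(E, G):
    nu = E / (2 * G) - 1
    return nu
'''
print(parse_python_func_imports(snippet))  # ['import numpy as np']
print(parse_python_func_return(snippet))   # ['nu']

--------------------------------------------------------------------------------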
/matflow/utils.py:
--------------------------------------------------------------------------------
1 | """`matflow.utils.py`"""
2 |
3 | import os
4 | import sys
5 | import io
6 | import collections.abc
7 | import copy
8 | import itertools
9 | import h5py
10 | import numpy as np
11 | import random
12 | import re
13 | import time
14 | from contextlib import redirect_stdout, contextmanager
15 | from datetime import datetime
16 | from pathlib import Path
17 |
18 | from ruamel.yaml import YAML
19 |
20 |
21 | def parse_times(format_str):
22 | """Parse a string which contain time format code and one or
23 | more `%%r` to represent a random digit from 0 to 9."""
24 |
25 | time_parsed = time.strftime(format_str)
26 | rnd_all = ''
27 | while '%r' in time_parsed:
28 | rnd = str(random.randint(0, 9))
29 | rnd_all += rnd
30 | time_parsed = time_parsed.replace('%r', rnd, 1)
31 |
32 | return time_parsed, rnd_all
33 |
34 |
35 | def zeropad(num, largest):
36 | """Return a zero-padded string of a number, given the largest number.
37 |
38 | TODO: want to support floating-point numbers as well? Or rename function
39 | accordingly.
40 |
41 | Parameters
42 | ----------
43 | num : int
44 | The number to be formatted with zeros padding on the left.
45 | largest : int
46 | The number that determines the number of zeros to pad with.
47 |
48 | Returns
49 | -------
50 | padded : str
51 | The original number, `num`, formatted as a string with zeros added
52 | on the left.
53 |
54 | """
55 |
56 | num_digits = len('{:.0f}'.format(largest))
57 | padded = '{0:0{width}}'.format(num, width=num_digits)
58 |
59 | return padded
60 |
61 |
62 | def combine_list_of_dicts(a):
63 |
64 | a = copy.deepcopy(a)
65 |
66 | for i in range(1, len(a)):
67 | update_dict(a[0], a[i])
68 |
69 | return a[0]
70 |
71 |
72 | def update_dict(base, upd):
73 | """Update an arbitrarily-nested dict."""
74 |
75 | for key, val in upd.items():
76 | if isinstance(base, collections.abc.Mapping):  # `collections.Mapping` was removed in Python 3.10
77 | if isinstance(val, collections.abc.Mapping):
78 | r = update_dict(base.get(key, {}), val)
79 | base[key] = r
80 | else:
81 | base[key] = upd[key]
82 | else:
83 | base = {key: upd[key]}
84 |
85 | return base
86 |
87 |
88 | def nest_lists(my_list):
89 | """
90 | `my_list` is a list of `N` sublists.
91 |
92 | E.g.
93 | my_list = [
94 | [1,2],
95 | [3,4,5],
96 | [6,7]
97 | ]
98 |
99 | Returns a list of lists, each of length `N`, such that all combinations of
100 | elements from the sublists of `my_list` are found.
101 | E.g
102 | out = [
103 | [1, 3, 6],
104 | [1, 3, 7],
105 | [1, 4, 6],
106 | [1, 4, 7],
107 | [1, 5, 6],
108 | [1, 5, 7],
109 | [2, 3, 6],
110 | [2, 3, 7],
111 | [2, 4, 6],
112 | [2, 4, 7],
113 | [2, 5, 6],
114 | [2, 5, 7]
115 | ]
116 |
117 | """
118 |
119 | N = len(my_list)
120 | sub_len = [len(i) for i in my_list]
121 |
122 | products = np.array([1] * (N + 1))
123 | for i in range(len(my_list) - 1, -1, -1):
124 | products[:i + 1] *= len(my_list[i])
125 |
126 | out = [[None for x in range(N)] for y in range(products[0])]
127 |
128 | for row_idx, row in enumerate(out):
129 |
130 | for col_idx, col in enumerate(row):
131 |
132 | num_repeats = products[col_idx + 1]
133 | sub_list_idx = int(row_idx / num_repeats) % len(my_list[col_idx])
134 | out[row_idx][col_idx] = copy.deepcopy(
135 | my_list[col_idx][sub_list_idx])
136 |
137 | return out
138 |
139 |
140 | def repeat(lst, reps):
141 | """Repeat 1D list elements."""
142 | return list(itertools.chain.from_iterable(itertools.repeat(x, reps) for x in lst))
143 |
144 |
145 | def tile(lst, tiles):
146 | """Tile a 1D list."""
147 | return lst * tiles
148 |
149 |
150 | def index(lst, idx):
151 | """Get elements of a list."""
152 | return [lst[i] for i in idx]
153 |
154 |
155 | def arange(size):
156 | """Get 1D list of increasing integers."""
157 | return list(range(size))
158 |
159 |
160 | def extend_index_list(lst, repeats):
161 | """Extend an integer index list by repeating some number of times such that the extra
162 | indices added are new and follow the same ordering as the existing elements.
163 |
164 | Parameters
165 | ----------
166 | lst : list of int
167 | repeats : int
168 |
169 | Returns
170 | -------
171 | new_idx : list of int
172 | Returned list has length `len(lst) * repeats`.
173 |
174 | Examples
175 | --------
176 | >>> extend_index_list([0, 1, 2], 2)
177 | [0, 1, 2, 3, 4, 5]
178 |
179 | >>> extend_index_list([0, 0, 1, 1], 3)
180 | [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5]
181 |
182 | >>> extend_index_list([4, 1, 2], 2)
183 | [4, 1, 2, 8, 5, 6]
184 |
185 | """
186 |
187 | new_idx = []
188 | for i in lst:
189 | if i < 0:
190 | raise ValueError('List elements must be positive or zero.')
191 | new_idx.append(i)
192 |
193 | for _ in range(repeats - 1):
194 | next_avail_idx = max(new_idx) + 1
195 | new_idx.extend([next_avail_idx + i - min(lst) for i in lst])
196 |
197 | return new_idx
198 |
199 |
200 | def flatten_list(lst):
201 | """Flatten a list of lists.
202 |
203 | Parameters
204 | ----------
205 | lst : list of list
206 |
207 | Returns
208 | -------
209 | list
210 |
211 | Examples
212 | --------
213 | >>> flatten_list([[0, 2, 4], [9, 1]])
214 | [0, 2, 4, 9, 1]
215 |
216 | """
217 | return [j for i in lst for j in i]
218 |
219 |
220 | def to_sub_list(lst, sub_list_len):
221 | """Transform a list into a list of sub lists of certain size.
222 |
223 | Parameters
224 | ----------
225 | lst : list
226 | List to transform into a list of sub-lists.
227 | sub_list_len : int
228 | Size of sub-lists. Must be an integer factor of the length of the
229 | original list, `lst`.
230 |
231 | Returns
232 | -------
233 | list of list
234 |
235 | Examples
236 | --------
237 | >>> to_sub_list([0, 1, 2, 3], 2)
238 | [[0, 1], [2, 3]]
239 |
240 | """
241 |
242 | if (sub_list_len <= 0) or (len(lst) % sub_list_len != 0):
243 | raise ValueError('`sub_list_len` must be a positive factor of `len(lst)`.')
244 | out = [lst[(i * sub_list_len):((i * sub_list_len) + sub_list_len)]
245 | for i in range(len(lst) // sub_list_len)]
246 | return out
247 |
248 |
249 | def datetime_to_dict(dt):
250 | return {
251 | 'year': dt.year,
252 | 'month': dt.month,
253 | 'day': dt.day,
254 | 'hour': dt.hour,
255 | 'minute': dt.minute,
256 | 'second': dt.second,
257 | 'microsecond': dt.microsecond,
258 | }
259 |
260 |
261 | def dump_to_yaml_string(data):
262 | yaml = YAML()
263 | yaml.indent(mapping=2, sequence=4, offset=2)
264 | with redirect_stdout(io.StringIO()) as buffer:
265 | yaml.dump(data, sys.stdout)
266 | output = buffer.getvalue()
267 | return output
268 |
269 |
270 | def get_specifier_dict(key, name_key=None, base_key=None, defaults=None,
271 | list_specifiers=None, cast_types=None):
272 | """Resolve a string key with additional specifiers using square-brackets into a dict.
273 |
274 | Parameters
275 | ----------
276 | key : str or dict
277 | name_key : str
278 | base_key : str
279 | defaults : dict
280 | list_specifiers : list of str
281 | Any specifier in this list will be added to the returned dict as a list element.
282 | cast_types : dict
283 | Dict of (key: type) to cast those keys' values to.
284 |
285 | Returns
286 | -------
287 | dict
288 |
289 | Examples
290 | --------
291 | >>> get_specifier_dict(
292 | 'parameter_1[hey, label_2=hi]',
293 | name_key='param_name',
294 | base_key='label_1',
295 | defaults={'a': 1},
296 | )
297 | {
298 | 'param_name': 'parameter_1',
299 | 'label_1': 'hey',
300 | 'label_2': 'hi',
301 | 'a': 1,
302 | }
303 |
304 | """
305 |
306 | list_specifiers = list_specifiers or []
307 | cast_types = cast_types or {}
308 | out = {}
309 |
310 | if isinstance(key, str):
311 |
312 | if name_key is None:
313 | raise TypeError('`name_key` must be specified.')
314 |
315 | match = re.search(r'([\w\-\s]+)(\[(.*?)\])*', key)
316 | name = match.group(1)
317 | out.update({name_key: name})
318 |
319 | specifiers_str = match.group(3)
320 | if specifiers_str:
321 | base_keys = []
322 | for s in specifiers_str.split(','):
323 | if not s:
324 | continue
325 | if '=' in s:
326 | s_key, s_val = [i.strip() for i in s.split('=', 1)]  # allow '=' in the value
327 | if s_key in list_specifiers:
328 | if s_key in out:
329 | out[s_key].append(s_val)
330 | else:
331 | out[s_key] = [s_val]
332 | else:
333 | if s_key in out:
334 | raise ValueError(
335 | f'Specifier "{s_key}" multiply defined. Add this '
336 | f'specifier to `list_specifiers` to add multiple values '
337 | f'to the returned dict (in a list).'
338 | )
339 | out.update({s_key: s_val})
340 | else:
341 | base_keys.append(s.strip())
342 |
343 | if len(base_keys) > 1:
344 | raise ValueError('Only one specifier may be specified without a key.')
345 |
346 | if base_keys:
347 | if base_key is None:
348 | raise ValueError('Base key found but `base_key` name not specified.')
349 | out.update({base_key: base_keys[0]})
350 |
351 | elif isinstance(key, dict):
352 | out.update(key)
353 |
354 | else:
355 | raise TypeError('`key` must be a dict or str to allow specifiers to be resolved.')
356 |
357 | for k, v in (defaults or {}).items():
358 | if k not in out:
359 | out[k] = copy.deepcopy(v)
360 |
361 | for key, cast_type in cast_types.items():
362 | if key in out:
363 | if cast_type is bool:
364 | new_val = cast_bool(out[key])
365 | else:
366 | new_val = cast_type(out[key])
367 | out[key] = new_val
368 |
369 | return out
370 |
371 |
372 | def extract_variable_names(source_str, delimiters):
373 | """Given a specified syntax for embedding variable names within a string,
374 | extract all variable names.
375 |
376 | Parameters
377 | ----------
378 | source_str : str
379 | The string within which to search for variable names.
380 | delimiters : two-tuple of str
381 | The left and right delimiters of a variable name.
382 |
383 | Returns
384 | -------
385 | var_names : list of str
386 | The variable names embedded in the original string.
387 |
388 | """
389 |
390 | delim_esc = [re.escape(i) for i in delimiters]
391 | pattern = delim_esc[0] + r'(.\S+?)' + delim_esc[1]
392 | var_names = re.findall(pattern, source_str)
393 |
394 | return var_names
395 |
396 |
397 | def get_nested_item(obj, address):
398 | out = obj
399 | for i in address:
400 | out = out[i]
401 | return out
402 |
403 |
404 | def get_workflow_paths(base_dir, quiet=True):
405 | base_dir = Path(base_dir)
406 | wkflows = []
407 | for i in base_dir.glob('**/*'):
408 | if i.name == 'workflow.hdf5':
409 | wk_full_path = i
410 | wk_rel_path = wk_full_path.relative_to(base_dir)
411 | wk_disp_path = wk_rel_path.parent
412 | with h5py.File(wk_full_path, 'r') as handle:
413 | try:
414 | try:
415 | handle["/workflow_obj/data/'figures'"]
416 | except KeyError:
417 | if not quiet:
418 | print(f'No "figures" key for workflow: {wk_disp_path}.')
419 | continue
420 | timestamp_path = "/workflow_obj/data/'history'/data/data_0/'timestamp'/data"
421 | timestamp_dict = {k[1:-1]: v['data'][()]
422 | for k, v in handle[timestamp_path].items()}
423 | timestamp = datetime(**timestamp_dict)
424 | wkflows.append({
425 | 'ID': handle.attrs['workflow_id'],
426 | 'full_path': str(wk_full_path),
427 | 'display_path': str(wk_disp_path),
428 | 'timestamp': timestamp,
429 | 'display_timestamp': timestamp.strftime(r'%Y-%m-%d %H:%M:%S'),
430 | })
431 | except Exception:
432 | if not quiet:
433 | print(f'No timestamp for workflow: {wk_disp_path}')
434 | return wkflows
435 |
436 |
437 | def order_workflow_paths_by_date(workflow_paths):
438 | return sorted(workflow_paths, key=lambda x: x['timestamp'])
439 |
440 |
441 | def nested_dict_arrays_to_list(obj):
442 | if isinstance(obj, np.ndarray):
443 | obj = obj.tolist()
444 | elif isinstance(obj, dict):
445 | for key, val in obj.items():
446 | obj[key] = nested_dict_arrays_to_list(val)
447 | return obj
448 |
449 |
450 | def move_element_forward(lst, index, position, return_map=True):
451 | """Move a list element forward in the list to a new index position."""
452 |
453 | if index > position:
454 | raise ValueError('`index` cannot be larger than `position`, since that would '
455 | 'not be a "forward" move!')
456 |
457 | if position > len(lst) - 1:
458 | raise ValueError('`position` must be a valid list index.')
459 |
460 | sub_list_1 = lst[:position + 1]
461 | sub_list_2 = lst[position + 1:]
462 | elem = sub_list_1.pop(index)
463 | out = sub_list_1 + [elem] + sub_list_2
464 |
465 | # Indices to the left of the element that is to be moved do not change:
466 | idx_map_left = {i: i for i in range(0, index)}
467 |
468 | # The index of the moved element changes to `position`
469 | idx_map_element = {index: position}
470 |
471 | # Indices to the right of the element up to the new position are decremented:
472 | idx_map_middle = {i: i - 1 for i in range(index + 1, position + 1)}
473 |
474 | # Indices to the right of the new position do not change:
475 | idx_map_right = {i: i for i in range(position + 1, len(lst))}
476 |
477 | idx_map = {
478 | **idx_map_left,
479 | **idx_map_element,
480 | **idx_map_middle,
481 | **idx_map_right
482 | }
483 |
484 | if return_map:
485 | return out, idx_map
486 | else:
487 | return out
488 |
489 |
490 | def cast_bool(bool_str):
491 | if isinstance(bool_str, bool):
492 | return bool_str
493 | elif bool_str.lower() == 'true':
494 | return True
495 | elif bool_str.lower() == 'false':
496 | return False
497 | else:
498 | raise ValueError(f'"{bool_str}" cannot be cast to True or False.')
499 |
500 |
501 | @contextmanager
502 | def working_directory(path):
503 | """Change to a working directory and return to previous working directory on exit."""
504 | prev_cwd = Path.cwd()
505 | os.chdir(path)
506 | try:
507 | yield
508 | finally:
509 | os.chdir(prev_cwd)
510 |
--------------------------------------------------------------------------------
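Minimal sketches of a few of the pure helpers above:

from matflow.utils import (zeropad, nest_lists, extract_variable_names,
                           move_element_forward)

print(zeropad(7, largest=1000))      # '0007' (width set by the largest number)
print(nest_lists([[1, 2], [3, 4]]))  # [[1, 3], [1, 4], [2, 3], [2, 4]]
print(extract_variable_names('run <<exe>> --n <<cores>>', ['<<', '>>']))
# ['exe', 'cores']

# Move the element at index 1 forward to index 3; the map records old -> new:
out, idx_map = move_element_forward([10, 11, 12, 13], index=1, position=3)
print(out)      # [10, 12, 13, 11]
print(idx_map)  # {0: 0, 1: 3, 2: 1, 3: 2}

--------------------------------------------------------------------------------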
/matflow/validation.py:
--------------------------------------------------------------------------------
1 |
2 | import inspect
3 |
4 | from matflow.errors import UnsatisfiedSchemaError
5 |
6 |
7 | def validate_input_mapper_func(func, task_inputs):
8 | """Using `inspect`, validate an input mapper callable from a Matflow extension.
9 |
10 | Parameters
11 | ----------
12 | func : callable
13 | task_inputs : list of str
14 | List of the input name aliases associated with the task schema.
15 |
16 | Notes
17 | -----
18 | Checks performed on `func`:
19 | - check the first argument is named "path"; raise `TypeError` if not;
20 | - check for one or more additional arguments which are named according to
21 | a subset of task parameters (passed in `task_inputs`).
22 |
23 | """
24 |
25 | func_params = inspect.signature(func).parameters
26 |
27 | # Check first argument must be "path":
28 | first_arg = list(func_params.items())[0]
29 | if first_arg[0] != 'path':
30 | msg = (f'The first parameter of an input mapper function must be "path" '
31 | f'but for {func.__name__} is actually "{first_arg[0]}".')
32 | raise TypeError(msg)
33 | else:
34 | # Remove "path" from argument list, for further analysis:
35 | func_params = dict(func_params)
36 | del func_params[first_arg[0]]
37 |
38 | bad_params = list(set(func_params) - set(task_inputs))
39 | if bad_params:
40 | bad_params_fmt = ', '.join([f'"{i}"' for i in bad_params])
41 | msg = (f'The following arguments to the input mapper function "{func.__name__}" '
42 | f'are not known by the schema: {bad_params_fmt}.')
43 | raise TypeError(msg)
44 |
45 |
46 | def validate_output_mapper_func(func, num_file_paths, option_names, input_names):
47 | """Using `inspect`, validate an output mapper callable from a Matflow extension.
48 |
49 | Parameters
50 | ----------
51 | func : callable
52 | num_file_paths : int
53 | Number of output files specified in the schema's output map.
54 | option_names : list of str
55 | List of the names of output map options.
56 | input_names : list of str
57 | List of the names of output map inputs.
58 |
59 | Notes
60 | -----
61 | Checks performed on `func`:
62 | - After the first `num_file_paths` arguments, check that the remaining argument
63 | names coincide exactly with `option_names` + `input_names`.
64 |
65 | """
66 |
67 | func_params = inspect.signature(func).parameters
68 |
69 | # Check num args first
70 | exp_num_params = num_file_paths + len(option_names) + len(input_names)
71 | if len(func_params) != exp_num_params:
72 | msg = (
73 | f'The output mapper function "{func.__name__}" does not have the expected '
74 | f'number of arguments: found {len(func_params)} but expected '
75 | f'{exp_num_params} ({num_file_paths} file path(s) + {len(option_names)} '
76 | f'options parameters + {len(input_names)} inputs).'
77 | )
78 | raise TypeError(msg)
79 |
80 | # Check option names:
81 | params = list(func_params.items())[num_file_paths:]
82 | params_func = [i[0] for i in params]
83 |
84 | miss_params = list(set(option_names + input_names) - set(params_func))
85 | bad_params = list(set(params_func) - set(option_names + input_names))
86 |
87 | if bad_params:
88 | bad_params_fmt = ', '.join([f'"{i}"' for i in bad_params])
89 | msg = (f'The following arguments in the output mapper function "{func.__name__}" '
90 | f'are not output map options or inputs: {bad_params_fmt}.')
91 | raise TypeError(msg)
92 |
93 | if miss_params:
94 | miss_params_fmt = ', '.join([f'"{i}"' for i in miss_params])
95 | msg = (f'The following output mapper options and/or inputs are missing from the '
96 | f'signature of the output mapper function "{func.__name__}": '
97 | f'{miss_params_fmt}.')
98 | raise TypeError(msg)
99 |
100 |
101 | def validate_func_mapper_func(func, task_inputs):
102 | """Using `inspect`, validate an input mapper callable from a Matflow extension.
103 |
104 | Parameters
105 | ----------
106 | func : callable
107 | task_inputs : list of str
108 | List of the input name aliases associated with the task schema.
109 |
110 | Notes
111 | -----
112 | Checks performed on `func`:
113 | - check function arguments are named according to all task parameters (passed in
114 | `task_inputs`).
115 |
116 | """
117 |
118 | func_params = inspect.signature(func).parameters
119 |
120 | bad_params = list(set(func_params) - set(task_inputs))
121 | miss_params = list(set(task_inputs) - set(func_params))
122 |
123 | if bad_params:
124 | bad_params_fmt = ', '.join([f'"{i}"' for i in bad_params])
125 | msg = (f'The function mapper function "{func.__name__}" contains the following '
126 | f'arguments that are not consistent with the schema: {bad_params_fmt}.')
127 | raise TypeError(msg)
128 |
129 | if miss_params:
130 | miss_params_fmt = ', '.join([f'"{i}"' for i in miss_params])
131 | msg = (f'The following task inputs are missing from the signature of the '
132 | f'function mapper function "{func.__name__}": {miss_params_fmt}.')
133 | raise TypeError(msg)
134 |
135 |
136 | def validate_task_schemas(task_schemas, task_input_map, task_output_map, task_func_map):
137 | """
138 | Determine whether each task schema is valid.
139 |
140 | Parameters
141 | ----------
142 | task_schemas : dict of (tuple : TaskSchema)
143 | Dict keys are (task_name, task_method, software).
144 | task_input_map : dict of (tuple : dict of (str : callable))
145 | Outer dict keys are (task_name, task_method, software); inner dicts map a string
146 | input file name to a MatFlow extension callable which writes that input file.
147 | task_output_map : dict of (tuple : dict of (str : callable))
148 | Outer dict keys are (task_name, task_method, software); inner dicts map a string
149 | output name to a MatFlow extension callable which returns that output.
150 | task_func_map : dict of (tuple : callable)
151 | Dict keys are (task_name, task_method, software); values are MatFlow extension
152 | callables.
153 |
154 | Returns
155 | -------
156 | schema_is_valid : dict of (tuple : tuple of (bool, str))
157 | Dict keys are (task_name, task_method, software); values are tuples whose first
158 | values are boolean values indicating if a given schema is valid. If False, this
159 | indicates that one of the extension functions (input map, output map or function
160 | map) is missing. Note that this function does not raise an exception in this
161 | case; the task schema is simply noted as invalid. The second value of the
162 | tuple is a string description of the reason why the schema is invalid.
163 |
164 | Raises
165 | ------
166 | UnsatisfiedSchemaError
167 | Raised if any of the extension callables (input/output/func maps) are not
168 | consistent with their associated task schema.
169 |
170 | """
171 |
172 | schema_is_valid = {}
173 |
174 | for key, schema in task_schemas.items():
175 |
176 | schema_is_valid.update({key: (True, '')})
177 |
178 | key_msg = (f'Unresolved task schema for task "{schema.name}" with method '
179 | f'"{schema.method}" and software "{schema.implementation}".')
180 |
181 | for inp_map in schema.input_map:
182 |
183 | extension_inp_maps = task_input_map.get(key)
184 | msg = (
185 | f'{key_msg} No matching extension function found for the input '
186 | f'map that generates the input file "{inp_map["file"]}".'
187 | )
188 |
189 | if not extension_inp_maps:
190 | reason = (f'No input map function found for input map that generates file'
191 | f' "{inp_map["file"]}". ')
192 | schema_is_valid[key] = (False, schema_is_valid[key][1] + reason)
193 | continue
194 | else:
195 | inp_map_func = extension_inp_maps.get(inp_map['file'])
196 | if not inp_map_func:
197 | raise UnsatisfiedSchemaError(msg)
198 |
199 | # Validate signature of input map function:
200 | try:
201 | validate_input_mapper_func(inp_map_func, inp_map['inputs'])
202 | except TypeError as err:
203 | raise UnsatisfiedSchemaError(key_msg + ' ' + str(err)) from None
204 |
205 | for out_map in schema.output_map:
206 |
207 | extension_out_maps = task_output_map.get(key)
208 | msg = (
209 | f'{key_msg} No matching extension function found for the output '
210 | f'map that generates the output "{out_map["output"]}".'
211 | )
212 |
213 | if not extension_out_maps:
214 | reason = (f'No output map function found for output map that generates '
215 | f'output "{out_map["output"]}". ')
216 | schema_is_valid[key] = (False, schema_is_valid[key][1] + reason)
217 | continue
218 | else:
219 | out_map_func = extension_out_maps.get(out_map['output'])
220 | if not out_map_func:
221 | raise UnsatisfiedSchemaError(msg)
222 |
223 | # Validate signature of output map function:
224 | try:
225 | validate_output_mapper_func(
226 | func=out_map_func,
227 | num_file_paths=len(out_map['files']),
228 | option_names=[i['name'] for i in out_map.get('options', [])],
229 | input_names=[i['name'] for i in out_map.get('inputs', [])],
230 | )
231 | except TypeError as err:
232 | raise UnsatisfiedSchemaError(key_msg + ' ' + str(err)) from None
233 |
234 | if schema.is_func:
235 |
236 | func = task_func_map.get(key)
237 | if not func:
238 | reason = 'No function mapper function found. '
239 | schema_is_valid[key] = (False, schema_is_valid[key][1] + reason)
240 | continue
241 |
242 | # Validate signature of func mapper function:
243 | try:
244 | validate_func_mapper_func(func, schema.input_aliases)
245 | except TypeError as err:
246 | raise UnsatisfiedSchemaError(key_msg + ' ' + str(err)) from None
247 |
248 | return schema_is_valid
249 |
--------------------------------------------------------------------------------
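A minimal sketch of the input-mapper signature check above, using hypothetical mapper functions:

from matflow.validation import validate_input_mapper_func

def write_input_file(path, grain_size, temperature):
    """A hypothetical extension input mapper."""

def bad_mapper(filename, grain_size):
    """First parameter is not named "path"."""

# Passes: first argument is "path"; the rest are a subset of the schema inputs:
validate_input_mapper_func(write_input_file, ['grain_size', 'temperature', 'strain'])

# Raises TypeError: the first parameter must be named "path":
validate_input_mapper_func(bad_mapper, ['grain_size'])

--------------------------------------------------------------------------------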
/requirements.txt:
--------------------------------------------------------------------------------
1 | pylint
2 | ipykernel
3 | rope
4 | autopep8
5 | twine
6 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """Pip installation script for `matflow`."""
2 |
3 | import os
4 | import re
5 | from setuptools import find_packages, setup
6 |
7 |
8 | def get_version():
9 |
10 | ver_file = 'matflow/_version.py'
11 | with open(ver_file) as handle:
12 | ver_str_line = handle.read()
13 |
14 | ver_pattern = r'^__version__ = [\'"]([^\'"]*)[\'"]'
15 | match = re.search(ver_pattern, ver_str_line, re.M)
16 | if match:
17 | ver_str = match.group(1)
18 | else:
19 | msg = 'Unable to find version string in "{}"'.format(ver_file)
20 | raise RuntimeError(msg)
21 |
22 | return ver_str
23 |
24 |
25 | def get_long_description():
26 |
27 | readme_file = 'README.md'
28 | with open(readme_file, encoding='utf-8') as handle:
29 | contents = handle.read()
30 |
31 | return contents
32 |
33 |
34 | package_data = [
35 | os.path.join(*os.path.join(root, f).split(os.path.sep)[1:])
36 | for root, dirs, files in os.walk(os.path.join('matflow', 'data'))
37 | for f in files
38 | ]
39 |
40 | setup(
41 | name='matflow',
42 | version=get_version(),
43 | description=('Computational workflow management for materials science.'),
44 | long_description=get_long_description(),
45 | long_description_content_type='text/markdown',
46 | author='Adam J. Plowman',
47 | author_email='adam.plowman@manchester.ac.uk',
48 | packages=find_packages(),
49 | package_data={
50 | 'matflow': package_data,
51 | },
52 | install_requires=[
53 | 'matflow-demo-extension',
54 | 'hpcflow>=0.1.16',
55 | 'click>7.0',
56 | 'hickle==4.0.4',
57 | 'h5py==2.10.0',
58 | 'numpy<1.24',
59 | 'sqlalchemy<2',
60 | 'ruamel.yaml==0.16.10',
61 | 'pyperclip',
62 | 'black',
63 | 'autopep8',
64 | ],
65 | project_urls={
66 | 'GitHub': 'https://github.com/LightForm-group/matflow',
67 | },
68 | classifiers=[
69 | 'Development Status :: 3 - Alpha',
70 | 'Intended Audience :: Science/Research',
71 | 'Topic :: Scientific/Engineering',
72 | 'Programming Language :: Python :: 3.7',
73 | 'Programming Language :: Python :: 3.8',
74 | 'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)',
75 | 'Operating System :: OS Independent',
76 | ],
77 | entry_points="""
78 | [console_scripts]
79 | matflow=matflow.cli:cli
80 | """
81 | )
82 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LightForm-group/matflow/4a40bd27a5c97778bdf902f1a7f47a882c5fb889/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_hicklable.py:
--------------------------------------------------------------------------------
1 | """Module containing unit tests on the `hicklable.to_hicklable` function."""
2 |
3 | import unittest
4 | from tempfile import TemporaryFile
5 |
6 | import numpy as np
7 | import hickle
8 |
9 | from matflow.hicklable import to_hicklable
10 |
11 |
12 | class ConversionTestCase(unittest.TestCase):
13 | """Tests on `to_hicklable`."""
14 |
15 | def test_built_ins(self):
16 | """Test expected output for some built-in types."""
17 |
18 | obj = {
19 | 'a': 1,
20 | 'b': 2.0,
21 | 'c': [3, 4, 5.0],
22 | 'd': (6, 7, 8),
23 | 'e': {9, 10, 11},
24 | 'f': {'f1': 1, 'f2': 2},
25 | 'g': 'hello',
26 | }
27 | obj_expected = {
28 | 'a': 1,
29 | 'b': 2.0,
30 | 'c': [3, 4, 5.0],
31 | 'd': (6, 7, 8),
32 | 'e': {9, 10, 11},
33 | 'f': {'f1': 1, 'f2': 2},
34 | 'g': 'hello',
35 | }
36 | obj_valid = to_hicklable(obj)
37 | self.assertTrue(obj_valid == obj_expected)
38 |
39 | def test_arrays(self):
40 | """Test expected output for some arrays."""
41 |
42 | obj = {
43 | 'int_array': np.array([1, 2, 3]),
44 | 'float_array': np.array([3.3, 2.5, -2.1]),
45 | 'bool_array': np.array([1, 0, 0, 1]).astype(bool),
46 | }
47 | obj_valid = to_hicklable(obj)
48 | self.assertTrue(obj_valid == obj)
49 |
50 | def test_object_dict(self):
51 | """Test expected output for an object with a __dict__ attribute."""
52 |
53 | class myClassObject(object):
54 | def __init__(self, a=1): self.a = a
55 |
56 | my_class_obj = myClassObject(a=3.5)
57 |
58 | obj = {'my_class_obj': my_class_obj}
59 | expected_obj = {'my_class_obj': {'a': 3.5}}
60 | obj_valid = to_hicklable(obj)
61 | self.assertTrue(obj_valid == expected_obj)
62 |
63 | def test_object_slots(self):
64 | """Test expected output for an object with a __slots__ attribute."""
65 |
66 | class myClassObject(object):
67 | __slots__ = ['a']
68 | def __init__(self, a=1): self.a = a
69 |
70 | my_class_obj = myClassObject(a=3.5)
71 |
72 | obj = {'my_class_obj': my_class_obj}
73 | expected_obj = {'my_class_obj': {'a': 3.5}}
74 | obj_valid = to_hicklable(obj)
75 | self.assertTrue(obj_valid == expected_obj)
76 |
77 | def test_object_dict_slots(self):
78 | """Test expected output for an object with __dict__ and __slots__ attributes."""
79 |
80 | class myClassObject(object):
81 | __slots__ = ['a', '__dict__']
82 | def __init__(self, a=1): self.a = a
83 |
84 | my_class_obj = myClassObject(a=3.5)
85 | my_class_obj.b = 2
86 |
87 | obj = {'my_class_obj': my_class_obj}
88 | expected_obj = {'my_class_obj': {'a': 3.5, 'b': 2}}
89 | obj_valid = to_hicklable(obj)
90 | self.assertTrue(obj_valid == expected_obj)
91 |
--------------------------------------------------------------------------------
/tests/test_task.py:
--------------------------------------------------------------------------------
1 | """Module containing unit tests on Task logic."""
2 |
3 | import copy
4 | import unittest
5 |
6 | from matflow.models import TaskSchema
7 | from matflow.models.construction import normalise_local_inputs, get_local_inputs
8 | from matflow.errors import (
9 | IncompatibleSequence,
10 | TaskSchemaError,
11 | TaskParameterError,
12 | SequenceError,
13 | )
14 |
15 | # TODO: add test that warn is issued when an input is in base but also has a sequence.
16 |
17 |
18 | class TaskSchemaTestCase(unittest.TestCase):
19 | """Tests on TaskSchema"""
20 |
21 | def test_raise_on_input_is_output(self):
22 | with self.assertRaises(TaskSchemaError):
23 | TaskSchema('schema_1', inputs=['parameter_1'], outputs=['parameter_1'])
24 |
25 | def test_raise_on_input_map_bad_inputs(self):
26 | """Check inputs defined in the schema input map are in the schema inputs list."""
27 |
28 | with self.assertRaises(TaskSchemaError):
29 | TaskSchema(
30 | 'schema_1',
31 | inputs=['parameter_7', 'parameter_9'],
32 | outputs=['parameter_8'],
33 | input_map=[
34 | {
35 | 'inputs': [
36 | # "parameter_10" is not in the inputs list.
37 | 'parameter_10',
38 | ],
39 | 'file': 'input_file_1',
40 | }
41 | ]
42 | )
43 |
44 | def test_raise_on_output_map_bad_outputs(self):
45 | """Check outputs defined in the schema output map are in the schema outputs list."""
46 |
47 | with self.assertRaises(TaskSchemaError):
48 | TaskSchema(
49 | 'schema_1',
50 | inputs=['parameter_7', 'parameter_9'],
51 | outputs=['parameter_8'],
52 | output_map=[
53 | {
54 | 'files': [
55 | 'output_file_1',
56 | ],
57 | # "parameter_10" is not in the outputs list.
58 | 'output': 'parameter_10',
59 | }
60 | ]
61 | )
62 |
63 |
64 | class TaskParameterTestCase(unittest.TestCase):
65 | """Tests of correct behaviour when defining tasks."""
66 |
67 | def test_raise_on_unknown_input(self):
68 | with self.assertRaises(TaskParameterError):
69 | schema = TaskSchema(
70 | 'schema_1',
71 | inputs=['parameter_1'],
72 | outputs=['parameter_2'],
73 | )
74 | schema.check_surplus_inputs(['parameter_3'])
75 |
76 | def test_raise_on_missing_input(self):
77 | with self.assertRaises(TaskParameterError):
78 | schema = TaskSchema(
79 | 'schema1',
80 | inputs=['parameter_1', 'parameter_2'],
81 | outputs=['parameter_3'],
82 | )
83 | schema.check_missing_inputs(['parameter_2'])
84 |
85 |
86 | class NormaliseLocalTestCase(unittest.TestCase):
87 | """Testing `normalise_local_inputs`."""
88 |
89 | def test_raise_on_bad_nest_idx_float(self):
90 | """Check raises on non-integer (float) nest index for any sequence."""
91 | sequences = [{'name': 'p1', 'nest_idx': 1.0, 'vals': [101, 102]}]
92 | with self.assertRaises(SequenceError):
93 | normalise_local_inputs(sequences=sequences)
94 |
95 | def test_raise_on_bad_nest_idx_string(self):
96 | """Check raises on non-integer (str) nest index for any sequence."""
97 | sequences = [{'name': 'p1', 'nest_idx': '0', 'vals': [101, 102]}]
98 | with self.assertRaises(SequenceError):
99 | normalise_local_inputs(sequences=sequences)
100 |
101 | def test_raise_on_bad_nest_idx_list(self):
102 | """Check raises on non-integer (list) nest index for any sequence."""
103 | sequences = [{'name': 'p1', 'nest_idx': [1, 0], 'vals': [101, 102]}]
104 | with self.assertRaises(SequenceError):
105 | normalise_local_inputs(sequences=sequences)
106 |
107 |
108 | class GetLocalInputsExceptionTestCase(unittest.TestCase):
109 | """Testing exceptions and warnings from `get_local_inputs`."""
110 |
111 | def test_raise_on_missing_nest_idx(self):
112 |         """Check raises when there is more than one sequence but `nest_idx` is
113 |         missing from any of them."""
114 | sequences = [
115 | {'name': 'p2', 'vals': [201, 202], 'nest_idx': 0},
116 | {'name': 'p3', 'vals': [301, 302]},
117 | ]
118 | with self.assertRaises(SequenceError):
119 | get_local_inputs([], sequences=sequences)
120 |
121 | def test_raise_on_bad_sequence_vals_type_str(self):
122 | """Test raises when sequence vals is a string."""
123 | sequences = [{'name': 'p1', 'vals': '120'}]
124 | with self.assertRaises(SequenceError):
125 | get_local_inputs([], sequences=sequences)
126 |
127 | def test_raise_on_bad_sequence_vals_type_number(self):
128 | """Test raises when sequence vals is a number."""
129 | sequences = [{'name': 'p1', 'vals': 120}]
130 | with self.assertRaises(SequenceError):
131 | get_local_inputs([], sequences=sequences)
132 |
133 | def test_raise_on_bad_sequences_type(self):
134 | """Test raises when sequences is not a list."""
135 | sequences = {'name': 'p1', 'vals': [1, 2]}
136 | with self.assertRaises(SequenceError):
137 | get_local_inputs([], sequences=sequences)
138 |
139 | def test_warn_on_unrequired_nest_idx(self):
140 |         """Test a warning is issued when `nest_idx` is given but not required."""
141 | sequences = [{'name': 'p1', 'vals': [101, 102], 'nest_idx': 0}]
142 | with self.assertWarns(Warning):
143 | get_local_inputs([], sequences=sequences)
144 |
145 | def test_raise_on_bad_sequence_keys(self):
146 | """Test raises when a sequence has unknown keys."""
147 | sequences = [{'name': 'p1', 'vals': [101, 102], 'bad_key': 4}]
148 | with self.assertRaises(SequenceError):
149 | get_local_inputs([], sequences=sequences)
150 |
151 | def test_raise_on_missing_sequence_keys(self):
152 | """Test raises when a sequence has missing keys."""
153 | sequences = [{'vals': [101, 102]}]
154 | with self.assertRaises(SequenceError):
155 | get_local_inputs([], sequences=sequences)
156 |
157 | def test_raise_on_incompatible_nesting(self):
158 | """Test error raised on logically inconsistent Task sequence."""
159 | sequences = [
160 | {'name': 'p1', 'nest_idx': 0, 'vals': [101, 102]},
161 | {'name': 'p2', 'nest_idx': 0, 'vals': [201]},
162 | ]
163 | with self.assertRaises(IncompatibleSequence):
164 | get_local_inputs([], sequences=sequences)
165 |
166 |
167 | class GetLocalInputsInputsTestCase(unittest.TestCase):
168 | """Tests on the `inputs` dict generated by `get_local_inputs`."""
169 |
170 | def test_base_only(self):
171 | """Check expected output for no sequences."""
172 | base = {'p1': 101}
173 | local_ins = get_local_inputs([], base=base)['inputs']
174 | local_ins_exp = {'p1': {'vals': [101], 'vals_idx': [0]}}
175 | self.assertTrue(local_ins == local_ins_exp)
176 |
177 | def test_base_and_sequence(self):
178 | """Check expected output for base and one sequence."""
179 | base = {'p1': 101}
180 | sequences = [{'name': 'p2', 'vals': [201, 202]}]
181 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs']
182 | local_ins_exp = {
183 | 'p1': {'vals': [101], 'vals_idx': [0, 0]},
184 | 'p2': {'vals': [201, 202], 'vals_idx': [0, 1]},
185 | }
186 | self.assertTrue(local_ins == local_ins_exp)
187 |
188 | def test_base_and_multi_nested_sequences(self):
189 | """Check expected output for base and two nested sequences."""
190 | base = {'p1': 101}
191 | sequences = [
192 | {'name': 'p2', 'vals': [201, 202], 'nest_idx': 0},
193 | {'name': 'p3', 'vals': [301, 302, 303], 'nest_idx': 1},
194 | ]
195 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs']
196 | local_ins_exp = {
197 | 'p1': {'vals': [101], 'vals_idx': [0, 0, 0, 0, 0, 0]},
198 | 'p2': {'vals': [201, 202], 'vals_idx': [0, 0, 0, 1, 1, 1]},
199 | 'p3': {'vals': [301, 302, 303], 'vals_idx': [0, 1, 2, 0, 1, 2]},
200 | }
201 | self.assertTrue(local_ins == local_ins_exp)
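    # Editor's illustration (hypothetical helper, not in the original suite):
    # the nested vals_idx patterns expected above are exactly the cross
    # product of the per-sequence value indices.
    def _illustrate_nested_vals_idx(self):
        from itertools import product
        pairs = list(product(range(2), range(3)))  # len(p2 vals) = 2, len(p3 vals) = 3
        p2_idx = [i for i, _ in pairs]  # [0, 0, 0, 1, 1, 1]
        p3_idx = [j for _, j in pairs]  # [0, 1, 2, 0, 1, 2]
        return p2_idx, p3_idx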
202 |
203 | def test_base_and_multi_merged_sequences(self):
204 | """Check expected output for base and two merged sequences."""
205 | base = {'p1': 101}
206 | sequences = [
207 | {'name': 'p2', 'vals': [201, 202], 'nest_idx': 0},
208 | {'name': 'p3', 'vals': [301, 302], 'nest_idx': 0},
209 | ]
210 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs']
211 | local_ins_exp = {
212 | 'p1': {'vals': [101], 'vals_idx': [0, 0]},
213 | 'p2': {'vals': [201, 202], 'vals_idx': [0, 1]},
214 | 'p3': {'vals': [301, 302], 'vals_idx': [0, 1]},
215 | }
216 | self.assertTrue(local_ins == local_ins_exp)
217 |
218 | def test_base_and_merged_and_nested_sequences(self):
219 |         """Check expected output for base, two merged sequences, and a nested sequence."""
220 | base = {'p1': 101}
221 | sequences = [
222 | {'name': 'p2', 'vals': [201, 202], 'nest_idx': 0},
223 | {'name': 'p3', 'vals': [301, 302], 'nest_idx': 0},
224 | {'name': 'p4', 'vals': [401, 402, 403], 'nest_idx': 1},
225 | ]
226 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs']
227 | local_ins_exp = {
228 | 'p1': {'vals': [101], 'vals_idx': [0, 0, 0, 0, 0, 0]},
229 | 'p2': {'vals': [201, 202], 'vals_idx': [0, 0, 0, 1, 1, 1]},
230 | 'p3': {'vals': [301, 302], 'vals_idx': [0, 0, 0, 1, 1, 1]},
231 | 'p4': {'vals': [401, 402, 403], 'vals_idx': [0, 1, 2, 0, 1, 2]},
232 | }
233 | self.assertTrue(local_ins == local_ins_exp)
234 |
235 | def test_equivalent_relative_nesting_idx(self):
236 |         """Check only the relative order of `nest_idx` values matters, not their absolute values."""
237 | sequences_1 = [
238 | {'name': 'p1', 'nest_idx': 0, 'vals': [101, 102, 103]},
239 | {'name': 'p2', 'nest_idx': 1, 'vals': [201, 202]},
240 | ]
241 | sequences_2 = copy.deepcopy(sequences_1)
242 | sequences_2[0]['nest_idx'] = 105
243 | sequences_2[1]['nest_idx'] = 2721
244 |
245 | local_ins_1 = get_local_inputs([], sequences=sequences_1)['inputs']
246 | local_ins_2 = get_local_inputs([], sequences=sequences_2)['inputs']
247 |
248 | self.assertTrue(local_ins_1 == local_ins_2)
249 |
250 | def test_correct_number_of_local_inputs_all_nesting(self):
251 | """Check the correct number of elements for a given input."""
252 | sequences = [
253 | {'name': 'p1', 'nest_idx': 0, 'vals': [101, 102, 103]},
254 | {'name': 'p2', 'nest_idx': 1, 'vals': [201, 202]},
255 | ]
256 | local_ins = get_local_inputs([], sequences=sequences)['inputs']
257 | self.assertTrue(len(local_ins['p1']['vals_idx']) == 6)
258 |
259 | def test_all_inputs_local_inputs_size(self):
260 | """Check all inputs have the same number of elements."""
261 | sequences = [
262 | {'name': 'p1', 'nest_idx': 0, 'vals': [101, 102, 103]},
263 | {'name': 'p2', 'nest_idx': 1, 'vals': [201, 202]},
264 | ]
265 | local_ins = get_local_inputs([], sequences=sequences)['inputs']
266 | self.assertTrue(
267 | len(local_ins['p1']['vals_idx']) == len(local_ins['p2']['vals_idx'])
268 | )
269 |
270 | def test_correct_number_of_local_inputs_all_merge(self):
271 | """Check the correct number of local inputs for merging three sequences."""
272 | sequences = [
273 | {'name': 'p1', 'nest_idx': 3, 'vals': [101, 102]},
274 | {'name': 'p2', 'nest_idx': 3, 'vals': [201, 202]},
275 | {'name': 'p3', 'nest_idx': 3, 'vals': [301, 302]},
276 | ]
277 | local_ins = get_local_inputs([], sequences=sequences)['inputs']
278 | self.assertTrue(
279 | len(local_ins['p1']['vals_idx']) ==
280 | len(local_ins['p2']['vals_idx']) ==
281 | len(local_ins['p3']['vals_idx']) == 2
282 | )
283 |
284 | def test_correct_number_of_local_inputs_one_merge(self):
285 | """Check the correct number of local inputs for merging/nesting three sequences."""
286 | sequences = [
287 | {'name': 'p1', 'nest_idx': 3, 'vals': [101, 102]},
288 | {'name': 'p2', 'nest_idx': 4, 'vals': [201, 202]},
289 | {'name': 'p3', 'nest_idx': 4, 'vals': [301, 302]},
290 | ]
291 | local_ins = get_local_inputs([], sequences=sequences)['inputs']
292 | self.assertTrue(
293 | len(local_ins['p1']['vals_idx']) ==
294 | len(local_ins['p2']['vals_idx']) ==
295 | len(local_ins['p3']['vals_idx']) == 4
296 | )
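    # In general, as the two tests above suggest: sequences sharing a nest_idx
    # must be equal-length and merge element-wise, while distinct nest_idx
    # values combine multiplicatively (here 2 x 2 = 4 elements).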
297 |
298 | def test_base_is_merged_into_sequence(self):
299 | """Check the base dict is merged into a sequence."""
300 | base = {'p1': 101}
301 | sequences = [{'name': 'p2', 'nest_idx': 0, 'vals': [201, 202]}]
302 | local_ins = get_local_inputs([], base=base, sequences=sequences)['inputs']
303 | self.assertTrue(
304 | local_ins['p1']['vals_idx'] == [0, 0] and
305 | local_ins['p2']['vals_idx'] == [0, 1]
306 | )
307 |
308 | def test_unit_length_sequence(self):
309 |         """Check a sequence of length one has the same effect as specifying the
310 |         parameter in the base dict."""
311 | base = {'p1': 101}
312 | sequences = [{'name': 'p1', 'nest_idx': 0, 'vals': [101]}]
313 | local_ins_1 = get_local_inputs([], sequences=sequences)['inputs']
314 | local_ins_2 = get_local_inputs([], base=base)['inputs']
315 | self.assertTrue(local_ins_1 == local_ins_2)
316 |
317 |
318 | class GetLocalInputsFullTestCase(unittest.TestCase):
319 | """Explicit checks on the full outputs of `get_local_inputs`."""
320 |
321 |     def full_test_1(self):  # placeholder; not collected by unittest (name lacks the `test_` prefix)
322 |         pass
323 |
--------------------------------------------------------------------------------
/tests/test_workflow.py:
--------------------------------------------------------------------------------
1 | """Module containing unit tests on Workflow initialisation."""
2 |
3 | import unittest
4 |
5 | from matflow.errors import IncompatibleWorkflow
6 | from matflow.models import TaskSchema
7 | from matflow.models.construction import get_dependency_idx
8 |
9 | """
10 | tests for inputs/outputs_idx:
11 | - for a variety of scenarios, check all parameters from the same task have the same number of elements_idx.
12 | - for a few scenarios, check expected elements_idx and task_idx.
13 | - check all keys of output (i.e. `task_idx`) are exactly the set of task_idx values in downstream + upstream tasks.
14 | - check works when no upstream tasks.
15 |
16 | tests for resolve_task_num_elements:
17 | - check works when no upstream tasks
18 |
19 | """
20 |
21 |
22 | def init_schemas(task_lst):
23 | """Construct TaskSchema objects for TaskDependencyTestCase tests."""
24 | for idx, i in enumerate(task_lst):
25 | task_lst[idx]['schema'] = TaskSchema(**i['schema'])
26 | return task_lst
27 |
28 |
29 | class TaskDependencyTestCase(unittest.TestCase):
30 | """Tests on `get_dependency_idx`"""
31 |
32 | def test_single_dependency(self):
33 | """Test correct dependency index for a single task dependency."""
34 | task_lst = [
35 | {
36 | 'context': '',
37 | 'schema': {
38 | 'name': 'one',
39 | 'inputs': [
40 | {'name': 'p1', 'context': None},
41 | {'name': 'p2', 'context': None},
42 | ],
43 | 'outputs': ['p3'],
44 | },
45 | },
46 | {
47 | 'context': '',
48 | 'schema': {
49 |                 'name': 'two',
50 | 'inputs': [
51 | {'name': 'p3', 'context': None},
52 | {'name': 'p4', 'context': None},
53 | ],
54 | 'outputs': ['p5'],
55 | },
56 | },
57 | ]
58 | dep_idx = get_dependency_idx(init_schemas(task_lst))
59 | dep_idx_exp = [[], [0]]
60 | self.assertTrue(dep_idx == dep_idx_exp)
61 |
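    # A hypothetical base-case companion to the test above, covering the
    # "no upstream tasks" situation; the expected value `[[]]` is inferred
    # from the dep_idx patterns used in this class, not verified:
    #
    #     def test_no_dependencies(self):
    #         task_lst = [
    #             {
    #                 'context': '',
    #                 'schema': {
    #                     'name': 'one',
    #                     'inputs': [{'name': 'p1', 'context': None}],
    #                     'outputs': ['p2'],
    #                 },
    #             },
    #         ]
    #         dep_idx = get_dependency_idx(init_schemas(task_lst))
    #         self.assertTrue(dep_idx == [[]])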
62 | def test_single_dependency_two_contexts(self):
63 | """Test single dependencies for two parallel contexts."""
64 | task_lst = [
65 | {
66 | 'context': 'context_A',
67 | 'schema': {
68 | 'name': 'one',
69 | 'inputs': [
70 | {'name': 'p1', 'context': None},
71 | {'name': 'p2', 'context': None},
72 | ],
73 | 'outputs': ['p3'],
74 | },
75 | },
76 | {
77 | 'context': 'context_A',
78 | 'schema': {
79 |                 'name': 'two',
80 | 'inputs': [
81 | {'name': 'p3', 'context': None},
82 | {'name': 'p4', 'context': None},
83 | ],
84 | 'outputs': ['p5'],
85 | },
86 | },
87 | {
88 | 'context': 'context_B',
89 | 'schema': {
90 | 'name': 'one',
91 | 'inputs': [
92 | {'name': 'p1', 'context': None},
93 | {'name': 'p2', 'context': None},
94 | ],
95 | 'outputs': ['p3'],
96 | },
97 | },
98 | {
99 | 'context': 'context_B',
100 | 'schema': {
101 |                 'name': 'two',
102 | 'inputs': [
103 | {'name': 'p3', 'context': None},
104 | {'name': 'p4', 'context': None},
105 | ],
106 | 'outputs': ['p5'],
107 | },
108 | },
109 | ]
110 | dep_idx = get_dependency_idx(init_schemas(task_lst))
111 | dep_idx_exp = [[], [0], [], [2]]
112 | self.assertTrue(dep_idx == dep_idx_exp)
113 |
114 | def test_two_dependencies(self):
115 | """Test where a task depends on two tasks."""
116 | task_lst = [
117 | {
118 | 'context': 'contextA',
119 | 'schema': {
120 | 'name': 'one',
121 | 'inputs': [
122 | {'name': 'p1', 'context': None},
123 | {'name': 'p2', 'context': None},
124 | ],
125 | 'outputs': ['p3', 'p4'],
126 | },
127 | },
128 | {
129 | 'context': 'contextB',
130 | 'schema': {
131 | 'name': 'one',
132 | 'inputs': [
133 | {'name': 'p1', 'context': None},
134 | {'name': 'p2', 'context': None},
135 | ],
136 | 'outputs': ['p3', 'p4'],
137 | },
138 | },
139 | {
140 | 'context': '',
141 | 'schema': {
142 |                 'name': 'two',
143 | 'inputs': [
144 | {'name': 'p3', 'context': 'contextA'},
145 | {'name': 'p4', 'context': 'contextB'},
146 | ],
147 | 'outputs': ['p5'],
148 | },
149 | },
150 | ]
151 | dep_idx = get_dependency_idx(init_schemas(task_lst))
152 | dep_idx_exp = [[], [], [0, 1]]
153 | self.assertTrue(dep_idx == dep_idx_exp)
154 |
155 | def test_raise_on_output_non_exclusivity(self):
156 | """Test raises on multiple tasks that include the same output (and context)."""
157 | task_lst = [
158 | {
159 | 'context': '',
160 | 'schema': {
161 | 'name': 'one',
162 | 'inputs': [
163 | {'name': 'p1', 'context': None},
164 | {'name': 'p2', 'context': None},
165 | ],
166 | 'outputs': ['p3'],
167 | },
168 | },
169 | {
170 | 'context': '',
171 | 'schema': {
172 | 'name': 'two',
173 | 'inputs': [
174 | {'name': 'p4', 'context': None},
175 | ],
176 | 'outputs': ['p3'],
177 | },
178 | },
179 | ]
180 | with self.assertRaises(IncompatibleWorkflow):
181 | get_dependency_idx(init_schemas(task_lst))
182 |
183 | def test_raise_on_circular_reference(self):
184 | """Test raises on circularly referential Tasks."""
185 | task_lst = [
186 | {
187 | 'context': '',
188 | 'schema': {
189 | 'name': 'one',
190 | 'inputs': [
191 | {'name': 'p1', 'context': None},
192 | ],
193 | 'outputs': ['p2'],
194 | },
195 | },
196 | {
197 | 'context': '',
198 | 'schema': {
199 | 'name': 'two',
200 | 'inputs': [
201 | {'name': 'p2', 'context': None},
202 | ],
203 | 'outputs': ['p1'],
204 | },
205 | },
206 | ]
207 | with self.assertRaises(IncompatibleWorkflow):
208 | get_dependency_idx(init_schemas(task_lst))
209 |
--------------------------------------------------------------------------------
/workflow_viz.svg:
--------------------------------------------------------------------------------
[SVG markup elided: workflow visualisation graphic, 569 lines]
--------------------------------------------------------------------------------