├── .codacy.yml ├── .editorconfig ├── .gitignore ├── .travis.yml ├── .zenodo.json ├── CHANGELOG.md ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── README.rst ├── codemeta.json ├── docs ├── Makefile ├── _templates │ └── sidebarintro.html ├── adding_outputs.rst ├── adding_workflow_steps.rst ├── conf.py ├── cwl_tips_tricks.rst ├── enable_logging.rst ├── examples.rst ├── images │ ├── add-multiply-example-workflow.png │ └── nlppln-anonymize-workflow.png ├── index.rst ├── installation.rst ├── listing_steps.rst ├── loading_steps.rst ├── make.bat ├── nlppln_anonymize.rst ├── printing_workflows.rst ├── saving_workflows.rst ├── setting_documentation.rst ├── useful_tools.rst ├── user_manual.rst └── workflow_inputs.rst ├── requirements.txt ├── scriptcwl ├── __init__.py ├── examples │ ├── __init__.py │ ├── add.cwl │ ├── add.py │ ├── add_multiply_example.cwl │ ├── multiply.cwl │ └── multiply.py ├── library.py ├── reference.py ├── scriptcwl.py ├── step.py ├── workflow.py └── yamlutils.py ├── setup.cfg ├── setup.py └── tests ├── data ├── echo-no-shebang.cwl ├── echo-wc.workflowstep.cwl ├── echo.scattered.cwl ├── file-names │ ├── echo-with-minuses.cwl │ ├── echo-with-minuses_and_underscores.cwl │ ├── echo_with_minuses-and-underscores.cwl │ └── echo_with_underscores.cwl ├── misc │ ├── align-dir-pack.cwl │ ├── echo2.cwl │ ├── echo3.cwl │ └── non-python-names.cwl ├── tools │ ├── echo.cwl │ ├── multiple-out-args.cwl │ └── wc.cwl └── workflows │ ├── echo-wc.cwl │ ├── echo-wc_inline.cwl │ └── echo-wc_wd.cwl ├── test_library.py ├── test_lint.py ├── test_scriptcwl.py ├── test_step.py ├── test_workflow.py └── test_yamlutils.py /.codacy.yml: -------------------------------------------------------------------------------- 1 | # codacy configuration file 2 | 3 | --- 4 | 5 | exclude_paths: [ 6 | 'tests/**', 7 | 'scriptcwl/examples/**', 8 | ] 9 | -------------------------------------------------------------------------------- /.editorconfig: 
-------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines with a newline ending every file 7 | [*] 8 | end_of_line = lf 9 | insert_final_newline = true 10 | trim_trailing_whitespace = true 11 | charset = utf-8 12 | 13 | # Matches multiple files with brace expansion notation 14 | # Set default charset 15 | [*.{js,py,java,r,R,html}] 16 | indent_style = space 17 | 18 | # 4 space indentation 19 | [*.{py,java,r,R}] 20 | indent_size = 4 21 | 22 | # 2 space indentation 23 | [*.{js,json,yml,html}] 24 | indent_size = 2 25 | 26 | [*.{md,Rmd}] 27 | trim_trailing_whitespace = false 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | *.eggs 4 | 5 | # data 6 | *.txt 7 | *.out 8 | *.csv 9 | *.xml 10 | 11 | !requirements.txt 12 | 13 | dist 14 | .cache 15 | .pytest_cache 16 | 17 | bower_components 18 | node_modules 19 | 20 | htmlcov 21 | .coverage 22 | .tox 23 | 24 | docs/_build 25 | docs/apidocs 26 | 27 | build 28 | 29 | .DS_Store 30 | .idea/ 31 | venv/ 32 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | python: 4 | - "3.6" 5 | - "3.7" 6 | env: 7 | - CWLTOOL_VERSION=">=1.0.20170727112954,<=1.0.20180721142728" 8 | - CWLTOOL_VERSION=">=1.0.20180721142728,<=1.0.20181102182747" 9 | - CWLTOOL_VERSION=">=1.0.20181102182747,<=1.0.20190228155703" 10 | install: 11 | - pip install -q cwltool$CWLTOOL_VERSION 12 | - python setup.py -q install 13 | #- pip install 'setuptools>=18.5' 14 | #- pip install -r requirements.txt 15 | script: python setup.py test 16 | after_success: 17 | - python-codacy-coverage -r 
coverage.xml 18 | sudo: false 19 | cache: pip 20 | -------------------------------------------------------------------------------- /.zenodo.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "affiliation": "Netherlands eScience Center", 5 | "name": "van der Zwaan, Janneke M.", 6 | "orcid": "0000-0002-8329-7000" 7 | }, 8 | { 9 | "affiliation": "Nederlands eScience Center", 10 | "name": "Andela, Bouwe" 11 | }, 12 | { 13 | "affiliation": "Nederlands eScience Center", 14 | "name": "de Vries, Ben" 15 | }, 16 | { 17 | "affiliation": "Netherlands eScience Center", 18 | "name": "Veen, Lourens" 19 | }, 20 | { 21 | "affiliation": "Nederlands eScience Center", 22 | "name": "van Haren, Ronald" 23 | }, 24 | { 25 | "affiliation": "Nederlands eScience Center", 26 | "name": "Verhoeven, Stefan" 27 | }, 28 | { 29 | "affiliation": "Nederlands eScience Center", 30 | "name": "van der Ploeg, Atze" 31 | } 32 | ], 33 | "license": { 34 | "id": "Apache-2.0" 35 | }, 36 | "title": "scriptcwl 0.7.2" 37 | } 38 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## 0.8.1 4 | 5 | Released on August 11, 2019. 
6 | 7 | ### Added 8 | 9 | * Support even newer versions of cwltool (<=1.0.20190228155703) 10 | * Support for Python 3.7 11 | 12 | ### Changed 13 | 14 | * Ignore packed workflows when sorting for loading order 15 | 16 | ### Removed 17 | 18 | * Support for Python 2 19 | * Support for Python 3.4 20 | 21 | ## 0.8.0 22 | 23 | ### Added 24 | 25 | * Add `MultipleInputFeatureRequirement` when a step gets a list of inputs (#105; see also #101) 26 | * Convert input and output names with dashes (-) to underscores (\_), so they are valid Python names 27 | * Allow printing of workflows (#86) 28 | * Logging (for debugging) 29 | * Support newer versions of cwltool (#108) 30 | 31 | ### Changed 32 | 33 | * Using booleans to indicate how a workflow is saved is deprecated. Instead, a mode string should be used (e.g., `wf.save('wf.cwl', mode='rel')`) (#87) 34 | * Inline saving of workflows is deprecated. When saving a workflow with `mode='inline'`, the workflow is saved as a packed workflow (#92) 35 | * Make `scatter_method` optional when scattering over a single parameter (#103) 36 | 37 | ## 0.7.2 38 | 39 | ### Added 40 | 41 | * Allow for list of step outputs/wf inputs as step input (#101) 42 | * CFF files with citation metadata 43 | * Link between a step's Python name (i.e. how it is called on the WorkflowGenerator object) and step names (#100) 44 | * Allow setting workflow labels 45 | * Allow setting a label for a workflow input 46 | * Support for CommandInputEnumSchema as workflow input (#99) 47 | * User manual in documentation 48 | 49 | ### Changed 50 | 51 | * Ensure workflows without a requirements section are loaded into the steps library 52 | * Raise real warning when duplicate cwl step (i.e. 
having the same file name as another step) is loaded 53 | 54 | ### Removed 55 | 56 | * Method to convert a string to cwl file name 57 | 58 | ## 0.7.1 59 | 60 | ### Added 61 | 62 | * Load tools before workflows when a working directory is used (#94) 63 | * Make sure no duplicate workflow input names are used (#96) 64 | 65 | ### Changed 66 | 67 | * Inputs with a default value are also recognized as optional 68 | 69 | ## 0.7.0 70 | 71 | ### Added 72 | 73 | * Save packed workflows 74 | * Save workflows using a working directory (a solution to the problem of dealing with paths to steps if steps are loaded from different local directories) 75 | 76 | ### Changed 77 | 78 | * Prevent name clashes of embedded (sub)workflows (however, this doesn't work when a (sub)workflow is added multiple times) 79 | * Use name of step in workflow to create unique ids when saving steps inline (#82) 80 | * Allow saving workflows with inline steps for step files without shebang (#83) 81 | * Document feature for adding documentation to a workflow (#81) 82 | * Fix saving of relative paths for workflows with steps from urls 83 | * By default, workflows are saved with absolute paths 84 | 85 | ## 0.6.0 86 | 87 | ### Added 88 | 89 | * Make `WorkflowGenerator` into a context manager (#24) 90 | * Type checking of input and output types (#22) 91 | * Allow saving workflow with inline steps (#38) 92 | * Allow saving workflow with relative paths (#25) 93 | * Documentation on Read the Docs (#35) 94 | * Allow loading of multiple CWL steps (from file, http url, and directory) at the same time 95 | 96 | ### Changed 97 | 98 | * Rename `wf.add_inputs()` to `wf.add_input()` (#11) 99 | 100 | ### Removed 101 | 102 | * Python 3.3 support (Sphinx needs Python 3.4) 103 | 104 | ## 0.5.1 105 | 106 | ### Added 107 | 108 | * Allow addition of default values to workflow inputs (#32) 109 | * List of steps and workflows in steps library is ordered alphabetically 110 | 111 | ## 0.5.0 112 | 113 | ### Added 114 | 115 | * 
Python 3 compatibility (and testing with [tox](https://tox.readthedocs.io/en/latest/)) 116 | 117 | ## 0.4.0 118 | 119 | ### Added 120 | 121 | * Generate unique names for steps that are added to the workflow more than once (#31) 122 | * Pass all outputs from a step, instead of just one (#27) 123 | * Improve listing of workflow steps 124 | 125 | ## 0.3.1 126 | 127 | ### Added 128 | 129 | * Load ExpressionTools as steps 130 | 131 | ### Changed 132 | 133 | * Preserve the order in which steps were added to the workflow 134 | 135 | ## 0.3.0 136 | 137 | ### Changed 138 | 139 | * Replace pyyaml by ruamel (fixes compatibility with cwltool) 140 | 141 | ## 0.2.0 142 | 143 | ### Added 144 | 145 | * Documentation for WorkflowGenerator and Step (#15). 146 | * Allow step to be scattered (#17) 147 | * Tests (#9) 148 | * Shebang to saved CWL file (#14) 149 | * Preprocess shortcuts in CWL steps (#12) 150 | * Allow workflows to be used as steps (subworkflows) (#4) 151 | * Take into account optional arguments (#6) 152 | 153 | ### Removed 154 | 155 | * Step.get_input() because it was not used (#21) 156 | 157 | ## 0.1.0 158 | 159 | ### Added 160 | 161 | * WorkflowGenerator object that allows users to create CWL workflows. The WorkflowGenerator has functionality to 162 | * load CWL steps from a directory, 163 | * list available CWL steps 164 | * connect the inputs and outputs of CWL steps, 165 | * determine the types of a step's inputs 166 | * specify a workflow's inputs and outputs, and 167 | * add workflow documentation. 168 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # YAML 1.2 2 | # Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/) 3 | cff-version: 1.0.3 4 | message: If you use this software, please cite it as below. 
5 | title: 'scriptcwl 0.8.1' 6 | doi: 10.5281/zenodo.1160725 7 | authors: 8 | - given-names: "Janneke M." 9 | orcid: 0000-0002-8329-7000 10 | affiliation: Netherlands eScience Center 11 | family-names: Zwaan 12 | name-particle: "van der" 13 | - given-names: Bouwe 14 | affiliation: Nederlands eScience Center 15 | family-names: Andela 16 | - given-names: Ben 17 | affiliation: Nederlands eScience Center 18 | family-names: Vries 19 | name-particle: de 20 | - given-names: Lourens 21 | affiliation: Netherlands eScience Center 22 | family-names: Veen 23 | - given-names: Ronald 24 | affiliation: Nederlands eScience Center 25 | family-names: Haren 26 | name-particle: van 27 | - given-names: Stefan 28 | affiliation: Nederlands eScience Center 29 | family-names: Verhoeven 30 | - given-names: Atze 31 | affiliation: Nederlands eScience Center 32 | family-names: Ploeg 33 | name-particle: "van der" 34 | version: 0.8.1 35 | date-released: 2019-08-11 36 | repository-code: https://github.com/NLeSC/scriptcwl 37 | license: Apache-2.0 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Include the license file 2 | include LICENSE 3 | # setup.py needs readme for long_description 4 | include README.rst 5 | include scriptcwl/examples/*.cwl 6 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | scriptcwl 2 | ========= 3 | 4 | |codacy_grade| |codacy_coverage| |travis| |documentation| |pypi_version| |pypi_supported| |zenodo| 5 | 6 | scriptcwl is a Python package for creating workflows in 7 | `Common Workflow Language (CWL) <https://www.commonwl.org/>`_. If you give it a number of CWL 8 | ``CommandLineTools``, you can create a workflow by writing a Python script. This can 9 | be done interactively using `Jupyter Notebooks <https://jupyter.org/>`_. The full 10 | documentation can be found on `Read the Docs <http://scriptcwl.readthedocs.io/en/latest/>`_. 11 | 12 | .. 
image:: docs/images/add-multiply-example-workflow.png 13 | :alt: add multiply example workflow 14 | :align: center 15 | 16 | Given CWL ``CommandLineTools`` for ``add`` and ``multiply`` (these are available 17 | in `scriptcwl `_), 18 | a CWL specification of this workflow can be written as: 19 | 20 | .. code-block:: python 21 | 22 | from scriptcwl import WorkflowGenerator 23 | 24 | with WorkflowGenerator() as wf: 25 | wf.load(steps_dir='/path_to_scriptcwl/scriptcwl/examples/') 26 | 27 | num1 = wf.add_input(num1='int') 28 | num2 = wf.add_input(num2='int') 29 | 30 | answer1 = wf.add(x=num1, y=num2) 31 | answer2 = wf.multiply(x=answer1, y=num2) 32 | 33 | wf.add_outputs(final_answer=answer2) 34 | 35 | wf.save('add_multiply_example_workflow.cwl') 36 | 37 | The workflow has two integers as inputs (``num1`` and ``num2``), and first adds 38 | these two numbers (``wf.add(x=num1, y=num2)``), and then multiplies the answer 39 | with the second input (``num2``). The result of that processing step is the output 40 | of the workflow. Finally, the workflow is saved to a file. The result looks like: 41 | 42 | .. code-block:: sh 43 | 44 | #!/usr/bin/env cwl-runner 45 | cwlVersion: v1.0 46 | class: Workflow 47 | inputs: 48 | num1: int 49 | num2: int 50 | outputs: 51 | final_answer: 52 | type: int 53 | outputSource: multiply/answer 54 | steps: 55 | add: 56 | run: add.cwl 57 | in: 58 | y: num2 59 | x: num1 60 | out: 61 | - answer 62 | multiply: 63 | run: multiply.cwl 64 | in: 65 | y: num2 66 | x: add/answer 67 | out: 68 | - answer 69 | 70 | The Python and CWL files used in the example can be found in the `examples folder `_. 71 | 72 | Installation 73 | ############ 74 | 75 | Install using pip: 76 | 77 | .. code-block:: sh 78 | 79 | pip install scriptcwl 80 | 81 | 82 | For development: 83 | 84 | .. code-block:: sh 85 | 86 | git clone git@github.com:NLeSC/scriptcwl.git 87 | cd scriptcwl 88 | python setup.py develop 89 | 90 | Run tests (including coverage) with: 91 | 92 | .. 
code-block:: sh 93 | 94 | python setup.py test 95 | 96 | Useful tools 97 | ############ 98 | 99 | To use scriptcwl for creating CWL workflows, you need CWL ``CommandLineTools``. 100 | There are some software packages that help with generating those 101 | for existing command line tools written in Python: 102 | 103 | * `argparse2tool `_: Generate CWL CommandLineTool wrappers (and/or Galaxy tool descriptions) from Python programs that use argparse. Also supports the `click `_ argument parser. 104 | * `pypi2cwl `_: Automatically run argparse2cwl on any package in PyPi. 105 | * `python-cwlgen `_: Generate CommandLineTool and DockerRequirement programmatically 106 | 107 | License 108 | ####### 109 | 110 | Copyright (c) 2016-2018, Netherlands eScience Center, University of Twente 111 | 112 | Licensed under the Apache License, Version 2.0 (the "License"); 113 | you may not use this file except in compliance with the License. 114 | You may obtain a copy of the License at 115 | 116 | http://www.apache.org/licenses/LICENSE-2.0 117 | 118 | Unless required by applicable law or agreed to in writing, software 119 | distributed under the License is distributed on an "AS IS" BASIS, 120 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 121 | See the License for the specific language governing permissions and 122 | limitations under the License. 123 | 124 | .. |codacy_grade| image:: https://api.codacy.com/project/badge/Grade/8f383bca18384d8187c10c27affa9d53 125 | :target: https://www.codacy.com/app/jvdzwaan/scriptcwl?utm_source=github.com&utm_medium=referral&utm_content=NLeSC/scriptcwl&utm_campaign=Badge_Grade 126 | 127 | .. |codacy_coverage| image:: https://api.codacy.com/project/badge/Coverage/8f383bca18384d8187c10c27affa9d53 128 | :target: https://www.codacy.com/app/jvdzwaan/scriptcwl?utm_source=github.com&utm_medium=referral&utm_content=NLeSC/scriptcwl&utm_campaign=Badge_Coverage 129 | 130 | .. 
|travis| image:: https://travis-ci.org/NLeSC/scriptcwl.svg?branch=master 131 | :target: https://travis-ci.org/NLeSC/scriptcwl 132 | 133 | .. |documentation| image:: https://readthedocs.org/projects/scriptcwl/badge/?version=latest 134 | :target: http://scriptcwl.readthedocs.io/en/latest/?badge=latest 135 | 136 | .. |pypi_version| image:: https://badge.fury.io/py/scriptcwl.svg 137 | :target: https://badge.fury.io/py/scriptcwl 138 | 139 | .. |pypi_supported| image:: https://img.shields.io/pypi/pyversions/scriptcwl.svg 140 | :target: https://pypi.python.org/pypi/scriptcwl 141 | 142 | .. |zenodo| image:: https://zenodo.org/badge/70679474.svg 143 | :target: https://zenodo.org/badge/latestdoi/70679474 144 | :alt: DOI 145 | -------------------------------------------------------------------------------- /codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": [ 3 | "https://doi.org/10.5063/schema/codemeta-2.0", 4 | "http://schema.org" 5 | ], 6 | "@type": "SoftwareSourceCode", 7 | "author": [ 8 | { 9 | "@id": "0000-0002-8329-7000", 10 | "@type": "Person", 11 | "affiliation": { 12 | "@type": "Organization", 13 | "legalName": "Netherlands eScience Center" 14 | }, 15 | "familyName": "van der Zwaan", 16 | "givenName": "Janneke M." 
17 | }, 18 | { 19 | "@type": "Person", 20 | "affiliation": { 21 | "@type": "Organization", 22 | "legalName": "Nederlands eScience Center" 23 | }, 24 | "familyName": "Andela", 25 | "givenName": "Bouwe" 26 | }, 27 | { 28 | "@type": "Person", 29 | "affiliation": { 30 | "@type": "Organization", 31 | "legalName": "Nederlands eScience Center" 32 | }, 33 | "familyName": "de Vries", 34 | "givenName": "Ben" 35 | }, 36 | { 37 | "@type": "Person", 38 | "affiliation": { 39 | "@type": "Organization", 40 | "legalName": "Netherlands eScience Center" 41 | }, 42 | "familyName": "Veen", 43 | "givenName": "Lourens" 44 | }, 45 | { 46 | "@type": "Person", 47 | "affiliation": { 48 | "@type": "Organization", 49 | "legalName": "Nederlands eScience Center" 50 | }, 51 | "familyName": "van Haren", 52 | "givenName": "Ronald" 53 | }, 54 | { 55 | "@type": "Person", 56 | "affiliation": { 57 | "@type": "Organization", 58 | "legalName": "Nederlands eScience Center" 59 | }, 60 | "familyName": "Verhoeven", 61 | "givenName": "Stefan" 62 | }, 63 | { 64 | "@type": "Person", 65 | "affiliation": { 66 | "@type": "Organization", 67 | "legalName": "Nederlands eScience Center" 68 | }, 69 | "familyName": "van der Ploeg", 70 | "givenName": "Atze" 71 | } 72 | ], 73 | "codeRepository": "https://github.com/NLeSC/scriptcwl", 74 | "license": "http://www.apache.org/licenses/LICENSE-2.0", 75 | "name": "scriptcwl 0.8.1" 76 | } 77 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = scriptcwl 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_templates/sidebarintro.html: -------------------------------------------------------------------------------- 1 |

About

2 |

3 | Scriptcwl is a Python package for creating Common Workflow Language (CWL) workflows. 4 |

5 |

Useful Links

6 | 12 | -------------------------------------------------------------------------------- /docs/adding_outputs.rst: -------------------------------------------------------------------------------- 1 | Specifying workflow outputs 2 | =========================== 3 | 4 | When all steps of the workflow have been added, the user can specify 5 | workflow outputs by calling ``wf.add_outputs()``: 6 | :: 7 | 8 | wf.add_outputs(final_answer=answer2) 9 | -------------------------------------------------------------------------------- /docs/adding_workflow_steps.rst: -------------------------------------------------------------------------------- 1 | Adding workflow steps 2 | ===================== 3 | 4 | After loading steps and adding workflow inputs, the steps of the workflow should 5 | be specified. To add a step to a workflow, its method must 6 | be called on the ``WorkflowGenerator`` object. For example, to add a step 7 | called ``add`` [#]_ to the workflow, the following method must be called: 8 | :: 9 | 10 | answer1 = wf.add(x=num1, y=num2) 11 | 12 | The method expects a list of ``key=value`` pairs as input parameters. (To find 13 | out what inputs a step needs call ``wf.inputs(<step name>)``. This method prints 14 | all inputs and their types.) ``wf.<step name>()`` returns a string if the step has 15 | a single output and a tuple of strings if the step has multiple output parameters: 16 | :: 17 | 18 | output1, output2 = wf.someStep(input=input) 19 | 20 | The order of the outputs is the same as in the step specification, and can be 21 | determined by printing the step signatures using ``print(wf.list_steps())``. 22 | 23 | The strings returned by ``wf.<step name>()`` contain output 24 | names that can be used as input for later steps, or that can be connected 25 | to workflow outputs. For example, in a later step, ``answer1`` can be used as input: 26 | :: 27 | 28 | answer2 = wf.multiply(x=answer1, y=num2) 29 | 30 | Scattering steps 31 | ################ 32 | 33 | Scriptcwl supports `scattering steps `_. 
34 | To scatter a step, keyword arguments 35 | ``scatter`` and ``scatter_method`` must be provided when a step is added to the 36 | workflow. To scatter a step called ``echo``, which has a single input argument 37 | ``message``, this would look like: 38 | :: 39 | 40 | output = wf.echo(message=input1, scatter='message', scatter_method='dotproduct') 41 | 42 | The type of ``message`` should be an array (e.g., an array of strings). 43 | 44 | To scatter over multiple variables, ``scatter`` also accepts a list of input names: 45 | :: 46 | 47 | output = wf.echo(message1=input1, message2=input2, scatter=['message1', 'message2'], scatter_method='dotproduct') 48 | 49 | .. [#] Scriptcwl contains two example command line tools, ``add`` and ``multiply``. The Python and CWL files can be found in the `examples folder `_. 50 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # scriptcwl documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Nov 13 15:12:14 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
18 | # 19 | import os 20 | import sys 21 | 22 | from recommonmark.parser import CommonMarkParser 23 | 24 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | # 30 | # needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = ['sphinx.ext.autodoc'] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # The suffix(es) of source filenames. 41 | # You can specify multiple suffix as a list of string: 42 | # 43 | source_parsers = { 44 | '.md': CommonMarkParser, 45 | } 46 | source_suffix = ['.rst', '.md'] 47 | # source_suffix = '.rst' 48 | 49 | # The master toctree document. 50 | master_doc = 'index' 51 | 52 | # General information about the project. 53 | project = u'scriptcwl' 54 | copyright = u'2017, Netherlands eScience Center' 55 | author = u'Netherlands eScience Center' 56 | 57 | # The version info for the project you're documenting, acts as replacement for 58 | # |version| and |release|, also used in various other places throughout the 59 | # built documents. 60 | # 61 | # The short X.Y version. 62 | version = u'0.8' 63 | # The full version, including alpha/beta/rc tags. 64 | release = u'0.8.1' 65 | 66 | # The language for content autogenerated by Sphinx. Refer to documentation 67 | # for a list of supported languages. 68 | # 69 | # This is also used if you do content translation via gettext catalogs. 70 | # Usually you set "language" from the command line for these cases. 71 | language = None 72 | 73 | # List of patterns, relative to source directory, that match files and 74 | # directories to ignore when looking for source files. 
75 | # This patterns also effect to html_static_path and html_extra_path 76 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 77 | 78 | # The name of the Pygments (syntax highlighting) style to use. 79 | pygments_style = 'sphinx' 80 | 81 | # If true, `todo` and `todoList` produce output, else they produce nothing. 82 | todo_include_todos = False 83 | 84 | # Also document constructors. 85 | autoclass_content = 'both' 86 | 87 | 88 | # -- Run apidoc plug-in manually, as readthedocs doesn't support it ------- 89 | # See https://github.com/rtfd/readthedocs.org/issues/1139 90 | def run_apidoc(_): 91 | here = os.path.dirname(__file__) 92 | out = os.path.abspath(os.path.join(here, 'apidocs')) 93 | src = os.path.abspath(os.path.join(here, '..', 'scriptcwl')) 94 | 95 | ignore_paths = [] 96 | 97 | argv = [ 98 | "-f", 99 | "-T", 100 | "-e", 101 | "-M", 102 | "-o", out, 103 | src 104 | ] + ignore_paths 105 | 106 | try: 107 | # Sphinx 1.7+ 108 | from sphinx.ext import apidoc 109 | apidoc.main(argv) 110 | except ImportError: 111 | # Sphinx 1.6 (and earlier) 112 | from sphinx import apidoc 113 | argv.insert(0, apidoc.__file__) 114 | apidoc.main(argv) 115 | 116 | 117 | def setup(app): 118 | app.connect('builder-inited', run_apidoc) 119 | 120 | 121 | # -- Options for HTML output ---------------------------------------------- 122 | 123 | # The theme to use for HTML and HTML Help pages. See the documentation for 124 | # a list of builtin themes. 125 | # 126 | html_theme = 'alabaster' 127 | 128 | # Theme options are theme-specific and customize the look and feel of a theme 129 | # further. For a list of options available for each theme, see the 130 | # documentation. 131 | # 132 | # html_theme_options = {} 133 | 134 | # Add any paths that contain custom static files (such as style sheets) here, 135 | # relative to this directory. They are copied after the builtin static files, 136 | # so a file named "default.css" will overwrite the builtin "default.css". 
137 | html_static_path = ['_static'] 138 | 139 | # Custom sidebar templates, must be a dictionary that maps document names 140 | # to template names. 141 | # 142 | # This is required for the alabaster theme 143 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 144 | html_sidebars = { 145 | 'index': ['sidebarintro.html', 'sourcelink.html', 'searchbox.html'], 146 | '**': ['localtoc.html', 'relations.html', 147 | 'sourcelink.html', 'searchbox.html'] 148 | } 149 | 150 | # -- Options for HTMLHelp output ------------------------------------------ 151 | 152 | # Output file base name for HTML help builder. 153 | htmlhelp_basename = 'scriptcwldoc' 154 | 155 | 156 | # -- Options for LaTeX output --------------------------------------------- 157 | 158 | latex_elements = { 159 | # The paper size ('letterpaper' or 'a4paper'). 160 | # 161 | # 'papersize': 'letterpaper', 162 | 163 | # The font size ('10pt', '11pt' or '12pt'). 164 | # 165 | # 'pointsize': '10pt', 166 | 167 | # Additional stuff for the LaTeX preamble. 168 | # 169 | # 'preamble': '', 170 | 171 | # Latex figure (float) alignment 172 | # 173 | # 'figure_align': 'htbp', 174 | } 175 | 176 | # Grouping the document tree into LaTeX files. List of tuples 177 | # (source start file, target name, title, 178 | # author, documentclass [howto, manual, or own class]). 179 | latex_documents = [ 180 | (master_doc, 'scriptcwl.tex', u'scriptcwl Documentation', 181 | u'Netherlands eScience Center', 'manual'), 182 | ] 183 | 184 | 185 | # -- Options for manual page output --------------------------------------- 186 | 187 | # One entry per manual page. List of tuples 188 | # (source start file, name, description, authors, manual section). 189 | man_pages = [ 190 | (master_doc, 'scriptcwl', u'scriptcwl Documentation', 191 | [author], 1) 192 | ] 193 | 194 | 195 | # -- Options for Texinfo output ------------------------------------------- 196 | 197 | # Grouping the document tree into Texinfo files. 
List of tuples 198 | # (source start file, target name, title, author, 199 | # dir menu entry, description, category) 200 | texinfo_documents = [ 201 | (master_doc, 'scriptcwl', u'scriptcwl Documentation', 202 | author, 'scriptcwl', 'One line description of project.', 203 | 'Miscellaneous'), 204 | ] 205 | -------------------------------------------------------------------------------- /docs/cwl_tips_tricks.rst: -------------------------------------------------------------------------------- 1 | CWL Tips and Tricks 2 | =================== 3 | 4 | Have a look at the `CWL User Guide: Recommended Practices 5 | `_. 6 | 7 | Generate yaml file with workflow inputs 8 | ####################################### 9 | 10 | You can use ``cwltool --make-template`` to generate a yaml file with all the workflow inputs: 11 | :: 12 | 13 | cwltool --make-template add_multiply_example.cwl > inputs.yml 14 | 15 | ``inputs.yml`` contains: 16 | :: 17 | 18 | num1: 0 19 | num2: 0 20 | 21 | Use your favorite text editor to set the inputs to appropriate values. Save the 22 | file, and use it as input for your workflow: 23 | :: 24 | 25 | cwltool add_multiply_example.cwl inputs.yml 26 | 27 | Using cwl-runner 28 | ################ 29 | 30 | Install the ``cwlref-runner`` package to set ``cwl-runner`` to ``cwltool``: 31 | :: 32 | 33 | pip install cwlref-runner 34 | 35 | If ``cwl-runner`` is set, you can run workflows by typing: 36 | :: 37 | 38 | chmod +x workflow.cwl 39 | ./workflow.cwl 40 | 41 | If you have other CWL implementations installed and want ``cwl-runner`` to use one 42 | of these implementations, you should define a symlink that points to the implementation 43 | you want to use; e.g., by manually creating a symlink and adding it to your ``$PATH`` 44 | variable, or by using the linux `alternatives `_ system. 
45 | -------------------------------------------------------------------------------- /docs/enable_logging.rst: -------------------------------------------------------------------------------- 1 | Enable logging for debugging 2 | ============================ 3 | 4 | If you get errors while creating workflows, and scriptcwl doesn't give you a 5 | proper error message, you might want to enable logging to try and figure out 6 | what goes wrong. 7 | 8 | To enable logging, do: 9 | 10 | :: 11 | 12 | import logging 13 | logging.basicConfig(format="%(asctime)s [%(process)d] %(levelname)-8s " 14 | "%(name)s,%(lineno)s\t%(message)s") 15 | logging.getLogger().setLevel('DEBUG') 16 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | .. _examples: 2 | 3 | Example workflows 4 | ================= 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | nlppln_anonymize 10 | -------------------------------------------------------------------------------- /docs/images/add-multiply-example-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLeSC/scriptcwl/d2dcfa0686eaa893dd4b3efbd232e9bd1e15194d/docs/images/add-multiply-example-workflow.png -------------------------------------------------------------------------------- /docs/images/nlppln-anonymize-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLeSC/scriptcwl/d2dcfa0686eaa893dd4b3efbd232e9bd1e15194d/docs/images/nlppln-anonymize-workflow.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. scriptcwl documentation master file, created by 2 | sphinx-quickstart on Mon Nov 13 15:12:14 2017. 
3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to the Scriptcwl Documentation! 7 | ======================================= 8 | 9 | Scriptcwl is a Python package for creating `Common Workflow Language (CWL) `_ workflows. 10 | 11 | .. image:: images/add-multiply-example-workflow.png 12 | :alt: add multiply example workflow 13 | :align: center 14 | 15 | Given CWL ``CommandLineTools`` for ``add`` and ``multiply`` (these are available 16 | in `scriptcwl `_), 17 | a CWL specification of this workflow can be written as: 18 | :: 19 | 20 | from scriptcwl import WorkflowGenerator 21 | 22 | with WorkflowGenerator() as wf: 23 | wf.load(steps_dir='/path_to_scriptcwl/scriptcwl/examples/') 24 | 25 | num1 = wf.add_input(num1='int') 26 | num2 = wf.add_input(num2='int') 27 | 28 | answer1 = wf.add(x=num1, y=num2) 29 | answer2 = wf.multiply(x=answer1, y=num2) 30 | 31 | wf.add_outputs(final_answer=answer2) 32 | 33 | wf.save('add_multiply_example_workflow.cwl') 34 | 35 | The workflow has two integers as inputs (``num1`` and ``num2``), and first adds 36 | these two numbers (``wf.add(x=num1, y=num2)``), and then multiplies the answer 37 | with the second input (``num2``). The result of that processing step is the output 38 | of the workflow. Finally, the workflow is saved to a file. The result looks like: 39 | 40 | .. code-block:: none 41 | 42 | #!/usr/bin/env cwl-runner 43 | cwlVersion: v1.0 44 | class: Workflow 45 | inputs: 46 | num1: int 47 | num2: int 48 | outputs: 49 | final_answer: 50 | type: int 51 | outputSource: multiply/answer 52 | steps: 53 | add: 54 | run: add.cwl 55 | in: 56 | y: num2 57 | x: num1 58 | out: 59 | - answer 60 | multiply: 61 | run: multiply.cwl 62 | in: 63 | y: num2 64 | x: add/answer 65 | out: 66 | - answer 67 | 68 | More examples of workflows created using scriptcwl can be found under :ref:`examples`. 69 | 70 | Contents 71 | ======== 72 | 73 | .. 
toctree:: 74 | :maxdepth: 3 75 | 76 | user_manual 77 | installation 78 | examples 79 | useful_tools 80 | cwl_tips_tricks 81 | 82 | API Reference 83 | ============= 84 | 85 | .. toctree:: 86 | :maxdepth: 2 87 | 88 | scriptcwl 89 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | * pip 5 | 6 | Install using pip: 7 | 8 | .. code-block:: sh 9 | 10 | pip install scriptcwl 11 | 12 | For development: 13 | 14 | .. code-block:: sh 15 | 16 | git clone git@github.com:NLeSC/scriptcwl.git 17 | cd scriptcwl 18 | python setup.py develop 19 | 20 | Run tests (including coverage) with: 21 | 22 | .. code-block:: sh 23 | 24 | python setup.py test 25 | 26 | * conda 27 | * Windows issues 28 | * for development 29 | -------------------------------------------------------------------------------- /docs/listing_steps.rst: -------------------------------------------------------------------------------- 1 | Listing steps 2 | ============= 3 | 4 | Steps that are loaded into the WorkflowGenerator's steps library can be listed by running: 5 | :: 6 | 7 | print(wf.list_steps()) 8 | 9 | For the example workflow, the output would be: 10 | :: 11 | 12 | Steps 13 | add...................... answer = wf.add(x, y) 14 | multiply................. answer = wf.multiply(x, y) 15 | 16 | Workflows 17 | 18 | This means that there are two processing steps and no (sub)workflows loaded into the 19 | steps library. The listing contains the complete command to add the step to the workflow 20 | (e.g., ``answer = wf.add(x, y)``). The command is supplied for convenient copy/pasting. 
21 | -------------------------------------------------------------------------------- /docs/loading_steps.rst: -------------------------------------------------------------------------------- 1 | Loading steps 2 | ============= 3 | 4 | Before you can create workflows with scriptcwl, you need to load processing steps 5 | (i.e., ``CommandLineTools``, ``ExpressionTools`` and/or (sub) ``Workflows``). 6 | To load a directory of .cwl files, type: 7 | :: 8 | 9 | from scriptcwl import WorkflowGenerator 10 | 11 | with WorkflowGenerator() as wf: 12 | wf.load(steps_dir='/path/to/dir/with/cwl/steps/') 13 | 14 | To load a single cwl file, do: 15 | :: 16 | 17 | with WorkflowGenerator() as wf: 18 | wf.load(step_file='/path/to/workflow.cwl') 19 | 20 | The path to the ``step_file`` can be a local file path or a url. 21 | 22 | You can also load a list of step files and directories: 23 | :: 24 | 25 | all_my_steps = ['step.cwl', 'url.cwl', '/path/to/directory/'] 26 | with WorkflowGenerator() as wf: 27 | wf.load(step_list=all_my_steps) 28 | 29 | ``wf.load()`` can be called multiple times. Step files are added to the 30 | steps library one after the other. For every step that is added to the 31 | steps library, a method with the same name is added to the 32 | WorkflowGenerator object. To add a step to the workflow, this method must 33 | be called (examples below). 34 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=scriptcwl 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/nlppln_anonymize.rst: -------------------------------------------------------------------------------- 1 | Remove named entities from a directory of text files 2 | ==================================================== 3 | 4 | In this example, we create a pipeline that replaces named entities in a collection 5 | of (Dutch) text documents. 6 | Named entities are objects in text referred to by proper names, such as persons, 7 | organizations, and locations. In the workflow, named entities will be 8 | replaced with their named entity type (i.e., PER (person), ORG (organization), 9 | LOC (location), or UNSP (unspecified)). 10 | The workflow can be used as part of a data anonymization procedure. 11 | 12 | The workflow consists of the following steps: 13 | 14 | * Extract named entities from text documents using an existing tool called `frog `_ 15 | * Convert frog output to `SAF, a generic representation for text data `_ 16 | * Aggregate data about named entities that occur in the text files 17 | * Replace named entities with their named entity type in the SAF documents 18 | * Convert SAF documents to text 19 | 20 | All steps required for this workflow are available through `nlppln `_. 21 | 22 | Workflow 23 | ######## 24 | 25 | .. 
image:: images/nlppln-anonymize-workflow.png 26 | :alt: nlppln anonymize workflow 27 | :align: center 28 | 29 | Scriptcwl script 30 | ################ 31 | 32 | :: 33 | 34 | from scriptcwl import WorkflowGenerator 35 | 36 | with WorkflowGenerator() as wf: 37 | wf.load(steps_dir='/path/to/dir/with/cwl/steps/') 38 | 39 | doc = """Workflow that replaces named entities in text files. 40 | 41 | Input: 42 | txt_dir: directory containing text files 43 | 44 | Output: 45 | ner_stats: csv-file containing statistics about named entities in the text files 46 | txt: text files with named entities replaced 47 | """ 48 | wf.set_documentation(doc) 49 | 50 | txt_dir = wf.add_inputs(txt_dir='Directory') 51 | 52 | frogout = wf.frog_dir(in_files=txt_dir) 53 | saf = wf.frog_to_saf(in_files=frogout) 54 | ner_stats = wf.save_ner_data(in_files=saf) 55 | new_saf = wf.replace_ner(metadata=ner_stats, in_files=saf) 56 | txt = wf.saf_to_txt(in_files=new_saf) 57 | 58 | wf.add_outputs(ner_stats=ner_stats, txt=txt) 59 | 60 | wf.save('anonymize.cwl') 61 | 62 | 63 | CWL workflow 64 | ############ 65 | 66 | :: 67 | 68 | cwlVersion: v1.0 69 | class: Workflow 70 | inputs: 71 | txt-dir: Directory 72 | mode: string? 
73 | 74 | outputs: 75 | ner_stats: 76 | type: File 77 | outputSource: save-ner-data/ner_statistics 78 | 79 | out_files: 80 | type: 81 | type: array 82 | items: File 83 | outputSource: saf-to-txt/out_files 84 | 85 | steps: 86 | frog-ner: 87 | run: frog-dir.cwl 88 | in: 89 | dir_in: txt-dir 90 | out: [frogout] 91 | 92 | frog-to-saf: 93 | run: frog-to-saf.cwl 94 | in: 95 | in_files: frog-ner/frogout 96 | out: [saf] 97 | 98 | save-ner-data: 99 | run: save-ner-data.cwl 100 | in: 101 | in_files: frog-to-saf/saf 102 | out: [ner_statistics] 103 | 104 | replace-ner: 105 | run: replace-ner.cwl 106 | in: 107 | metadata: save-ner-data/ner_statistics 108 | in_files: frog-to-saf/saf 109 | mode: mode 110 | out: [out_files] 111 | 112 | saf-to-txt: 113 | run: saf-to-txt.cwl 114 | in: 115 | in_files: replace-ner/out_files 116 | out: [out_files] 117 | -------------------------------------------------------------------------------- /docs/printing_workflows.rst: -------------------------------------------------------------------------------- 1 | Printing workflows 2 | ================== 3 | 4 | To view its contents, a workflow can be printed at any time: 5 | 6 | .. code-block:: python 7 | 8 | with scriptcwl.WorkflowGenerator() as wf: 9 | print(wf) 10 | 11 | For an empty workflow, this looks like: 12 | 13 | .. code-block:: none 14 | 15 | #!/usr/bin/env cwl-runner 16 | cwlVersion: v1.0 17 | class: Workflow 18 | inputs: {} 19 | outputs: {} 20 | steps: {} 21 | 22 | In a printed workflow, steps are referred to by their absolute paths. 23 | **Therefore, do not use this method for saving workflows. 
24 | The absolute paths make them unportable.** 25 | -------------------------------------------------------------------------------- /docs/saving_workflows.rst: -------------------------------------------------------------------------------- 1 | Saving workflows 2 | ================ 3 | 4 | To save a workflow call the ``WorkflowGenerator.save()`` method: 5 | :: 6 | 7 | wf.save('workflow.cwl') 8 | 9 | By default, the paths in the ``run`` field of workflow steps are absolute. This means 10 | that a workflow created on one machine cannot be run on another machine. However, 11 | there are multiple options for creating portable workflows. 12 | 13 | Saving workflows with relative paths 14 | #################################### 15 | 16 | To get relative paths in the ``run`` field of workflow steps, use ``mode='rel'``: 17 | :: 18 | 19 | wf.save('workflow.cwl', mode='rel') 20 | 21 | The paths in the ``run`` field are relative to where the workflow is saved. This 22 | option is convenient when you are creating workflows using a single directory 23 | with possible workflow steps. 24 | 25 | Using a working directory 26 | ######################### 27 | 28 | If you have multiple directories containing workflow steps and the locations of 29 | these directories may differ depending on where software is installed (for example, 30 | if you want to use the generic NLP steps from nlppln, but also need project-specific 31 | data processing steps), it is possible to specify a working directory when creating 32 | the ``WorkflowGenerator`` object. If you do this, all steps are copied to the working 33 | directory. When you save the workflow using ``mode='wd'``, the paths in the ``run`` 34 | fields are set to the basename of the step (because all steps are in the same 35 | directory). 
36 | :: 37 | 38 | from scriptcwl import WorkflowGenerator 39 | 40 | with WorkflowGenerator(working_dir='path/to/working_dir') as wf: 41 | wf.load(steps_dir='some/path/') 42 | wf.load(steps_dir='some/other/path/') 43 | 44 | # add inputs, steps and outputs 45 | 46 | wf.save('workflow', mode='wd') 47 | 48 | The workflow is saved in the working directory and then copied to 49 | the specified location. To be able to run the workflow, use the copy in the 50 | working directory (please note that the working directory is not deleted automatically). 51 | 52 | Also, steps from urls are not copied to the working directory. 53 | 54 | Pack workflows 55 | ############## 56 | 57 | Another way to create workflows with all steps in one file is to save it with ``mode='pack'``: 58 | :: 59 | 60 | wf.save('workflow.cwl', mode='pack') 61 | 62 | Please note that packed workflows cannot be used as a building block in ``scriptcwl``. 63 | If you try to load a packed workflow, you will get a warning. 64 | 65 | Saved With ``mode='pack'``, the example workflow looks like: 66 | :: 67 | 68 | { 69 | "cwlVersion": "v1.0", 70 | "$graph": [ 71 | { 72 | "class": "CommandLineTool", 73 | "baseCommand": [ 74 | "python", 75 | "-m", 76 | "scriptcwl.examples.add" 77 | ], 78 | "inputs": [ 79 | { 80 | "type": "int", 81 | "inputBinding": { 82 | "position": 1 83 | }, 84 | "id": "#add.cwl/x" 85 | }, 86 | { 87 | "type": "int", 88 | "inputBinding": { 89 | "position": 2 90 | }, 91 | "id": "#add.cwl/y" 92 | } 93 | ], 94 | "stdout": "cwl.output.json", 95 | "outputs": [ 96 | { 97 | "type": "int", 98 | "id": "#add.cwl/answer" 99 | } 100 | ], 101 | "id": "#add.cwl" 102 | }, 103 | { 104 | "class": "CommandLineTool", 105 | "baseCommand": [ 106 | "python", 107 | "-m", 108 | "scriptcwl.examples.multiply" 109 | ], 110 | "inputs": [ 111 | { 112 | "type": "int", 113 | "inputBinding": { 114 | "position": 1 115 | }, 116 | "id": "#multiply.cwl/x" 117 | }, 118 | { 119 | "type": "int", 120 | "inputBinding": { 121 | "position": 2 
122 | }, 123 | "id": "#multiply.cwl/y" 124 | } 125 | ], 126 | "stdout": "cwl.output.json", 127 | "outputs": [ 128 | { 129 | "type": "int", 130 | "id": "#multiply.cwl/answer" 131 | } 132 | ], 133 | "id": "#multiply.cwl" 134 | }, 135 | { 136 | "class": "Workflow", 137 | "inputs": [ 138 | { 139 | "type": "int", 140 | "id": "#main/num1" 141 | }, 142 | { 143 | "type": "int", 144 | "id": "#main/num2" 145 | } 146 | ], 147 | "outputs": [ 148 | { 149 | "type": "int", 150 | "outputSource": "#main/multiply-1/answer", 151 | "id": "#main/final_answer" 152 | } 153 | ], 154 | "steps": [ 155 | { 156 | "run": "#add.cwl", 157 | "in": [ 158 | { 159 | "source": "#main/num1", 160 | "id": "#main/add-1/x" 161 | }, 162 | { 163 | "source": "#main/num2", 164 | "id": "#main/add-1/y" 165 | } 166 | ], 167 | "out": [ 168 | "#main/add-1/answer" 169 | ], 170 | "id": "#main/add-1" 171 | }, 172 | { 173 | "run": "#multiply.cwl", 174 | "in": [ 175 | { 176 | "source": "#main/add-1/answer", 177 | "id": "#main/multiply-1/x" 178 | }, 179 | { 180 | "source": "#main/num2", 181 | "id": "#main/multiply-1/y" 182 | } 183 | ], 184 | "out": [ 185 | "#main/multiply-1/answer" 186 | ], 187 | "id": "#main/multiply-1" 188 | } 189 | ], 190 | "id": "#main" 191 | } 192 | ] 193 | } 194 | 195 | Workflow validation 196 | ################### 197 | 198 | Before the workflow is saved, it is validated using ``cwltool``. Validation can also be 199 | triggered manually: 200 | :: 201 | 202 | wf.validate() 203 | 204 | It is also possible to disable workflow validation on save: 205 | :: 206 | 207 | wf.save('workflow.cwl', validate=False) 208 | 209 | File encoding 210 | ############# 211 | 212 | By default, the encoding used to save workflows is ``utf-8``. 
If necessary, 213 | a different encoding can be specified: 214 | :: 215 | 216 | wf.save('workflow.cwl', encoding='utf-16') 217 | -------------------------------------------------------------------------------- /docs/setting_documentation.rst: -------------------------------------------------------------------------------- 1 | Adding workflow documentation 2 | ============================== 3 | 4 | To add documentation to your workflow, use the ``set_documentation()`` method: 5 | :: 6 | 7 | doc = """Workflow that performs a special calculation with two numbers 8 | 9 | The two numbers are added and the answer is multiplied by the second number. 10 | 11 | Input: 12 | num1: int 13 | num2: int 14 | 15 | Output: 16 | answer: int 17 | """ 18 | wf.set_documentation(doc) 19 | 20 | Setting labels 21 | ############## 22 | 23 | Instead of or in addition to documentation, it is also possible to set a label 24 | for a workflow: 25 | :: 26 | 27 | wf.set_label('Calculate special number') 28 | -------------------------------------------------------------------------------- /docs/useful_tools.rst: -------------------------------------------------------------------------------- 1 | Useful tools 2 | ============ 3 | 4 | To use scriptcwl for creating CWL workflows, you need CWL ``CommandLineTools``. 5 | There are some software packages that help with generating those. 6 | 7 | * `argparse2tool `_: Generate CWL ``CommandLineTool`` wrappers (and/or Galaxy tool descriptions) from Python programs that use argparse. Also supports the `click `_ argument parser 8 | * `pypi2cwl `_: Automatically run argparse2cwl on any package in PyPi 9 | * `python-cwlgen `_: Generate CommandLineTool and DockerRequirement programmatically 10 | -------------------------------------------------------------------------------- /docs/user_manual.rst: -------------------------------------------------------------------------------- 1 | User Manual 2 | =========== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 2 6 | 7 | loading_steps 8 | listing_steps 9 | workflow_inputs 10 | setting_documentation 11 | adding_workflow_steps 12 | adding_outputs 13 | printing_workflows 14 | saving_workflows 15 | enable_logging 16 | -------------------------------------------------------------------------------- /docs/workflow_inputs.rst: -------------------------------------------------------------------------------- 1 | Workflow inputs 2 | =============== 3 | 4 | Workflow inputs can be added by calling ``add_input()``: 5 | :: 6 | 7 | num1 = wf.add_input(num1='int') 8 | num2 = wf.add_input(num2='int') 9 | 10 | The ``add_input()`` method expects a ``name=type`` pair as input parameter. 11 | The pair connects an input name (``num1`` in the example) to a CWL type 12 | (``'int'``). An overview of CWL types can be found in the 13 | `specification `_. 14 | 15 | Optional inputs 16 | ############### 17 | 18 | Workflow inputs can be made optional by adding a question mark to the type: 19 | :: 20 | 21 | num1 = wf.add_input(num1='int?') 22 | 23 | Default values 24 | ############## 25 | 26 | When adding an input parameter to a workflow, you can set a default value: 27 | :: 28 | 29 | num1 = wf.add_input(num1='int', default=5) 30 | 31 | As a consequence, ``default`` cannot be used as a name for a workflow input parameter. 32 | 33 | Labels 34 | ###### 35 | 36 | You can also add a label to a workflow input: 37 | :: 38 | 39 | num1 = wf.add_input(num1='int', label='The first number that is processed.') 40 | 41 | Again, this means ``label`` cannot be used as a name for a workflow input parameter. 42 | 43 | Arrays and other complex input types 44 | #################################### 45 | 46 | Arrays of workflow inputs can be specified with ``[]``: 47 | :: 48 | 49 | numbers = wf.add_input(numbers='int[]') 50 | 51 | You can also specify the input using a dictionary with two keys: ``{'type': 52 | 'array', 'items': 'int'}``. 
53 | :: 54 | 55 | numbers = wf.add_input(numbers=dict(type='array', items='int')) 56 | 57 | This way you can also specify more complex inputs. For example, to create an 58 | array of arrays of strings, do: 59 | :: 60 | 61 | inp = dict(type='array', items=dict(type='array', items='string')) 62 | strings = wf.add_input(my_array_of_array_of_strings=inp) 63 | 64 | Use ``print(wf)`` and ``wf.validate()`` to make sure your inputs are correct. 65 | 66 | Enums 67 | ##### 68 | 69 | To use an enum as a workflow input, do: 70 | :: 71 | 72 | mode = wf.add_input(mode='enum', symbols=['one', 'two', 'three']) 73 | 74 | The ``symbols`` should be a list of strings (lists containing other types are 75 | converted to lists of strings). 76 | Again, ``symbols`` cannot be used as a name for a workflow input parameter. 77 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLeSC/scriptcwl/d2dcfa0686eaa893dd4b3efbd232e9bd1e15194d/requirements.txt -------------------------------------------------------------------------------- /scriptcwl/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .workflow import WorkflowGenerator 4 | 5 | __version__ = '0.8.1' 6 | 7 | logging.getLogger(__name__).addHandler(logging.NullHandler()) 8 | -------------------------------------------------------------------------------- /scriptcwl/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLeSC/scriptcwl/d2dcfa0686eaa893dd4b3efbd232e9bd1e15194d/scriptcwl/examples/__init__.py -------------------------------------------------------------------------------- /scriptcwl/examples/add.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2
| cwlVersion: v1.0 3 | class: CommandLineTool 4 | baseCommand: ["python", "-m", "scriptcwl.examples.add"] 5 | 6 | inputs: 7 | x: 8 | type: int 9 | inputBinding: 10 | position: 1 11 | y: 12 | type: int 13 | inputBinding: 14 | position: 2 15 | 16 | stdout: cwl.output.json 17 | 18 | outputs: 19 | answer: 20 | type: int 21 | -------------------------------------------------------------------------------- /scriptcwl/examples/add.py: -------------------------------------------------------------------------------- 1 | import click 2 | import json 3 | 4 | 5 | @click.command() 6 | @click.argument('x', type=int) 7 | @click.argument('y', type=int) 8 | def add(x, y): 9 | click.echo(json.dumps({'answer': x+y})) 10 | 11 | 12 | if __name__ == '__main__': 13 | add() 14 | -------------------------------------------------------------------------------- /scriptcwl/examples/add_multiply_example.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.0 3 | class: Workflow 4 | inputs: 5 | num1: int 6 | num2: int 7 | outputs: 8 | final_answer: 9 | type: int 10 | outputSource: multiply/answer 11 | steps: 12 | add: 13 | run: add.cwl 14 | in: 15 | y: num2 16 | x: num1 17 | out: 18 | - answer 19 | multiply: 20 | run: multiply.cwl 21 | in: 22 | y: num2 23 | x: add/answer 24 | out: 25 | - answer 26 | -------------------------------------------------------------------------------- /scriptcwl/examples/multiply.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.0 3 | class: CommandLineTool 4 | baseCommand: ["python", "-m", "scriptcwl.examples.multiply"] 5 | 6 | inputs: 7 | x: 8 | type: int 9 | inputBinding: 10 | position: 1 11 | y: 12 | type: int 13 | inputBinding: 14 | position: 2 15 | 16 | stdout: cwl.output.json 17 | 18 | outputs: 19 | answer: 20 | type: int 21 | 
class StepsLibrary(object):
    """Registry of the steps that are available for building workflows.

    Steps are stored by step name; a second mapping translates the python
    name of a step back to its step name.
    """

    def __init__(self, working_dir=None):
        """Create an empty library.

        Args:
            working_dir (str, optional): directory that is handed to
                ``load_steps`` whenever steps are loaded.
        """
        self.working_dir = working_dir
        self.steps = {}
        self.python_names2step_names = {}
        self.step_ids = []

    def load(self, steps_dir=None, step_file=None, step_list=None):
        """Read steps and register every step that does not clash.

        A step is skipped with a ``UserWarning`` when its name is already
        registered, or when another registered step has the same python name.

        Args:
            steps_dir (str, optional): passed on to ``load_steps``.
            step_file (str, optional): passed on to ``load_steps``.
            step_list (list, optional): passed on to ``load_steps``.
        """
        candidates = load_steps(working_dir=self.working_dir,
                                steps_dir=steps_dir,
                                step_file=step_file,
                                step_list=step_list)

        for step_name, candidate in candidates.items():
            if step_name in self.steps:
                text = 'Step "{}" already in steps library.'.format(step_name)
                warnings.warn(UserWarning(text))
                continue

            if candidate.python_name in self.python_names2step_names:
                # Two different file names map onto one python method name.
                other = self.python_names2step_names.get(
                    candidate.python_name)
                text = 'step "{}.cwl" has the same python name as "{}.cwl". ' \
                       'Please rename file "{}.cwl", so it can be ' \
                       'loaded.'.format(step_name, other, step_name)
                warnings.warn(UserWarning(text))
                continue

            self.steps[step_name] = candidate
            self.python_names2step_names[candidate.python_name] = step_name

    def get_step(self, name):
        """Return the step registered as ``name``, or None if unknown."""
        return self.steps.get(name)

    def list_steps(self):
        """Return a text overview of all registered steps.

        Returns:
            str: the sorted signatures of the non-workflow steps under the
            heading ``Steps``, followed by those of the workflows under the
            heading ``Workflows``.
        """
        line = u' {:.<25} {}'
        # Signatures keyed by "is this step a workflow?".
        grouped = {False: [], True: []}
        for step_name, step in self.steps.items():
            key = True if step.is_workflow else False
            grouped[key].append(line.format(step_name, step))

        tool_part = u'\n'.join(sorted(grouped[False]))
        workflow_part = u'\n'.join(sorted(grouped[True]))
        return u'Steps\n' + tool_part + u'\n\nWorkflows\n' + workflow_part
def load_yaml(filename):
    """Return the mapping stored in a yaml file.

    Args:
        filename (str): path to a local yaml (CWL) file.

    Returns:
        dict: the parsed document. An empty dict is returned for packed
        workflows (documents with a ``$graph`` entry or documents that give
        a parser error) and for documents whose top level is not a mapping
        (e.g. an empty file), so callers can always use ``.get()``.
    """
    with open(filename) as myfile:
        content = myfile.read()

    # Only rewrite backslashes on Windows. ``sys.platform`` is 'win32' there;
    # a substring test for "win" would also match 'darwin' (macOS).
    if sys.platform.startswith("win"):
        content = content.replace("\\", "/")

    try:
        obj = yaml.safe_load(content)
    # packed workflow, will be ignored later
    # (it seems in some cases a packed workflow gives an ParserError, while
    # in other cases it is loaded correctly)
    except yaml.parser.ParserError:
        return {}

    # Empty files give None and packed documents can be lists; neither has
    # a usable ``class`` entry.
    if not isinstance(obj, dict):
        return {}

    # packed workflow, will be ignored later
    if obj.get('$graph'):
        return {}
    return obj


def _requires_subworkflows(requirements):
    """Return True if ``requirements`` has SubworkflowFeatureRequirement.

    Handles both ways CWL allows requirements to be written: a list of
    ``{'class': ...}`` mappings and a mapping keyed by the class name.
    """
    wanted = 'SubworkflowFeatureRequirement'
    if isinstance(requirements, dict):
        return wanted in requirements
    if isinstance(requirements, (list, tuple)):
        return any(isinstance(req, dict) and req.get('class') == wanted
                   for req in requirements)
    return False


def sort_loading_order(step_files):
    """Sort step files into correct loading order.

    The correct loading order is first tools, then workflows without
    subworkflows, and then workflows with subworkflows. This order is
    required to avoid error messages when a working directory is used.

    Args:
        step_files (list): local paths and/or http(s) urls of CWL files.

    Returns:
        list: the same entries, grouped as described above; the relative
        order within each group is kept.
    """
    tools = []
    workflows = []
    workflows_with_subworkflows = []

    for f in step_files:
        # assume that urls are tools
        if f.startswith(('http://', 'https://')):
            tools.append(f)
            continue

        obj = load_yaml(f)
        if obj.get('class', '') != 'Workflow':
            tools.append(f)
        elif _requires_subworkflows(obj.get('requirements')):
            workflows_with_subworkflows.append(f)
        else:
            workflows.append(f)

    return tools + workflows + workflows_with_subworkflows
# Helper function to make the import of cwltool.load_tool quiet
@contextmanager
def quiet():
    """Context manager that diverts stdout and stderr to the null device.

    On leaving the ``with`` block (normally or through an exception) the
    streams that were active on entry are put back.
    """
    # Remember the current streams instead of relying on sys.__stdout__ and
    # sys.__stderr__, because Jupyter doesn't support setting it back to
    # those.
    streams_on_entry = (sys.stdout, sys.stderr)

    # One shared sink for both streams.
    # NOTE(review): this handle is never closed. Objects created inside the
    # block may keep a reference to it, so confirm before adding a close().
    sink = open(os.devnull, "w")
    sys.stdout = sink
    sys.stderr = sink
    try:
        yield
    finally:
        # Revert back to the streams that were active on entry
        sys.stdout, sys.stderr = streams_on_entry
43 | """Load and validate CWL file using cwltool 44 | """ 45 | logger.debug('Loading CWL file "{}"'.format(fname)) 46 | # Fetching, preprocessing and validating cwl 47 | 48 | # Older versions of cwltool 49 | if legacy_cwltool: 50 | try: 51 | (document_loader, workflowobj, uri) = fetch_document(fname) 52 | (document_loader, _, processobj, metadata, uri) = \ 53 | validate_document(document_loader, workflowobj, uri) 54 | except TypeError: 55 | from cwltool.context import LoadingContext, getdefault 56 | from cwltool import workflow 57 | from cwltool.resolver import tool_resolver 58 | from cwltool.load_tool import resolve_tool_uri 59 | 60 | loadingContext = LoadingContext() 61 | loadingContext.construct_tool_object = getdefault( 62 | loadingContext.construct_tool_object, 63 | workflow.default_make_tool) 64 | loadingContext.resolver = getdefault(loadingContext.resolver, 65 | tool_resolver) 66 | 67 | uri, tool_file_uri = resolve_tool_uri( 68 | fname, resolver=loadingContext.resolver, 69 | fetcher_constructor=loadingContext.fetcher_constructor) 70 | 71 | document_loader, workflowobj, uri = fetch_document( 72 | uri, resolver=loadingContext.resolver, 73 | fetcher_constructor=loadingContext.fetcher_constructor) 74 | document_loader, avsc_names, processobj, metadata, uri = \ 75 | validate_document( 76 | document_loader, workflowobj, uri, 77 | loadingContext.overrides_list, {}, 78 | enable_dev=loadingContext.enable_dev, 79 | strict=loadingContext.strict, 80 | preprocess_only=False, 81 | fetcher_constructor=loadingContext.fetcher_constructor, 82 | skip_schemas=False, 83 | do_validate=loadingContext.do_validate) 84 | # Recent versions of cwltool 85 | else: 86 | (loading_context, workflowobj, uri) = fetch_document(fname) 87 | loading_context, uri = resolve_and_validate_document(loading_context, 88 | workflowobj, uri) 89 | document_loader = loading_context.loader 90 | processobj = workflowobj 91 | metadata = loading_context.metadata 92 | 93 | return document_loader, processobj, 
class Step(object):
    """Representation of a CWL step.

    The Step can be a CommandLineTool, an ExpressionTool or a Workflow. Steps
    are read from file (or url) and validated using ``cwltool``.
    """

    def __init__(self, fname):
        """Load the CWL document ``fname`` and collect its inputs and outputs.

        Args:
            fname (str): local path or http(s) url of a CWL file.

        Raises:
            PackedWorkflowException: the document is a packed workflow.
            NotImplementedError: the document has an unsupported class.
        """
        fname = str(fname)
        self.from_url = fname.startswith(('http://', 'https://'))
        # Local files are referred to by absolute path, urls are kept as is.
        self.run = fname if self.from_url else os.path.abspath(fname)

        bn = os.path.basename(fname)
        self.name = os.path.splitext(bn)[0]
        self.python_name = python_name(self.name)

        self.step_inputs = {}
        self.input_names = []
        self.input_types = {}
        self.optional_input_names = []
        self.optional_input_types = {}
        self.output_names = []
        self.output_types = {}
        self.is_workflow = False
        self.is_scattered = False
        self.scattered_inputs = []
        # Maps python names (with underscores) to the original CWL ids.
        self.python_names = {}

        _, processobj, _, _ = load_cwl(fname)
        s = processobj

        self.command_line_tool = s
        valid_classes = ('CommandLineTool', 'Workflow', 'ExpressionTool')
        if 'class' in s and s['class'] in valid_classes:
            self.is_workflow = s['class'] == 'Workflow'
            for inp in s['inputs']:
                # Due to preprocessing of cwltool the id has become an
                # absolute iri, for ease of use we keep only the fragment
                short_id = iri2fragment(inp['id'])
                if self._input_optional(inp):
                    self.optional_input_names.append(short_id)
                    self.optional_input_types[short_id] = inp['type']
                else:
                    self.input_names.append(short_id)
                    self.input_types[short_id] = inp['type']
                self.python_names[python_name(short_id)] = short_id

            for o in s['outputs']:
                short_id = iri2fragment(o['id'])
                self.output_names.append(short_id)
                self.output_types[short_id] = o['type']
                self.python_names[python_name(short_id)] = short_id
        elif isinstance(s, CommentedSeq):
            msg = 'Not loading "{}", because it is a packed workflow.'
            raise PackedWorkflowException(msg.format(self.run))
        else:
            msg = '"{}" is a unsupported'
            raise NotImplementedError(msg.format(self.name))

    def get_input_names(self):
        """Return the Step's input names (including optional input names).

        Returns:
            list of strings.
        """
        return self.input_names + self.optional_input_names

    def set_input(self, p_name, value):
        """Set a Step's input variable to a certain value.

        The value comes either from a workflow input or output of a previous
        step.

        Args:
            p_name (str): the python name of the Step input.
            value (str): the name of the output variable that provides the
                value for this input.

        Raises:
            ValueError: The name provided is not a valid input name for this
                Step.
        """
        name = self.python_names.get(p_name)
        # ``name`` is None when the python name is unknown; outputs are in
        # ``python_names`` too, so the input names must be checked as well.
        if name is None or name not in self.get_input_names():
            raise ValueError('Invalid input "{}"'.format(p_name))
        self.step_inputs[name] = value

    def _set_name_in_workflow(self, name):
        """Store the name under which this step is used in a workflow."""
        self.name_in_workflow = name

    def output_reference(self, name):
        """Return a reference to the given output for use in an input
            of a next Step.

        For a Step named `echo` that has an output called `echoed`, the
        reference `echo/echoed` is returned.

        Args:
            name (str): the name of the Step output
        Raises:
            ValueError: The name provided is not a valid output name for this
                Step.
        """
        if name not in self.output_names:
            raise ValueError('Invalid output "{}"'.format(name))
        return Reference(step_name=self.name_in_workflow, output_name=name)

    @staticmethod
    def _input_optional(inp):
        """Returns True if a step input parameter is optional.

        An input is optional when it has a default value, when its type is a
        string that ends with ``?`` or when its type is a list that contains
        ``null``.

        Args:
            inp (dict): a dictionary representation of an input.

        Raises:
            ValueError: The inp provided is not valid.
        """
        if 'default' in inp:
            return True

        typ = inp.get('type')
        if isinstance(typ, list):
            # The cwltool validation expands optional arguments to
            # [u'null', <type>]
            return u'null' in typ
        if isinstance(typ, dict):
            # TODO: handle case where input type is dict
            return False
        if isinstance(typ, six.string_types):
            return typ.endswith('?')
        raise ValueError('Invalid input "{}"'.format(inp.get('id')))

    def _to_embedded_obj(self):
        """Return a copy of the loaded CWL object for embedding in a workflow.

        In the copy the shebang line is removed, the ids of the object and
        of its inputs and outputs are replaced by local identifiers, and for
        (sub)workflows the ``source`` fields of the step inputs are removed.
        """
        embedded_clt = copy.deepcopy(self.command_line_tool)

        try:
            name_in_workflow = self.name_in_workflow
        except AttributeError:
            # Step has not yet been added to a workflow, so we use the step
            # name for the id fields of the embedded object.
            name_in_workflow = self.name

        # Remove shebang line
        # This is a bit magical, digging into ruamel.yaml, but there
        # does not seem to be a better way.
        try:
            global_comments = embedded_clt.ca.comment[1]
        except TypeError:
            global_comments = None
        if global_comments:
            if global_comments[0].value.startswith('#!'):
                del global_comments[0]

        # Give inputs and outputs a JSON-LD local identifier, instead of
        # the default absolute path that doesn't exist on other machines.
        def to_local_id(iri, name_in_workflow):
            parsed_iri = urlparse(iri)
            input_id = name_in_workflow
            if parsed_iri.fragment:
                input_id += '#' + parsed_iri.fragment
            if not input_id.startswith('_:'):
                input_id = '_:' + input_id
            return input_id

        for inp in embedded_clt['inputs']:
            inp['id'] = to_local_id(inp['id'], name_in_workflow)

        for outp in embedded_clt['outputs']:
            outp['id'] = to_local_id(outp['id'], name_in_workflow)

        embedded_clt['id'] = to_local_id(embedded_clt['id'], name_in_workflow)

        # If the step is a (sub)workflow, the source fields of the steps in the
        # workflow must be removed.
        if embedded_clt['class'] == 'Workflow':
            for step in embedded_clt['steps']:
                for inp in step['in']:
                    # Not every step input has a source (e.g. when only a
                    # default is given), so don't fail on a missing key.
                    inp.pop('source', None)

        return embedded_clt

    def to_obj(self, wd=False, pack=False, relpath=None):
        """Return the step as an dict that can be written to a yaml file.

        Args:
            wd (bool): use only the file name of a local step in ``run``.
            pack (bool): use ``self.orig`` for ``run``.
            relpath (str): if given, ``run`` of a local step is made relative
                to this path. Takes precedence over ``wd``.

        Returns:
            dict: yaml representation of the step.
        """
        obj = CommentedMap()
        if pack:
            # NOTE(review): ``orig`` is not assigned anywhere in this class;
            # presumably it is set by the caller - confirm.
            obj['run'] = self.orig
        elif relpath is not None:
            if self.from_url:
                obj['run'] = self.run
            else:
                obj['run'] = os.path.relpath(self.run, relpath)
        elif wd:
            if self.from_url:
                obj['run'] = self.run
            else:
                obj['run'] = os.path.basename(self.run)
        else:
            obj['run'] = self.run
        obj['in'] = self.step_inputs
        obj['out'] = self.output_names
        if self.is_scattered:
            obj['scatter'] = self.scattered_inputs
            # scatter_method is optional when scattering over a single
            # variable; it is not set in __init__, so a missing attribute is
            # treated as "no scatter method".
            scatter_method = getattr(self, 'scatter_method', None)
            if scatter_method is not None:
                obj['scatterMethod'] = scatter_method

        return obj

    def __str__(self):
        """Return the python signature that adds this step to a workflow."""
        if self.optional_input_names:
            template = u'{} = wf.{}({}[, {}])'
        else:
            template = u'{} = wf.{}({})'
        out_names = [python_name(n) for n in self.output_names]
        in_names = [python_name(n) for n in self.input_names]
        opt_in_names = [python_name(n) for n in self.optional_input_names]
        return template.format(u', '.join(out_names), self.python_name,
                               u', '.join(in_names), u', '.join(
                                   opt_in_names))

    def __repr__(self):
        return str(self)

    def list_inputs(self):
        """Return a string listing the Step's required input names and types.

        The types are returned in a copy/pastable format, so if the type is
        `string`, `'string'` (with single quotes) is returned.

        Returns:
            str containing the input names and types, one per line.
        """
        doc = []
        for inp, typ in self.input_types.items():
            if isinstance(typ, six.string_types):
                typ = "'{}'".format(typ)
            doc.append('{}: {}'.format(inp, typ))
        return '\n'.join(doc)
def python_name(name):
    """Turn a CWL step (or parameter) name into a valid python name.

    Every minus sign in the name is replaced by an underscore.

    Args:
        name (str): CWL name to convert.

    Returns:
        str: the name with underscores instead of minus signs.
    """
    pieces = name.split('-')
    return '_'.join(pieces)
To add steps to the steps library, the `load` method can be 37 | called with either a path to a directory containing CWL files: 38 | :: 39 | 40 | from scriptcwl import WorkflowGenerator 41 | 42 | with WorkflowGenerator() as wf: 43 | wf.load(steps_dir='/path/to/dir/with/cwl/steps/') 44 | 45 | Or a single CWL file: 46 | :: 47 | 48 | with WorkflowGenerator() as wf: 49 | wf.load(step_file='/path/to/cwl/step/file') 50 | 51 | ``wf.load()`` can be called multiple times. Step files are added to the 52 | steps library one after the other. For every step that is added to the 53 | steps library, a method with the same name is added to the 54 | WorkflowGenerator object. To add a step to the workflow, this method must 55 | be called (examples below). 56 | 57 | Next, the user should add one or more workflow inputs: 58 | :: 59 | txt_dir = wf.add_input(txt_dir='Directory') 60 | 61 | The ``add_input()`` method expects a ``name=type`` pair as input parameter. 62 | The pair connects an input name (``txt_dir`` in the example) to a CWL type 63 | (``'Directory'``). Optionally, a default value can be specified using 64 | ``default=value``. 65 | 66 | The ``add_input()`` method returns a string containing the name 67 | that can be used to connect this input parameter to step input parameter 68 | names. 69 | 70 | Next, workflow steps can be added. To add a workflow step, its method must 71 | be called on the WorkflowGenerator object. This method expects a list of 72 | (key, value) pairs as input parameters. (To find out what inputs a step 73 | needs call ``wf.inputs()``. This method prints all the inputs 74 | and their types.) The method returns a list of strings containing output 75 | names that can be used as input for later steps, or that can be connected 76 | to workflow outputs. 
def __init__(self, steps_dir=None, working_dir=None):
    """Set up an empty workflow and load steps from ``steps_dir``.

    Args:
        steps_dir (str, optional): directory with CWL files; it is handed
            to ``load()`` at the end of the constructor.
        working_dir (str, optional): working directory. When given, its
            absolute path is stored and the directory is created if it
            does not exist yet.
    """
    resolved_dir = working_dir
    if working_dir:
        resolved_dir = os.path.abspath(working_dir)
        if not os.path.exists(resolved_dir):
            os.makedirs(resolved_dir)
    self.working_dir = resolved_dir

    # Flags that determine which requirements the workflow needs.
    self.has_workflow_step = False
    self.has_scatter_requirement = False
    self.has_multiple_inputs = False

    # The workflow under construction.
    self.wf_inputs = CommentedMap()
    self.wf_steps = CommentedMap()
    self.wf_outputs = CommentedMap()
    self.step_output_types = {}

    # The library receives the working directory as passed in by the caller.
    self.steps_library = StepsLibrary(working_dir=working_dir)

    # Must be set before load(), which checks that the workflow is open.
    self._wf_closed = False

    self.load(steps_dir)
def load(self, steps_dir=None, step_file=None, step_list=None):
    """Load CWL steps into the WorkflowGenerator's steps library.

    The steps (command line tools and workflows) that are found are added
    to the steps library, after which they can be used to create
    workflows. All arguments are handed to the steps library unchanged.

    Args:
        steps_dir (str): path to directory containing CWL files. All CWL in
            the directory are loaded.
        step_file (str): path to a file containing a CWL step that will be
            added to the steps library.
        step_list (list): handed to the steps library's ``load`` as
            ``step_list``.

    Raises:
        ValueError: the WorkflowGenerator has been closed.
    """
    self._closed()

    sources = dict(steps_dir=steps_dir,
                   step_file=step_file,
                   step_list=step_list)
    self.steps_library.load(**sources)
def add_input(self, **kwargs):
    """Add workflow input.

    Args:
        kwargs (dict): A dict with a `name: type` item
            and optionally a `default: value` item, a `label: text` item
            and (for enums) a `symbols: list` item, where name is the
            name (id) of the workflow input (e.g., `dir_in`) and type is
            the type of the input (e.g., `'Directory'`).
            The type of a step's input parameters can be learned from
            `wf.inputs(step_name)`.
            As a consequence, `default`, `label` and `symbols` cannot be
            used as names of workflow inputs.

    Returns:
        Reference: a reference to the new workflow input.

    Raises:
        ValueError: No or multiple parameter(s) have been specified, the
            enum symbols are missing, empty or not a list, or the name is
            already used as a workflow input.
    """
    self._closed()

    # Take the reserved option names out first; what remains must be the
    # single ``name=type`` pair.
    options = {}
    for key in ('default', 'label'):
        if key in kwargs:
            options[key] = kwargs.pop(key)
    symbols = kwargs.pop('symbols', None)

    if not kwargs:
        raise ValueError("No parameter specified.")
    name, input_type = kwargs.popitem()
    if kwargs:
        raise ValueError("Too many parameters, not clear what to do "
                         "with {}".format(kwargs))

    is_enum = input_type == 'enum'
    if is_enum:
        # make sure symbols is set
        if symbols is None:
            raise ValueError("Please specify the enum's symbols.")
        # make sure the symbols are a list; isinstance also accepts list
        # subclasses (e.g. sequences that were read from a yaml file)
        if not isinstance(symbols, list):
            raise ValueError('Symbols should be a list.')
        # make sure symbols is not empty
        if not symbols:
            raise ValueError("The enum's symbols cannot be empty.")
        # make sure symbols is a list of strings
        symbols = [str(s) for s in symbols]

    if name in self.wf_inputs:
        msg = '"{}" is already used as a workflow input. Please use a ' +\
              'different name.'
        raise ValueError(msg.format(name))

    # All validation passed; only now build the input description.
    input_dict = CommentedMap()
    for key in ('default', 'label'):
        if key in options:
            input_dict[key] = options[key]

    if is_enum:
        typ = CommentedMap()
        typ['type'] = 'enum'
        typ['symbols'] = symbols
        input_dict['type'] = typ
    elif bool(input_dict) or isinstance(input_type, dict):
        # Set the 'type' if we can't use simple notation (because there is
        # a default value or a label), and for complex input types, so the
        # user doesn't have to do it.
        input_dict['type'] = input_type

    # Make sure we can use the notation without 'type' if the input allows
    # it.
    if bool(input_dict):
        self.wf_inputs[name] = input_dict
    else:
        self.wf_inputs[name] = input_type

    return Reference(input_name=name)
344 | 345 | Optionally, the step returned is a deep copy from the step in the steps 346 | library, so additional information (e.g., about whether the step was 347 | scattered) can be stored in the copy. 348 | 349 | Args: 350 | name (str): name of the step in the steps library. 351 | make_copy (bool): whether a deep copy of the step should be 352 | returned or not (default: True). 353 | 354 | Returns: 355 | Step from steps library. 356 | 357 | Raises: 358 | ValueError: The requested step cannot be found in the steps 359 | library. 360 | """ 361 | self._closed() 362 | 363 | s = self.steps_library.get_step(name) 364 | if s is None: 365 | msg = '"{}" not found in steps library. Please check your ' \ 366 | 'spelling or load additional steps' 367 | raise ValueError(msg.format(name)) 368 | if make_copy: 369 | s = copy.deepcopy(s) 370 | return s 371 | 372 | def _generate_step_name(self, step_name): 373 | name = step_name 374 | i = 1 375 | 376 | while name in self.steps_library.step_ids: 377 | name = '{}-{}'.format(step_name, i) 378 | i += 1 379 | 380 | return name 381 | 382 | def to_obj(self, wd=False, pack=False, relpath=None): 383 | """Return the created workflow as a dict. 384 | 385 | The dict can be written to a yaml file. 386 | 387 | Returns: 388 | A yaml-compatible dict representing the workflow. 
def to_script(self, wf_name='wf'):
    """Generate the scriptcwl script for the current workflow.

    The script is returned as a string; nothing is printed.

    Args:
        wf_name (str): string used for the WorkflowGenerator object in the
            generated script (default: ``wf``).

    Returns:
        str: the lines of the generated script, joined by newlines.

    Raises:
        ValueError: The WorkflowGenerator has been closed.
    """
    self._closed()

    script = []

    # Workflow inputs.
    # add_input() accepts exactly one ``name=type`` pair and returns a
    # single reference, so one call is generated per workflow input.
    for name, typ in self.wf_inputs.items():
        params = []
        if isinstance(typ, dict):
            # Long notation: a map with 'type' and optionally 'default'
            # and/or 'label' (this is how add_input() stores such inputs).
            input_type = typ.get('type')
            if isinstance(input_type, dict):
                if input_type.get('type') == 'enum':
                    params.append('{}=\'enum\''.format(name))
                    params.append('symbols={!r}'.format(
                        list(input_type.get('symbols', []))))
                else:
                    # complex type (e.g., an array definition)
                    params.append('{}={!r}'.format(name, dict(input_type)))
            else:
                params.append('{}=\'{}\''.format(name, input_type))
            for extra in ('default', 'label'):
                if extra in typ:
                    params.append('{}={!r}'.format(extra, typ[extra]))
        else:
            # Simple notation: the value is the type itself.
            params.append('{}=\'{}\''.format(name, typ))
        script.append('{} = {}.add_input({})'.format(
            name, wf_name, ', '.join(params)))

    # Workflow steps
    for step in self.wf_steps.values():
        pyname = step.python_name
        returns = ['{}_{}'.format(pyname, o) for o in step['out']]
        params = ['{}={}'.format(key, python_name(param))
                  for key, param in step['in'].items()]
        script.append('{} = {}.{}({})'.format(
            ', '.join(returns), wf_name, pyname, ', '.join(params)))

    # Workflow outputs
    params = ['{}={}'.format(name, python_name(details['outputSource']))
              for name, details in self.wf_outputs.items()]
    script.append('{}.add_outputs({})'.format(wf_name, ', '.join(params)))

    return '\n'.join(script)
@staticmethod
def _types_match(type1, type2):
    """Tell whether a value of type1 could be accepted as type2.

    Only the case where both types are strings is actually compared: the
    types then match if they are equal after stripping trailing ``?``
    (optional) markers. Every other combination is reported as a match,
    because no mismatch can be demonstrated for it.

    Args:
        type1: type of the source (string or other type description).
        type2: type of the receiving input.

    Returns:
        bool: False only if both types are strings that differ; True
        otherwise.
    """
    both_strings = (isinstance(type1, six.string_types) and
                    isinstance(type2, six.string_types))
    if not both_strings:
        return True

    return type1.rstrip('?') == type2.rstrip('?')
def _make_step(self, step, **kwargs):
    """Wire a step into the workflow and return references to its outputs.

    The keyword arguments are matched against the step's inputs using the
    Python-safe version of each input name (``python_name``). Optionally
    the step is scattered over one or more of its inputs.

    Args:
        step (Step): copy of a step from the steps library.
        kwargs (dict): ``input_name=value`` pairs, where value is a
            ``Reference`` or a list of ``Reference`` objects. Two extra
            keywords are recognised: ``scatter`` (name or list of names of
            the inputs to scatter over) and ``scatter_method`` (one of
            ``dotproduct``, ``nested_crossproduct``, ``flat_crossproduct``;
            required when scattering over more than one input).

    Returns:
        A single output reference if the step has exactly one output,
        otherwise a list of output references.

    Raises:
        ValueError: The WorkflowGenerator is closed, a required input is
            missing, an input value has an incorrect type, the scatter
            arguments are invalid, or the type of a reference does not
            match the type of the step input.
    """
    self._closed()

    for k in step.get_input_names():
        p_name = python_name(k)
        if p_name in kwargs:
            # kwargs is keyed by the Python-safe name, which can differ
            # from the CWL input name k; always look up by p_name.
            value = kwargs[p_name]
            if isinstance(value, Reference):
                step.set_input(p_name, six.text_type(value))
            elif isinstance(value, list):
                if all(isinstance(n, Reference) for n in value):
                    step.set_input(p_name, value)
                else:
                    raise ValueError(
                        'List of inputs contains an input with an '
                        'incorrect type for keyword argument {} (should '
                        'be a value returned by add_input() or from '
                        'adding a step).'.format(p_name))
            else:
                raise ValueError(
                    'Incorrect type (should be a value returned '
                    'by add_input() or from adding a step) for keyword '
                    'argument {}'.format(p_name))
        elif k not in step.optional_input_names:
            raise ValueError(
                'Expecting "{}" as a keyword argument.'.format(p_name))

    if 'scatter' in kwargs or 'scatter_method' in kwargs:
        # Check whether 'scatter' keyword is present
        if not kwargs.get('scatter'):
            raise ValueError('Expecting "scatter" as a keyword argument.')

        # Check whether the scatter variables are valid for this step
        scatter_vars = kwargs.get('scatter')
        if isinstance(scatter_vars, six.string_types):
            scatter_vars = [scatter_vars]

        for var in scatter_vars:
            if var not in step.get_input_names():
                msg = 'Invalid variable "{}" for scatter.'
                raise ValueError(msg.format(var))
            step.scattered_inputs.append(var)

        # Check whether 'scatter_method' keyword is present if there is
        # more than 1 scatter variable
        if not kwargs.get('scatter_method') and len(scatter_vars) > 1:
            msg = 'Expecting "scatter_method" as a keyword argument.'
            raise ValueError(msg)

        # Check validity of scatterMethod
        scatter_methods = ['dotproduct', 'nested_crossproduct',
                           'flat_crossproduct']
        m = kwargs.get('scatter_method')
        if m and m not in scatter_methods:
            msg = 'Invalid scatterMethod "{}". Please use one of ({}).'
            raise ValueError(msg.format(m, ', '.join(scatter_methods)))
        step.scatter_method = m

        # Update step output types (outputs are now arrays)
        for name, typ in step.output_types.items():
            step.output_types[name] = {'type': 'array', 'items': typ}

        self.has_scatter_requirement = True
        step.is_scattered = True

    # Check types of references
    for k in step.get_input_names():
        p_name = python_name(k)
        if p_name in kwargs:
            self._type_check_reference(step, k, kwargs[p_name])

    # Make sure the step has a unique name in the workflow (so command line
    # tools can be added to the same workflow multiple times).
    name_in_wf = self._generate_step_name(step.name)
    step._set_name_in_workflow(name_in_wf)
    self.steps_library.step_ids.append(name_in_wf)

    # Create a reference for each output for use in subsequent
    # steps' inputs.
    outputs = []
    for n in step.output_names:
        ref = step.output_reference(n)
        self.step_output_types[ref] = step.output_types[n]
        outputs.append(ref)

    self._add_step(step)

    if len(outputs) == 1:
        return outputs[0]
    return outputs
def save(self, fname, mode=None, validate=True, encoding='utf-8',
         wd=False, inline=False, relative=False, pack=False):
    """Save the workflow to file.

    Save the workflow to a CWL file that can be run with a CWL runner.

    Args:
        fname (str): file to save the workflow to. Missing parent
            directories are created.
        mode (str): one of (rel, abs, wd, inline, pack). ``inline`` is
            deprecated and handled as ``pack``. If no mode is given, it is
            derived from the deprecated ``pack``, ``wd`` and ``relative``
            flags (default: ``abs``) and a deprecation warning is issued.
        validate (bool): whether to validate the workflow before saving
            (default: True).
        encoding (str): file encoding to use (default: ``utf-8``).
        wd (bool): deprecated, use ``mode='wd'``.
        inline (bool): deprecated; not used to derive the mode.
        relative (bool): deprecated, use ``mode='rel'``.
        pack (bool): deprecated, use ``mode='pack'``.

    Raises:
        ValueError: The WorkflowGenerator is closed, the mode is illegal,
            or mode is ``wd`` and no working directory has been set.
    """
    self._closed()

    if mode is None:
        # Old-style call: translate the deprecated flags into a mode.
        mode = 'abs'
        if pack:
            mode = 'pack'
        elif wd:
            mode = 'wd'
        elif relative:
            mode = 'rel'

        msg = 'Using deprecated save method. Please save the workflow ' \
              'with: wf.save(\'{}\', mode=\'{}\'). Redirecting to new ' \
              'save method.'.format(fname, mode)
        warnings.warn(msg, DeprecationWarning)

    modes = ('rel', 'abs', 'wd', 'inline', 'pack')
    if mode not in modes:
        msg = 'Illegal mode "{}". Choose one of ({}).'\
              .format(mode, ','.join(modes))
        raise ValueError(msg)

    if validate:
        self.validate()

    dirname = os.path.dirname(os.path.abspath(fname))
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    if mode == 'inline':
        msg = ('Inline saving is deprecated. Please save the workflow '
               'using mode=\'pack\'. Setting mode to pack.')
        warnings.warn(msg, DeprecationWarning)
        mode = 'pack'

    # The requested encoding is forwarded in every mode, not only when
    # packing.
    if mode == 'rel':
        save_yaml(fname=fname, wf=self, pack=False, relpath=dirname,
                  wd=False, encoding=encoding)

    if mode == 'abs':
        save_yaml(fname=fname, wf=self, pack=False, relpath=None,
                  wd=False, encoding=encoding)

    if mode == 'pack':
        self._pack(fname, encoding)

    if mode == 'wd':
        if self.get_working_dir() is None:
            raise ValueError('Working directory not set.')

        # save in working_dir
        bn = os.path.basename(fname)
        wd_file = os.path.join(self.working_dir, bn)
        save_yaml(fname=wd_file, wf=self, pack=False, relpath=None,
                  wd=True, encoding=encoding)
        # and copy workflow file to other location (as though all steps
        # are in the same directory as the workflow)
        try:
            shutil.copy2(wd_file, fname)
        except shutil.Error:
            # best effort: e.g., source and destination are the same file
            pass
def is_multiline(s):
    """Tell whether a string spans more than one line.

    Lines are counted with ``str.splitlines``, so a single line with a
    trailing line break still counts as one line.

    Args:
        s (str): the string to check.

    Returns:
        bool: True if ``s`` consists of two or more lines, False otherwise.
    """
    line_count = len(s.splitlines())
    return line_count >= 2
def read(fname):
    """Return the contents of a file located next to this setup script.

    Used to load README.rst into the package's long description.

    Args:
        fname (str): file name, relative to the directory of this file.

    Returns:
        str: the complete text of the file.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(path.join(path.dirname(__file__), fname)) as handle:
        return handle.read()
For an analysis of "install_requires" vs pip's 67 | # requirements files see: 68 | # https://packaging.python.org/en/latest/requirements.html 69 | install_requires=[ 70 | 'six', 71 | 'cwltool==1.0.20180721142728', 72 | 'click'], 73 | setup_requires=[ 74 | # dependency for `python setup.py test` 75 | 'pytest-runner', 76 | # dependencies for `python setup.py build_sphinx` 77 | 'sphinx', 78 | 'recommonmark' 79 | ], 80 | tests_require=[ 81 | 'pytest', 82 | 'pytest-cov', 83 | 'pycodestyle', 84 | 'codacy-coverage', 85 | 'pytest-datafiles', 86 | ], 87 | ) 88 | -------------------------------------------------------------------------------- /tests/data/echo-no-shebang.cwl: -------------------------------------------------------------------------------- 1 | cwlVersion: v1.0 2 | class: CommandLineTool 3 | baseCommand: echo 4 | inputs: 5 | message: 6 | type: string 7 | inputBinding: 8 | position: 1 9 | outputs: 10 | echoed: 11 | type: stdout 12 | -------------------------------------------------------------------------------- /tests/data/echo-wc.workflowstep.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | cwlVersion: v1.0 4 | class: Workflow 5 | requirements: 6 | - class: SubworkflowFeatureRequirement 7 | inputs: 8 | wfmessage: string 9 | outputs: 10 | wfcount: 11 | type: File 12 | outputSource: echo-wc/wfcount 13 | steps: 14 | echo-wc: 15 | run: workflows/echo-wc.cwl 16 | in: 17 | wfmessage: wfmessage 18 | out: 19 | - wfcount 20 | -------------------------------------------------------------------------------- /tests/data/echo.scattered.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.0 3 | class: Workflow 4 | 5 | requirements: 6 | - class: ScatterFeatureRequirement 7 | 8 | inputs: 9 | wfmessages: string[] 10 | 11 | outputs: 12 | out_files: 13 | outputSource: echo/echoed 14 | type: 15 | items: File 16 | type: array 
17 | 18 | steps: 19 | echo: 20 | run: tools/echo.cwl 21 | in: 22 | message: wfmessages 23 | out: 24 | - echoed 25 | scatter: [message] 26 | scatterMethod: nested_crossproduct 27 | -------------------------------------------------------------------------------- /tests/data/file-names/echo-with-minuses.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | cwlVersion: v1.0 4 | class: CommandLineTool 5 | baseCommand: echo 6 | inputs: 7 | message: 8 | type: string 9 | inputBinding: 10 | position: 1 11 | outputs: 12 | echoed: 13 | type: stdout 14 | -------------------------------------------------------------------------------- /tests/data/file-names/echo-with-minuses_and_underscores.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | cwlVersion: v1.0 4 | class: CommandLineTool 5 | baseCommand: echo 6 | inputs: 7 | message: 8 | type: string 9 | inputBinding: 10 | position: 1 11 | outputs: 12 | echoed: 13 | type: stdout 14 | -------------------------------------------------------------------------------- /tests/data/file-names/echo_with_minuses-and-underscores.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | cwlVersion: v1.0 4 | class: CommandLineTool 5 | baseCommand: echo 6 | inputs: 7 | message: 8 | type: string 9 | inputBinding: 10 | position: 1 11 | outputs: 12 | echoed: 13 | type: stdout 14 | -------------------------------------------------------------------------------- /tests/data/file-names/echo_with_underscores.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | cwlVersion: v1.0 4 | class: CommandLineTool 5 | baseCommand: echo 6 | inputs: 7 | message: 8 | type: string 9 | inputBinding: 10 | position: 1 11 | outputs: 12 | echoed: 13 | type: stdout 14 | 
-------------------------------------------------------------------------------- /tests/data/misc/align-dir-pack.cwl: -------------------------------------------------------------------------------- 1 | { 2 | "$graph": [ 3 | { 4 | "class": "Workflow", 5 | "requirements": [ 6 | { 7 | "class": "ScatterFeatureRequirement" 8 | } 9 | ], 10 | "inputs": [ 11 | { 12 | "default": "merged_changes.csv", 13 | "type": "string", 14 | "id": "#align-texts-wf.cwl/align_c" 15 | }, 16 | { 17 | "default": "merged_metadata.csv", 18 | "type": "string", 19 | "id": "#align-texts-wf.cwl/align_m" 20 | }, 21 | { 22 | "type": { 23 | "type": "array", 24 | "items": "File" 25 | }, 26 | "id": "#align-texts-wf.cwl/gs" 27 | }, 28 | { 29 | "type": { 30 | "type": "array", 31 | "items": "File" 32 | }, 33 | "id": "#align-texts-wf.cwl/ocr" 34 | } 35 | ], 36 | "outputs": [ 37 | { 38 | "outputSource": "#align-texts-wf.cwl/char-align-1/out_file", 39 | "type": { 40 | "type": "array", 41 | "items": "File" 42 | }, 43 | "id": "#align-texts-wf.cwl/alignments" 44 | }, 45 | { 46 | "outputSource": "#align-texts-wf.cwl/merge-json-3/merged", 47 | "type": "File", 48 | "id": "#align-texts-wf.cwl/changes" 49 | }, 50 | { 51 | "outputSource": "#align-texts-wf.cwl/merge-json-2/merged", 52 | "type": "File", 53 | "id": "#align-texts-wf.cwl/metadata" 54 | } 55 | ], 56 | "steps": [ 57 | { 58 | "run": "#align.cwl", 59 | "in": [ 60 | { 61 | "source": "#align-texts-wf.cwl/ocr", 62 | "id": "#align-texts-wf.cwl/align-1/file1" 63 | }, 64 | { 65 | "source": "#align-texts-wf.cwl/gs", 66 | "id": "#align-texts-wf.cwl/align-1/file2" 67 | } 68 | ], 69 | "out": [ 70 | "#align-texts-wf.cwl/align-1/changes", 71 | "#align-texts-wf.cwl/align-1/metadata" 72 | ], 73 | "scatter": [ 74 | "#align-texts-wf.cwl/align-1/file1", 75 | "#align-texts-wf.cwl/align-1/file2" 76 | ], 77 | "scatterMethod": "dotproduct", 78 | "id": "#align-texts-wf.cwl/align-1" 79 | }, 80 | { 81 | "run": "#char-align.cwl", 82 | "in": [ 83 | { 84 | "source": 
"#align-texts-wf.cwl/gs", 85 | "id": "#align-texts-wf.cwl/char-align-1/gs_text" 86 | }, 87 | { 88 | "source": "#align-texts-wf.cwl/align-1/metadata", 89 | "id": "#align-texts-wf.cwl/char-align-1/metadata" 90 | }, 91 | { 92 | "source": "#align-texts-wf.cwl/ocr", 93 | "id": "#align-texts-wf.cwl/char-align-1/ocr_text" 94 | } 95 | ], 96 | "out": [ 97 | "#align-texts-wf.cwl/char-align-1/out_file" 98 | ], 99 | "scatter": [ 100 | "#align-texts-wf.cwl/char-align-1/gs_text", 101 | "#align-texts-wf.cwl/char-align-1/ocr_text", 102 | "#align-texts-wf.cwl/char-align-1/metadata" 103 | ], 104 | "scatterMethod": "dotproduct", 105 | "id": "#align-texts-wf.cwl/char-align-1" 106 | }, 107 | { 108 | "run": "#merge-json.cwl", 109 | "in": [ 110 | { 111 | "source": "#align-texts-wf.cwl/align-1/metadata", 112 | "id": "#align-texts-wf.cwl/merge-json-2/in_files" 113 | }, 114 | { 115 | "source": "#align-texts-wf.cwl/align_m", 116 | "id": "#align-texts-wf.cwl/merge-json-2/name" 117 | } 118 | ], 119 | "out": [ 120 | "#align-texts-wf.cwl/merge-json-2/merged" 121 | ], 122 | "id": "#align-texts-wf.cwl/merge-json-2" 123 | }, 124 | { 125 | "run": "#merge-json.cwl", 126 | "in": [ 127 | { 128 | "source": "#align-texts-wf.cwl/align-1/changes", 129 | "id": "#align-texts-wf.cwl/merge-json-3/in_files" 130 | }, 131 | { 132 | "source": "#align-texts-wf.cwl/align_c", 133 | "id": "#align-texts-wf.cwl/merge-json-3/name" 134 | } 135 | ], 136 | "out": [ 137 | "#align-texts-wf.cwl/merge-json-3/merged" 138 | ], 139 | "id": "#align-texts-wf.cwl/merge-json-3" 140 | } 141 | ], 142 | "id": "#align-texts-wf.cwl" 143 | }, 144 | { 145 | "class": "CommandLineTool", 146 | "baseCommand": [ 147 | "python", 148 | "-m", 149 | "ochre.char_align" 150 | ], 151 | "requirements": [ 152 | { 153 | "envDef": [ 154 | { 155 | "envValue": "C.UTF-8", 156 | "envName": "LANG" 157 | }, 158 | { 159 | "envValue": "C.UTF-8", 160 | "envName": "LC_ALL" 161 | } 162 | ], 163 | "class": "EnvVarRequirement" 164 | } 165 | ], 166 | "inputs": [ 167 | { 
168 | "type": "File", 169 | "inputBinding": { 170 | "position": 2 171 | }, 172 | "id": "#char-align.cwl/gs_text" 173 | }, 174 | { 175 | "type": "File", 176 | "inputBinding": { 177 | "position": 3 178 | }, 179 | "id": "#char-align.cwl/metadata" 180 | }, 181 | { 182 | "type": "File", 183 | "inputBinding": { 184 | "position": 1 185 | }, 186 | "id": "#char-align.cwl/ocr_text" 187 | } 188 | ], 189 | "outputs": [ 190 | { 191 | "type": "File", 192 | "outputBinding": { 193 | "glob": "*.json" 194 | }, 195 | "id": "#char-align.cwl/out_file" 196 | } 197 | ], 198 | "id": "#char-align.cwl" 199 | }, 200 | { 201 | "class": "CommandLineTool", 202 | "baseCommand": [ 203 | "python", 204 | "-m", 205 | "nlppln.commands.ls" 206 | ], 207 | "doc": "List files in a directory.\n\nThis command can be used to convert a ``Directory`` into a list of files. This list can be filtered on file name by specifying ``--endswith``.\n", 208 | "requirements": [ 209 | { 210 | "envDef": [ 211 | { 212 | "envValue": "C.UTF-8", 213 | "envName": "LANG" 214 | }, 215 | { 216 | "envValue": "C.UTF-8", 217 | "envName": "LC_ALL" 218 | } 219 | ], 220 | "class": "EnvVarRequirement" 221 | } 222 | ], 223 | "inputs": [ 224 | { 225 | "type": [ 226 | "null", 227 | "string" 228 | ], 229 | "inputBinding": { 230 | "prefix": "--endswith" 231 | }, 232 | "id": "#ls.cwl/endswith" 233 | }, 234 | { 235 | "type": "Directory", 236 | "inputBinding": { 237 | "position": 2 238 | }, 239 | "id": "#ls.cwl/in_dir" 240 | }, 241 | { 242 | "type": [ 243 | "null", 244 | "boolean" 245 | ], 246 | "inputBinding": { 247 | "prefix": "--recursive" 248 | }, 249 | "id": "#ls.cwl/recursive" 250 | } 251 | ], 252 | "stdout": "cwl.output.json", 253 | "outputs": [ 254 | { 255 | "type": { 256 | "type": "array", 257 | "items": "File" 258 | }, 259 | "id": "#ls.cwl/out_files" 260 | } 261 | ], 262 | "id": "#ls.cwl" 263 | }, 264 | { 265 | "class": "CommandLineTool", 266 | "baseCommand": [ 267 | "python", 268 | "-m", 269 | "ochre.merge_json" 270 | ], 271 | 
"requirements": [ 272 | { 273 | "envDef": [ 274 | { 275 | "envValue": "C.UTF-8", 276 | "envName": "LANG" 277 | }, 278 | { 279 | "envValue": "C.UTF-8", 280 | "envName": "LC_ALL" 281 | } 282 | ], 283 | "class": "EnvVarRequirement" 284 | }, 285 | { 286 | "listing": "$(inputs.in_files)", 287 | "class": "InitialWorkDirRequirement" 288 | } 289 | ], 290 | "arguments": [ 291 | { 292 | "valueFrom": "$(runtime.outdir)", 293 | "position": 1 294 | } 295 | ], 296 | "inputs": [ 297 | { 298 | "type": { 299 | "type": "array", 300 | "items": "File" 301 | }, 302 | "id": "#merge-json.cwl/in_files" 303 | }, 304 | { 305 | "type": [ 306 | "null", 307 | "string" 308 | ], 309 | "inputBinding": { 310 | "prefix": "--name=", 311 | "separate": false 312 | }, 313 | "id": "#merge-json.cwl/name" 314 | } 315 | ], 316 | "outputs": [ 317 | { 318 | "type": "File", 319 | "outputBinding": { 320 | "glob": "*.csv" 321 | }, 322 | "id": "#merge-json.cwl/merged" 323 | } 324 | ], 325 | "id": "#merge-json.cwl" 326 | }, 327 | { 328 | "class": "ExpressionTool", 329 | "requirements": [ 330 | { 331 | "class": "InlineJavascriptRequirement" 332 | } 333 | ], 334 | "doc": "Save a list of files to a directory.\n\nIf the ``dir_name`` is not specified, it is set to the string before the rightmost - of the ``nameroot`` of the first input file\n(e.g., ``input-file-1-0000.txt`` becomes ``input-file-1``). 
If the file name does not contain a -, the ``nameroot`` is used (e.g.\n``input.txt`` becomes ``input``).\n", 335 | "inputs": [ 336 | { 337 | "type": [ 338 | "null", 339 | "string" 340 | ], 341 | "id": "#save-files-to-dir.cwl/dir_name" 342 | }, 343 | { 344 | "type": { 345 | "type": "array", 346 | "items": "File" 347 | }, 348 | "id": "#save-files-to-dir.cwl/in_files" 349 | } 350 | ], 351 | "outputs": [ 352 | { 353 | "type": "Directory", 354 | "id": "#save-files-to-dir.cwl/out" 355 | } 356 | ], 357 | "expression": "${\n var dir_name;\n if (inputs.dir_name == null ){\n var parts = inputs.in_files[0].nameroot.split('-');\n if (parts.length > 1){\n dir_name = parts.slice(0, -1).join('-')\n } else {\n dir_name = parts[0]\n }\n\n } else {\n dir_name = inputs.dir_name;\n }\n return {\"out\": {\n \"class\": \"Directory\",\n \"basename\": dir_name,\n \"listing\": inputs.in_files\n } };\n}\n", 358 | "id": "#save-files-to-dir.cwl" 359 | }, 360 | { 361 | "class": "Workflow", 362 | "requirements": [ 363 | { 364 | "class": "SubworkflowFeatureRequirement" 365 | } 366 | ], 367 | "inputs": [ 368 | { 369 | "default": "align", 370 | "type": "string", 371 | "id": "#main/align_dir_name" 372 | }, 373 | { 374 | "type": "Directory", 375 | "id": "#main/gs" 376 | }, 377 | { 378 | "type": "Directory", 379 | "id": "#main/ocr" 380 | } 381 | ], 382 | "outputs": [ 383 | { 384 | "outputSource": "#main/save-files-to-dir-2/out", 385 | "type": "Directory", 386 | "id": "#main/align" 387 | } 388 | ], 389 | "steps": [ 390 | { 391 | "run": "#align-texts-wf.cwl", 392 | "in": [ 393 | { 394 | "source": "#main/ls-2/out_files", 395 | "id": "#main/align-texts-wf/gs" 396 | }, 397 | { 398 | "source": "#main/ls-5/out_files", 399 | "id": "#main/align-texts-wf/ocr" 400 | } 401 | ], 402 | "out": [ 403 | "#main/align-texts-wf/alignments", 404 | "#main/align-texts-wf/changes", 405 | "#main/align-texts-wf/metadata" 406 | ], 407 | "id": "#main/align-texts-wf" 408 | }, 409 | { 410 | "run": "#ls.cwl", 411 | "in": [ 412 | { 
413 | "source": "#main/gs", 414 | "id": "#main/ls-2/in_dir" 415 | } 416 | ], 417 | "out": [ 418 | "#main/ls-2/out_files" 419 | ], 420 | "id": "#main/ls-2" 421 | }, 422 | { 423 | "run": "#ls.cwl", 424 | "in": [ 425 | { 426 | "source": "#main/ocr", 427 | "id": "#main/ls-5/in_dir" 428 | } 429 | ], 430 | "out": [ 431 | "#main/ls-5/out_files" 432 | ], 433 | "id": "#main/ls-5" 434 | }, 435 | { 436 | "run": "#save-files-to-dir.cwl", 437 | "in": [ 438 | { 439 | "source": "#main/align_dir_name", 440 | "id": "#main/save-files-to-dir-2/dir_name" 441 | }, 442 | { 443 | "source": "#main/align-texts-wf/alignments", 444 | "id": "#main/save-files-to-dir-2/in_files" 445 | } 446 | ], 447 | "out": [ 448 | "#main/save-files-to-dir-2/out" 449 | ], 450 | "id": "#main/save-files-to-dir-2" 451 | } 452 | ], 453 | "id": "#main" 454 | }, 455 | { 456 | "class": "CommandLineTool", 457 | "baseCommand": [ 458 | "python", 459 | "/align.py" 460 | ], 461 | "hints": [ 462 | { 463 | "class": "DockerRequirement", 464 | "dockerPull": "nlppln/edlib-align:0.1.2" 465 | } 466 | ], 467 | "inputs": [ 468 | { 469 | "type": "File", 470 | "inputBinding": { 471 | "position": 1 472 | }, 473 | "id": "#align.cwl/file1" 474 | }, 475 | { 476 | "type": "File", 477 | "inputBinding": { 478 | "position": 2 479 | }, 480 | "id": "#align.cwl/file2" 481 | }, 482 | { 483 | "type": [ 484 | "null", 485 | "Directory" 486 | ], 487 | "inputBinding": { 488 | "prefix": "--out_dir=", 489 | "separate": false 490 | }, 491 | "id": "#align.cwl/out_dir" 492 | } 493 | ], 494 | "outputs": [ 495 | { 496 | "type": "File", 497 | "outputBinding": { 498 | "glob": "*-changes.json" 499 | }, 500 | "id": "#align.cwl/changes" 501 | }, 502 | { 503 | "type": "File", 504 | "outputBinding": { 505 | "glob": "*-metadata.json" 506 | }, 507 | "id": "#align.cwl/metadata" 508 | } 509 | ], 510 | "id": "#align.cwl" 511 | } 512 | ], 513 | "cwlVersion": "v1.0" 514 | } -------------------------------------------------------------------------------- 
/tests/data/misc/echo2.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | cwlVersion: v1.0 4 | class: CommandLineTool 5 | baseCommand: echo 6 | inputs: 7 | message: 8 | type: string[] 9 | inputBinding: 10 | position: 1 11 | outputs: 12 | echoed: 13 | type: stdout 14 | -------------------------------------------------------------------------------- /tests/data/misc/echo3.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | cwlVersion: v1.0 4 | class: CommandLineTool 5 | baseCommand: echo 6 | inputs: 7 | msg1: 8 | type: string 9 | inputBinding: 10 | position: 1 11 | msg2: 12 | type: string 13 | inputBinding: 14 | position: 2 15 | outputs: 16 | echoed: 17 | type: stdout 18 | -------------------------------------------------------------------------------- /tests/data/misc/non-python-names.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | cwlVersion: v1.0 4 | class: CommandLineTool 5 | baseCommand: echo 6 | inputs: 7 | first-message: 8 | type: string 9 | inputBinding: 10 | position: 1 11 | optional-message: 12 | type: string? 
13 | inputBinding: 14 | position: 2 15 | 16 | outputs: 17 | echo-out: 18 | type: stdout 19 | -------------------------------------------------------------------------------- /tests/data/tools/echo.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | cwlVersion: v1.0 4 | class: CommandLineTool 5 | baseCommand: echo 6 | inputs: 7 | message: 8 | type: string 9 | inputBinding: 10 | position: 1 11 | outputs: 12 | echoed: 13 | type: stdout 14 | -------------------------------------------------------------------------------- /tests/data/tools/multiple-out-args.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.0 3 | class: CommandLineTool 4 | baseCommand: ["python", "-m", "nlppln.commands.extract_annotations"] 5 | 6 | inputs: 7 | in_files: 8 | type: 9 | type: array 10 | items: File 11 | inputBinding: 12 | position: 2 13 | out_dir: 14 | type: Directory? 
15 | inputBinding: 16 | prefix: --out_dir= 17 | separate: false 18 | counselors: 19 | type: 20 | type: array 21 | items: string 22 | inputBinding: 23 | prefix: -c 24 | 25 | stdout: missing_introductions.json 26 | 27 | outputs: 28 | out_files: 29 | type: 30 | type: array 31 | items: File 32 | outputBinding: 33 | glob: "*.txt" 34 | meta_out: 35 | type: File 36 | outputBinding: 37 | glob: "missing_introductions.json" 38 | -------------------------------------------------------------------------------- /tests/data/tools/wc.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | cwlVersion: v1.0 4 | class: CommandLineTool 5 | baseCommand: wc 6 | inputs: 7 | file2count: 8 | type: File 9 | inputBinding: 10 | position: 1 11 | outputs: 12 | wced: 13 | type: stdout 14 | -------------------------------------------------------------------------------- /tests/data/workflows/echo-wc.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | cwlVersion: v1.0 4 | class: Workflow 5 | doc: Counts words of a message via echo and wc 6 | inputs: 7 | wfmessage: string 8 | outputs: 9 | wfcount: 10 | type: File 11 | outputSource: wc/wced 12 | steps: 13 | echo: 14 | run: ../tools/echo.cwl 15 | in: 16 | message: wfmessage 17 | out: 18 | - echoed 19 | wc: 20 | run: ../tools/wc.cwl 21 | in: 22 | file2count: echo/echoed 23 | out: 24 | - wced 25 | -------------------------------------------------------------------------------- /tests/data/workflows/echo-wc_inline.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.0 3 | class: Workflow 4 | doc: Counts words of a message via echo and wc 5 | inputs: 6 | wfmessage: string 7 | outputs: 8 | wfcount: 9 | type: File 10 | outputSource: wc/wced 11 | steps: 12 | echo: 13 | run: 14 | cwlVersion: v1.0 15 | class: CommandLineTool 16 | 
baseCommand: echo 17 | inputs: 18 | - type: string 19 | inputBinding: 20 | position: 1 21 | id: _:echo#message 22 | outputs: 23 | - type: File 24 | id: _:echo#echoed 25 | outputBinding: 26 | glob: 8341e6646e16f373b00fc5a45b4f299d5901b0ad 27 | id: _:echo 28 | stdout: 8341e6646e16f373b00fc5a45b4f299d5901b0ad 29 | in: 30 | message: wfmessage 31 | out: 32 | - echoed 33 | wc: 34 | run: 35 | cwlVersion: v1.0 36 | class: CommandLineTool 37 | baseCommand: wc 38 | inputs: 39 | - type: File 40 | inputBinding: 41 | position: 1 42 | id: _:wc#file2count 43 | outputs: 44 | - type: File 45 | id: _:wc#wced 46 | outputBinding: 47 | glob: bcd587c62be60d5d0473ee2c39dc73257b20ecca 48 | id: _:wc 49 | stdout: bcd587c62be60d5d0473ee2c39dc73257b20ecca 50 | in: 51 | file2count: echo/echoed 52 | out: 53 | - wced 54 | -------------------------------------------------------------------------------- /tests/data/workflows/echo-wc_wd.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.0 3 | class: Workflow 4 | inputs: 5 | wfmessage: string 6 | outputs: 7 | wfcount: 8 | type: File 9 | outputSource: wc/wced 10 | steps: 11 | echo: 12 | run: echo.cwl 13 | in: 14 | message: wfmessage 15 | out: 16 | - echoed 17 | wc: 18 | run: wc.cwl 19 | in: 20 | file2count: echo/echoed 21 | out: 22 | - wced 23 | -------------------------------------------------------------------------------- /tests/test_library.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import os 4 | from pathlib import Path 5 | 6 | from scriptcwl.library import load_yaml, load_steps 7 | 8 | 9 | data_dir = Path(os.path.dirname(os.path.realpath(__file__))) / 'data' / 'misc' 10 | 11 | 12 | @pytest.mark.datafiles(Path(data_dir) / 'align-dir-pack.cwl') 13 | def test_load_yaml_packed(datafiles): 14 | cwl_file = str(datafiles.listdir()[0]) 15 | 16 | assert {} == load_yaml(cwl_file) 17 | 18 | 19 | 
@pytest.mark.datafiles(Path(data_dir) / 'align-dir-pack.cwl') 20 | def test_load_steps_file_packed(datafiles): 21 | cwl_file = str(datafiles.listdir()[0]) 22 | 23 | assert {} == load_steps(step_file=cwl_file) 24 | -------------------------------------------------------------------------------- /tests/test_lint.py: -------------------------------------------------------------------------------- 1 | """ Lint tests """ 2 | import os 3 | import textwrap 4 | 5 | import pycodestyle # formerly known as pep8 6 | 7 | 8 | def test_pep8_conformance(): 9 | """Test that we conform to PEP-8.""" 10 | check_paths = [ 11 | 'scriptcwl', 12 | 'tests', 13 | ] 14 | exclude_paths = [] 15 | 16 | print("PEP8 check of directories: {}\n".format(', '.join(check_paths))) 17 | 18 | # Get paths wrt package root 19 | package_root = os.path.dirname(os.path.dirname(__file__)) 20 | for paths in (check_paths, exclude_paths): 21 | for i, path in enumerate(paths): 22 | paths[i] = os.path.join(package_root, path) 23 | 24 | style = pycodestyle.StyleGuide() 25 | style.options.exclude.extend(exclude_paths) 26 | 27 | success = style.check_files(check_paths).total_errors == 0 28 | 29 | if not success: 30 | print(textwrap.dedent(""" 31 | Your Python code does not conform to the official Python style 32 | guide (PEP8), see https://www.python.org/dev/peps/pep-0008 33 | 34 | A list of warning and error messages can be found above, 35 | prefixed with filename:line number:column number. 36 | 37 | Run `yapf -i yourfile.py` to automatically fix most errors. 38 | Run `yapf -d yourfile.py` to preview what would be changed. 39 | Run `pip install --upgrade yapf` to install the latest version 40 | of yapf. 
41 | """)) 42 | 43 | assert success, "Your code does not conform to PEP8" 44 | -------------------------------------------------------------------------------- /tests/test_scriptcwl.py: -------------------------------------------------------------------------------- 1 | from scriptcwl.scriptcwl import is_url 2 | 3 | 4 | def test_is_url(): 5 | assert is_url('https://www.esciencecenter.nl/') 6 | assert is_url('http://www.esciencecenter.nl/') 7 | assert not is_url('file:///home/xxx/cwl-working-dir/test/cwl') 8 | -------------------------------------------------------------------------------- /tests/test_step.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from schema_salad.validate import ValidationException 4 | from scriptcwl.step import Step 5 | 6 | 7 | def test_filenotfound(): 8 | with pytest.raises(ValidationException): 9 | Step('tests/data/tools/idontexist.cwl') 10 | 11 | 12 | class TestWithCommandLineTool(object): 13 | @pytest.fixture 14 | def step(self): 15 | return Step('tests/data/tools/echo.cwl') 16 | 17 | def test_is_workflow(self, step): 18 | assert not step.is_workflow 19 | 20 | def test_get_input_names(self, step): 21 | names = step.get_input_names() 22 | assert len(names) == 1 23 | firstname = names[0] 24 | assert firstname.endswith('message') 25 | 26 | 27 | class TestWithWorkflow(object): 28 | @pytest.fixture 29 | def step(self): 30 | return Step('tests/data/workflows/echo-wc.cwl') 31 | 32 | def test_is_workflow(self, step): 33 | assert step.is_workflow 34 | 35 | def test_get_input_names(self, step): 36 | names = step.get_input_names() 37 | assert len(names) == 1 38 | firstname = names[0] 39 | assert firstname.endswith('wfmessage') 40 | 41 | 42 | class TestInputOptional(object): 43 | @pytest.fixture 44 | def step(self): 45 | return Step('tests/data/tools/echo.cwl') 46 | 47 | def test_argument_is_optional(self, step): 48 | assert step._input_optional({'type': 'string?'}) 49 | assert 
step._input_optional({'type': [u'null', 'string']}) 50 | assert step._input_optional({'type': 'string', 'default': 'test'}) 51 | 52 | def test_argument_is_not_optional(self, step): 53 | assert not step._input_optional({'type': 'string'}) 54 | 55 | 56 | class TestMultipleOutputArgs(object): 57 | @pytest.fixture 58 | def step(self): 59 | return Step('tests/data/tools/multiple-out-args.cwl') 60 | 61 | def test_has_multiple_out_args(self, step): 62 | assert len(step.to_obj()['out']) == 2 63 | 64 | 65 | class TestStepNameInWorkflow(object): 66 | @pytest.fixture 67 | def step(self): 68 | return Step('tests/data/tools/echo.cwl') 69 | 70 | def test_no_name_in_workflow(self, step): 71 | with pytest.raises(AttributeError): 72 | step.name_in_workflow == 'echo' 73 | 74 | def test_set_name_in_workflow(self, step): 75 | step._set_name_in_workflow('echo') 76 | assert step.name_in_workflow == 'echo' 77 | 78 | 79 | class TestPrintStep(object): 80 | 81 | def test_str_(self): 82 | step = Step('tests/data/tools/echo.cwl') 83 | assert str(step) == 'echoed = wf.echo(message)' 84 | 85 | def test_str_non_python_names(self): 86 | step = Step('tests/data/misc/non-python-names.cwl') 87 | o = 'echo_out = wf.non_python_names(first_message[, optional_message])' 88 | assert str(step) == o 89 | -------------------------------------------------------------------------------- /tests/test_workflow.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import pytest 4 | import os 5 | 6 | from shutil import copytree 7 | from ruamel import yaml 8 | 9 | from schema_salad.validate import ValidationException 10 | 11 | from scriptcwl import WorkflowGenerator 12 | from scriptcwl.library import load_yaml 13 | 14 | 15 | def setup_workflowgenerator(tmpdir): 16 | toolsdir = tmpdir.join('tools').strpath 17 | workflows = tmpdir.join('workflows').strpath 18 | filenames = tmpdir.join('file-names').strpath 19 | misc = 
tmpdir.join('misc').strpath 20 | copytree('tests/data/tools', toolsdir) 21 | copytree('tests/data/workflows', workflows) 22 | copytree('tests/data/file-names', filenames) 23 | copytree('tests/data/misc', misc) 24 | wf = WorkflowGenerator() 25 | return wf 26 | 27 | 28 | class TestWorkflowGenerator(object): 29 | def test_load(self): 30 | wf = WorkflowGenerator() 31 | wf.load('tests/data/tools') 32 | 33 | step_keys = wf.steps_library.steps.keys() 34 | step_keys = sorted(step_keys) 35 | assert step_keys == ['echo', 'multiple-out-args', 'wc'] 36 | 37 | def test_load_with_list(self): 38 | wf = WorkflowGenerator() 39 | wf.load(step_list=['tests/data/workflows/echo-wc.cwl', 40 | 'tests/data/tools']) 41 | # 'https://raw.githubusercontent.com/WhatWorksWhenForWhom/nlppln/develop/cwl/anonymize.cwl',\ 42 | step_keys = wf.steps_library.steps.keys() 43 | step_keys = sorted(step_keys) 44 | assert step_keys == ['echo', 'echo-wc', 'multiple-out-args', 'wc'] 45 | 46 | def test_load_duplicate_cwl_step(self, tmpdir): 47 | wf = setup_workflowgenerator(tmpdir) 48 | wf.load(steps_dir=tmpdir.join('tools').strpath) 49 | with pytest.warns(UserWarning): 50 | wf.load(step_file=tmpdir.join('tools', 'echo.cwl').strpath) 51 | 52 | def test_save_with_tools_deprecated(self, tmpdir): 53 | wf = setup_workflowgenerator(tmpdir) 54 | wf.load(steps_dir=tmpdir.join('tools').strpath) 55 | wf.set_documentation('Counts words of a message via echo and wc') 56 | 57 | wfmessage = wf.add_input(wfmessage='string') 58 | echoed = wf.echo(message=wfmessage) 59 | wced = wf.wc(file2count=echoed) 60 | wf.add_outputs(wfcount=wced) 61 | 62 | wf_filename = tmpdir.join('workflows/echo-wc.cwl').strpath 63 | with pytest.warns(DeprecationWarning): 64 | wf.save(wf_filename, relative=True) 65 | 66 | # make workflows contents relative to tests/data/tools directory 67 | actual = load_yaml(wf_filename) 68 | expected_wf_filename = 'tests/data/workflows/echo-wc.cwl' 69 | expected = load_yaml(expected_wf_filename) 70 | 71 | print(' 
actual:', actual) 72 | print('expected:', expected) 73 | assert actual == expected 74 | 75 | def test_save_with_tools(self, tmpdir): 76 | wf = setup_workflowgenerator(tmpdir) 77 | wf.load(steps_dir=tmpdir.join('tools').strpath) 78 | wf.set_documentation('Counts words of a message via echo and wc') 79 | 80 | wfmessage = wf.add_input(wfmessage='string') 81 | echoed = wf.echo(message=wfmessage) 82 | wced = wf.wc(file2count=echoed) 83 | wf.add_outputs(wfcount=wced) 84 | 85 | wf_filename = tmpdir.join('workflows/echo-wc.cwl').strpath 86 | wf.save(wf_filename, mode='rel') 87 | 88 | # make workflows contents relative to tests/data/tools directory 89 | actual = load_yaml(wf_filename) 90 | expected_wf_filename = 'tests/data/workflows/echo-wc.cwl' 91 | expected = load_yaml(expected_wf_filename) 92 | 93 | print(' actual:', actual) 94 | print('expected:', expected) 95 | assert actual == expected 96 | 97 | def test_save_with_workflow(self, tmpdir): 98 | wf = setup_workflowgenerator(tmpdir) 99 | wf.load(tmpdir.join('workflows').strpath) 100 | 101 | wfmessage = wf.add_input(wfmessage='string') 102 | wced = wf.echo_wc(wfmessage=wfmessage) 103 | wf.add_outputs(wfcount=wced) 104 | 105 | wf_filename = tmpdir.join('echo-wc.cwl').strpath 106 | wf.save(wf_filename, mode='rel') 107 | 108 | # make workflows contents relative to tests/data/tools directory 109 | actual = load_yaml(wf_filename) 110 | expected_wf_filename = 'tests/data/echo-wc.workflowstep.cwl' 111 | expected = load_yaml(expected_wf_filename) 112 | 113 | print(' actual:', actual) 114 | print('expected:', expected) 115 | assert actual == expected 116 | 117 | def test_save_with_scattered_step(self, tmpdir): 118 | wf = setup_workflowgenerator(tmpdir) 119 | wf.load(tmpdir.join('tools').strpath) 120 | 121 | msgs = wf.add_input(wfmessages='string[]') 122 | echoed = wf.echo( 123 | message=msgs, 124 | scatter='message', 125 | scatter_method='nested_crossproduct') 126 | wf.add_outputs(out_files=echoed) 127 | 128 | wf_filename = 
tmpdir.join('echo-scattered.cwl').strpath 129 | wf.save(wf_filename, mode='rel') 130 | 131 | # make workflows contents relative to tests/data/tools directory 132 | actual = load_yaml(wf_filename) 133 | expected_wf_filename = 'tests/data/echo.scattered.cwl' 134 | expected = load_yaml(expected_wf_filename) 135 | 136 | print(' actual:', actual) 137 | print('expected:', expected) 138 | assert actual == expected 139 | 140 | def test_save_with_inline_tools(self, tmpdir): 141 | wf = WorkflowGenerator() 142 | wf.load('tests/data/tools') 143 | wf.set_documentation('Counts words of a message via echo and wc') 144 | 145 | wfmessage = wf.add_input(wfmessage='string') 146 | echoed = wf.echo(message=wfmessage) 147 | wced = wf.wc(file2count=echoed) 148 | wf.add_outputs(wfcount=wced) 149 | 150 | wf_filename = tmpdir.join('echo-wc.cwl').strpath 151 | 152 | with pytest.warns(DeprecationWarning): 153 | wf.save(wf_filename, mode='inline') 154 | 155 | # save with inline=True should result in a packed workflow (that isn't 156 | # loaded). 
157 | with WorkflowGenerator() as wf2: 158 | wf2.load(wf_filename) 159 | # wf_filename shouldn't be in the steps library, because it is a 160 | # packed workflow 161 | assert len(wf2.steps_library.steps.keys()) == 0 162 | 163 | def test_save_with_pack_deprecated(self, tmpdir): 164 | wf = WorkflowGenerator() 165 | wf.load('tests/data/tools') 166 | wf.set_documentation('Counts words of a message via echo and wc') 167 | 168 | wfmessage = wf.add_input(wfmessage='string') 169 | echoed = wf.echo(message=wfmessage) 170 | wced = wf.wc(file2count=echoed) 171 | wf.add_outputs(wfcount=wced) 172 | 173 | wf_filename = tmpdir.join('echo-wc.cwl').strpath 174 | with pytest.warns(DeprecationWarning): 175 | wf.save(wf_filename, pack=True) 176 | 177 | with WorkflowGenerator() as wf2: 178 | wf2.load(wf_filename) 179 | # wf_filename shouldn't be in the steps library, because it is a 180 | # packed workflow 181 | assert len(wf2.steps_library.steps.keys()) == 0 182 | 183 | def test_save_with_pack(self, tmpdir): 184 | wf = WorkflowGenerator() 185 | wf.load('tests/data/tools') 186 | wf.set_documentation('Counts words of a message via echo and wc') 187 | 188 | wfmessage = wf.add_input(wfmessage='string') 189 | echoed = wf.echo(message=wfmessage) 190 | wced = wf.wc(file2count=echoed) 191 | wf.add_outputs(wfcount=wced) 192 | 193 | wf_filename = tmpdir.join('echo-wc.cwl').strpath 194 | wf.save(wf_filename, mode='pack') 195 | 196 | with WorkflowGenerator() as wf2: 197 | wf2.load(wf_filename) 198 | # wf_filename shouldn't be in the steps library, because it is a 199 | # packed workflow 200 | assert len(wf2.steps_library.steps.keys()) == 0 201 | 202 | def test_save_with_wd_deprecated(self, tmpdir): 203 | wf = WorkflowGenerator(working_dir=tmpdir.join('wd').strpath) 204 | wf.load('tests/data/tools') 205 | 206 | wfmessage = wf.add_input(wfmessage='string') 207 | echoed = wf.echo(message=wfmessage) 208 | wced = wf.wc(file2count=echoed) 209 | wf.add_outputs(wfcount=wced) 210 | 211 | wf_filename = 
tmpdir.join('echo-wc.cwl').strpath 212 | with pytest.warns(DeprecationWarning): 213 | wf.save(wf_filename, wd=True) 214 | 215 | actual = load_yaml(wf_filename) 216 | expected_wf_filename = 'tests/data/workflows/echo-wc_wd.cwl' 217 | expected = load_yaml(expected_wf_filename) 218 | 219 | print(' actual:', actual) 220 | print('expected:', expected) 221 | assert actual == expected 222 | 223 | def test_save_with_wd(self, tmpdir): 224 | wf = WorkflowGenerator(working_dir=tmpdir.join('wd').strpath) 225 | wf.load('tests/data/tools') 226 | 227 | wfmessage = wf.add_input(wfmessage='string') 228 | echoed = wf.echo(message=wfmessage) 229 | wced = wf.wc(file2count=echoed) 230 | wf.add_outputs(wfcount=wced) 231 | 232 | wf_filename = tmpdir.join('echo-wc.cwl').strpath 233 | wf.save(wf_filename, mode='wd') 234 | 235 | actual = load_yaml(wf_filename) 236 | expected_wf_filename = 'tests/data/workflows/echo-wc_wd.cwl' 237 | expected = load_yaml(expected_wf_filename) 238 | 239 | print(' actual:', actual) 240 | print('expected:', expected) 241 | assert actual == expected 242 | 243 | def test_save_with_wd_no_wd(self, tmpdir): 244 | wf = WorkflowGenerator() 245 | 246 | assert wf.get_working_dir() is None 247 | 248 | wf.load('tests/data/tools') 249 | 250 | wfmessage = wf.add_input(wfmessage='string') 251 | echoed = wf.echo(message=wfmessage) 252 | wced = wf.wc(file2count=echoed) 253 | wf.add_outputs(wfcount=wced) 254 | 255 | wf_filename = tmpdir.join('echo-wc.cwl').strpath 256 | 257 | with pytest.raises(ValueError): 258 | wf.save(wf_filename, mode='wd') 259 | 260 | def test_save_with_relative_url(self, tmpdir): 261 | wf = WorkflowGenerator() 262 | url = 'https://raw.githubusercontent.com/NLeSC/scriptcwl/master/' \ 263 | 'tests/data/tools/echo.cwl' 264 | wf.load(step_file=url) 265 | 266 | wfmessage = wf.add_input(wfmessage='string') 267 | echoed = wf.echo(message=wfmessage) 268 | wf.add_outputs(echoed=echoed) 269 | 270 | wf_filename = tmpdir.join('echo-wf.cwl').strpath 271 | 
wf.save(wf_filename, mode='rel') 272 | 273 | def test_add_shebang_to_saved_cwl_file(self, tmpdir): 274 | wf = WorkflowGenerator() 275 | wf.load('tests/data/tools') 276 | 277 | wfmessage = wf.add_input(wfmessage='string') 278 | echoed = wf.echo(message=wfmessage) 279 | wced = wf.wc(file2count=echoed) 280 | wf.add_outputs(wfcount=wced) 281 | 282 | wf_filename = tmpdir.join('echo-wc.cwl').strpath 283 | wf.save(wf_filename, mode='rel', validate=False) 284 | 285 | with open(wf_filename) as f: 286 | shebang = f.readline() 287 | 288 | assert shebang == '#!/usr/bin/env cwl-runner\n' 289 | 290 | def test_detect_wrong_type(self): 291 | wf = WorkflowGenerator() 292 | wf.load('tests/data/tools') 293 | x = wf.add_input(msg='string') 294 | x = 3 295 | with pytest.raises(ValueError): 296 | wf.echo(message=x) 297 | 298 | 299 | class TestPrintWorkflowGenerator(object): 300 | def test_print_wf_absolute_paths(self, tmpdir): 301 | wf = setup_workflowgenerator(tmpdir) 302 | wf.load(steps_dir=tmpdir.join('tools').strpath) 303 | 304 | wf.set_documentation('Counts words of a message via echo and wc') 305 | 306 | wfmessage = wf.add_input(wfmessage='string') 307 | echoed = wf.echo(message=wfmessage) 308 | wced = wf.wc(file2count=echoed) 309 | wf.add_outputs(wfcount=wced) 310 | 311 | actual = wf.__str__() 312 | 313 | # make workflows contents relative to tests/data/tools directory 314 | actual = yaml.safe_load(actual) 315 | 316 | def fix_path(path): 317 | res = path.rsplit(os.sep, 2) 318 | res[0] = '..' 
319 | return (os.sep).join(res) 320 | 321 | actual['steps']['echo']['run'] = \ 322 | fix_path(actual['steps']['echo']['run']) 323 | actual['steps']['wc']['run'] = fix_path(actual['steps']['wc']['run']) 324 | 325 | expected_wf_filename = 'tests/data/workflows/echo-wc.cwl' 326 | expected = load_yaml(expected_wf_filename) 327 | 328 | print(' actual:', actual) 329 | print('expected:', expected) 330 | assert actual == expected 331 | 332 | 333 | class TestWorkflowGeneratorWithScatteredStep(object): 334 | def test_scatter_method_incorrect(self): 335 | wf = WorkflowGenerator() 336 | wf.load('tests/data/tools') 337 | 338 | msgs = wf.add_input(wfmessages='string[]') 339 | 340 | wf.validate() 341 | 342 | with pytest.raises(ValueError): 343 | wf.echo(message=msgs, scatter='message', scatter_method='blah') 344 | 345 | def test_scatter_method_correct(self): 346 | scatter_methods = [ 347 | 'dotproduct', 'nested_crossproduct', 'flat_crossproduct' 348 | ] 349 | 350 | for method in scatter_methods: 351 | wf = WorkflowGenerator() 352 | wf.load('tests/data/tools') 353 | 354 | msgs = wf.add_input(wfmessages='string[]') 355 | 356 | echoed = wf.echo( 357 | message=msgs, scatter='message', scatter_method=method) 358 | 359 | wf.validate() 360 | 361 | assert echoed.step_name == 'echo' 362 | assert echoed.output_name == 'echoed' 363 | 364 | def test_scatter_variable_incorrect(self): 365 | wf = WorkflowGenerator() 366 | wf.load('tests/data/tools') 367 | 368 | msgs = wf.add_input(wfmessages='string[]') 369 | 370 | with pytest.raises(ValueError): 371 | wf.echo( 372 | message=msgs, 373 | scatter='incorrect', 374 | scatter_method='nested_crossproduct') 375 | 376 | def test_scatter_variable_correct(self): 377 | scatter_methods = [ 378 | 'dotproduct', 'nested_crossproduct', 'flat_crossproduct' 379 | ] 380 | 381 | for method in scatter_methods: 382 | wf = WorkflowGenerator() 383 | wf.load('tests/data/tools') 384 | 385 | msgs = wf.add_input(wfmessages='string[]') 386 | 387 | echoed = wf.echo( 388 | 
message=msgs, scatter='message', scatter_method=method) 389 | 390 | wf.validate() 391 | 392 | assert echoed.step_name == 'echo' 393 | assert echoed.output_name == 'echoed' 394 | 395 | def test_missing_scatter_argument(self): 396 | wf = WorkflowGenerator() 397 | wf.load('tests/data/tools') 398 | 399 | msgs = wf.add_input(wfmessages='string[]') 400 | 401 | with pytest.raises(ValueError): 402 | wf.echo(message=msgs, scatter_method='nested_crossproduct') 403 | 404 | def test_missing_scatter_method_argument(self): 405 | wf = WorkflowGenerator() 406 | wf.load('tests/data/tools') 407 | wf.load('tests/data/misc') 408 | 409 | msgs = wf.add_input(wfmessages='string[]') 410 | 411 | with pytest.raises(ValueError): 412 | wf.echo3(msg1=msgs, msg2=msgs, scatter=['msg1', 'msg2']) 413 | 414 | 415 | class TestWorkflowGeneratorTypeChecking(object): 416 | def test_step_with_compatible_input(self): 417 | wf = WorkflowGenerator() 418 | wf.load('tests/data/tools') 419 | 420 | wfmessage = wf.add_input(wfmessage='string') 421 | echoed = wf.echo(message=wfmessage) 422 | 423 | wf.validate() 424 | 425 | def test_step_with_incompatible_input(self): 426 | wf = WorkflowGenerator() 427 | wf.load('tests/data/tools') 428 | 429 | wfmessage = wf.add_input(wfmessage='string') 430 | with pytest.raises(ValueError): 431 | wced = wf.wc(file2count=wfmessage) 432 | 433 | def test_step_with_scattered_input(self): 434 | wf = WorkflowGenerator() 435 | wf.load('tests/data/tools') 436 | 437 | msgs = wf.add_input(wfmessages='string[]') 438 | wf.echo(message=msgs, scatter='message', scatter_method='dotproduct') 439 | 440 | wf.validate() 441 | 442 | def test_step_with_scattered_input_no_scatter_method(self): 443 | wf = WorkflowGenerator() 444 | wf.load('tests/data/tools') 445 | 446 | msgs = wf.add_input(wfmessages='string[]') 447 | wf.echo(message=msgs, scatter='message') 448 | 449 | wf.validate() 450 | 451 | def test_step_with_compatible_step_output(self): 452 | wf = WorkflowGenerator() 453 | 
wf.load('tests/data/tools') 454 | 455 | wfmessage = wf.add_input(wfmessage='string') 456 | echoed = wf.echo(message=wfmessage) 457 | wced = wf.wc(file2count=echoed) 458 | 459 | wf.validate() 460 | 461 | def test_step_with_incompatible_step_output(self): 462 | wf = WorkflowGenerator() 463 | wf.load('tests/data/tools') 464 | 465 | infile = wf.add_input(infile='File') 466 | wced = wf.wc(file2count=infile) 467 | 468 | wf.validate() 469 | 470 | with pytest.raises(ValueError): 471 | echoed = wf.echo(message=wced) 472 | 473 | def test_step_with_scattered_step_output(self): 474 | wf = WorkflowGenerator() 475 | wf.load('tests/data/tools') 476 | 477 | msgs = wf.add_input(msgs='string[]') 478 | echoed = wf.echo(message=msgs, scatter='message', 479 | scatter_method='dotproduct') 480 | wced = wf.wc(file2count=echoed, scatter='file2count', 481 | scatter_method='dotproduct') 482 | wf.validate() 483 | 484 | def test_scattered_step_with_scalar_input(self): 485 | wf = WorkflowGenerator() 486 | wf.load('tests/data/tools') 487 | 488 | wfmessage = wf.add_input(message='string') 489 | with pytest.raises(ValueError): 490 | echoed = wf.echo(message=wfmessage, scatter='message', 491 | scatter_method='dotproduct') 492 | 493 | def test_optional_type(self): 494 | wf = WorkflowGenerator() 495 | wf.load('tests/data/tools') 496 | 497 | # This could work, if you pass a string for input, even if 498 | # the echo step requires an input. So we expect it to work. 499 | wfmessage = wf.add_input(message='string?') 500 | echod = wf.echo(message=wfmessage) 501 | 502 | wf.validate() 503 | 504 | def test_required_to_optional(self): 505 | wf = WorkflowGenerator() 506 | wf.load('tests/data/tools') 507 | 508 | # out_dir is optional, attaching to non-optional input 509 | # should work. 
510 | wf_infiles = wf.add_input(in_files='File[]') 511 | wf_outdir = wf.add_input(out_dir='string') 512 | wf_counselors = wf.add_input(counselors='string[]') 513 | out_files, meta_out = wf.multiple_out_args( 514 | in_files=wf_infiles, out_dir=wf_outdir, 515 | counselors=wf_counselors) 516 | 517 | wf.validate() 518 | 519 | def test_optional_to_optional_type(self): 520 | wf = WorkflowGenerator() 521 | wf.load('tests/data/tools') 522 | 523 | wf_infiles = wf.add_input(in_files='File[]') 524 | wf_outdir = wf.add_input(out_dir='string?') 525 | wf_counselors = wf.add_input(counselors='string[]') 526 | out_files, meta_out = wf.multiple_out_args( 527 | in_files=wf_infiles, out_dir=wf_outdir, 528 | counselors=wf_counselors) 529 | 530 | wf.validate() 531 | 532 | 533 | class TestWorkflowGeneratorWithStepsAddedMultipleTimes(object): 534 | def test_generate_step_name(self): 535 | wf = WorkflowGenerator() 536 | wf.load('tests/data/tools') 537 | 538 | wfmessage = wf.add_input(wfmessage='string') 539 | 540 | name = wf._generate_step_name('echo') 541 | echoed = wf.echo(message=wfmessage) 542 | 543 | assert name == 'echo' 544 | assert name == echoed.step_name 545 | 546 | name = wf._generate_step_name('echo') 547 | echoed2 = wf.echo(message=wfmessage) 548 | 549 | assert name != 'echo' 550 | assert name == echoed2.step_name 551 | 552 | wf.validate() 553 | 554 | 555 | class TestWorkflowGeneratorWithDefaultValuesForInputParameters(object): 556 | def test_default_value_for_workflow_input(self): 557 | wf = WorkflowGenerator() 558 | 559 | wf.add_input(input1='string', default='test') 560 | obj = wf.to_obj()['inputs']['input1'] 561 | print(wf) 562 | assert obj['type'] == 'string' 563 | assert obj['default'] == 'test' 564 | 565 | def test_only_default_for_workflow_input(self): 566 | wf = WorkflowGenerator() 567 | 568 | with pytest.raises(ValueError): 569 | wf.add_input(default='test') 570 | 571 | def test_add_multiple_inputs_and_default(self): 572 | wf = WorkflowGenerator() 573 | 574 | with 
pytest.raises(ValueError): 575 | wf.add_input(input1='string', input2='string', default='test') 576 | 577 | 578 | class TestWorkflowGeneratorWithLabelsForInputParameters(object): 579 | def test_label_for_workflow_input(self): 580 | wf = WorkflowGenerator() 581 | 582 | wf.add_input(input1='string', label='test label') 583 | 584 | wf.validate() 585 | 586 | obj = wf.to_obj()['inputs']['input1'] 587 | assert obj['type'] == 'string' 588 | assert obj['label'] == 'test label' 589 | 590 | def test_only_label_for_workflow_input(self): 591 | wf = WorkflowGenerator() 592 | 593 | with pytest.raises(ValueError): 594 | wf.add_input(label='test') 595 | 596 | def test_only_label_and_default_for_workflow_input(self): 597 | wf = WorkflowGenerator() 598 | 599 | with pytest.raises(ValueError): 600 | wf.add_input(label='test', default='test') 601 | 602 | 603 | class TestWorkflowGeneratorWithEnumAsInputParameter(object): 604 | def test_enum_as_workflow_input(self): 605 | wf = WorkflowGenerator() 606 | 607 | wf.add_input(input1='enum', symbols=['one', 'two', 'three']) 608 | 609 | wf.validate() 610 | 611 | obj = wf.to_obj()['inputs']['input1'] 612 | assert obj['type']['type'] == 'enum' 613 | assert obj['type']['symbols'] == ['one', 'two', 'three'] 614 | 615 | def test_no_symbols_for_enum_input(self): 616 | wf = WorkflowGenerator() 617 | 618 | with pytest.raises(ValueError): 619 | wf.add_input(input1='enum') 620 | 621 | def test_only_symbols_for_enum_input(self): 622 | wf = WorkflowGenerator() 623 | 624 | with pytest.raises(ValueError): 625 | wf.add_input(symbols=['one', 'two', 'three']) 626 | 627 | def test_empty_symbols_for_enum_input(self): 628 | wf = WorkflowGenerator() 629 | 630 | with pytest.raises(ValueError): 631 | wf.add_input(input1='enum', symbols=[]) 632 | 633 | def test_symbols_is_a_list(self): 634 | wf = WorkflowGenerator() 635 | 636 | with pytest.raises(ValueError): 637 | wf.add_input(input1='enum', symbols='nolist') 638 | 639 | def 
test_convert_symbols_to_list_of_strings(self): 640 | wf = WorkflowGenerator() 641 | 642 | wf.add_input(input1='enum', symbols=[1, 2, 3]) 643 | obj = wf.to_obj()['inputs']['input1'] 644 | 645 | assert obj['type']['symbols'] == ['1', '2', '3'] 646 | 647 | def test_combine_enum_with_label(self): 648 | wf = WorkflowGenerator() 649 | 650 | wf.add_input(input1='enum', symbols=['one', 'two', 'three'], 651 | label='test label') 652 | obj = wf.to_obj()['inputs']['input1'] 653 | assert obj['label'] == 'test label' 654 | 655 | 656 | class TestWorkflowGeneratorAsContextManager(object): 657 | def test_use_workflow_generator_as_context_manager(self): 658 | with WorkflowGenerator() as wf: 659 | assert wf._wf_closed is False 660 | assert wf._wf_closed is True 661 | 662 | def test_error_on_using_closed_workflow_generator(self): 663 | with WorkflowGenerator() as wf: 664 | pass 665 | with pytest.raises(ValueError): 666 | wf._closed() 667 | 668 | 669 | class TestNamingWorkflowInputs(object): 670 | def test_wf_inputs_with_the_same_name(self): 671 | with WorkflowGenerator() as wf: 672 | wf.add_input(msg='string') 673 | with pytest.raises(ValueError): 674 | wf.add_input(msg='string') 675 | 676 | def test_wf_inputs_with_the_same_name_default_value(self): 677 | with WorkflowGenerator() as wf: 678 | wf.add_input(msg='string', default='Hello World!') 679 | with pytest.raises(ValueError): 680 | wf.add_input(msg='string', default='Hello World!') 681 | 682 | 683 | class TestWorkflowLabels(object): 684 | def test_set_label(self): 685 | with WorkflowGenerator() as wf: 686 | wf.set_label('test') 687 | 688 | obj = wf.to_obj() 689 | assert obj['label'] == 'test' 690 | 691 | 692 | class TestWorkflowStepsWithSpecialFileNames(object): 693 | def test_add_step_with_underscores(self, tmpdir): 694 | wf = setup_workflowgenerator(tmpdir) 695 | step_file = tmpdir.join('file-names/echo_with_underscores.cwl').strpath 696 | wf.load(step_file=step_file) 697 | msg = wf.add_input(msg='string') 698 | 
wf.echo_with_underscores(message=msg) 699 | 700 | wf.validate() 701 | 702 | def test_add_step_with_minuses(self, tmpdir): 703 | wf = setup_workflowgenerator(tmpdir) 704 | step_file = tmpdir.join('file-names/echo-with-minuses.cwl').strpath 705 | wf.load(step_file=step_file) 706 | msg = wf.add_input(msg='string') 707 | wf.echo_with_minuses(message=msg) 708 | 709 | wf.validate() 710 | 711 | def test_add_step_with_minuses_and_underscores(self, tmpdir): 712 | wf = setup_workflowgenerator(tmpdir) 713 | sf = tmpdir.join('file-names/echo-with-minuses_and_underscores.cwl') 714 | step_file = sf.strpath 715 | wf.load(step_file=step_file) 716 | msg = wf.add_input(msg='string') 717 | wf.echo_with_minuses_and_underscores(message=msg) 718 | 719 | wf.validate() 720 | 721 | def test_load_step_with_duplicate_python_name(self, tmpdir): 722 | wf = setup_workflowgenerator(tmpdir) 723 | with pytest.warns(UserWarning): 724 | wf.load(steps_dir=tmpdir.join('file-names').strpath) 725 | 726 | 727 | class TestWorkflowStepsListOfInputsFromWorkflowInputsOrStepOutputs(object): 728 | def test_add_step_with_list_of_inputs(self, tmpdir): 729 | wf = setup_workflowgenerator(tmpdir) 730 | step_file = tmpdir.join('misc/echo2.cwl').strpath 731 | wf.load(step_file=step_file) 732 | 733 | str1 = wf.add_input(str1='string') 734 | str2 = wf.add_input(str2='string') 735 | 736 | wf.echo2(message=[str1, str2]) 737 | 738 | wf.validate() 739 | 740 | assert wf.has_multiple_inputs 741 | assert wf._has_requirements() 742 | requirements = wf.to_obj()['requirements'] 743 | assert {'class': 'MultipleInputFeatureRequirement'} in requirements 744 | 745 | def test_add_step_with_list_of_inputs_unequal_types(self, tmpdir): 746 | wf = setup_workflowgenerator(tmpdir) 747 | step_file = tmpdir.join('misc/echo2.cwl').strpath 748 | wf.load(step_file=step_file) 749 | 750 | str1 = wf.add_input(str1='string') 751 | str2 = wf.add_input(str2='int') 752 | 753 | with pytest.raises(ValueError): 754 | wf.echo2(message=[str1, str2]) 755 | 
756 | def test_add_step_with_list_of_inputs_wrong_type(self, tmpdir): 757 | wf = setup_workflowgenerator(tmpdir) 758 | step_file = tmpdir.join('misc/echo2.cwl').strpath 759 | wf.load(step_file=step_file) 760 | 761 | str1 = wf.add_input(str1='int') 762 | str2 = wf.add_input(str2='int') 763 | 764 | with pytest.raises(ValueError): 765 | wf.echo2(message=[str1, str2]) 766 | 767 | 768 | class TestWorkflowWithNonPythonStepInputAndOutputNames(object): 769 | def test_add_step_with_non_python_input_and_output_names(self, tmpdir): 770 | wf = setup_workflowgenerator(tmpdir) 771 | 772 | step_file = tmpdir.join('misc/non-python-names.cwl').strpath 773 | wf.load(step_file=step_file) 774 | 775 | msg1 = wf.add_input(msg1='string') 776 | msg2 = wf.add_input(msg2='string?') 777 | 778 | echo_out = wf.non_python_names(first_message=msg1, 779 | optional_message=msg2) 780 | 781 | wf.add_outputs(out=echo_out) 782 | 783 | wf.validate() 784 | 785 | def test_type_checking_with_non_python_input_name(self, tmpdir): 786 | wf = setup_workflowgenerator(tmpdir) 787 | 788 | step_file = tmpdir.join('misc/non-python-names.cwl').strpath 789 | wf.load(step_file=step_file) 790 | 791 | msg1 = wf.add_input(msg1='int') 792 | msg2 = wf.add_input(msg2='string?') 793 | 794 | with pytest.raises(ValueError): 795 | wf.non_python_names(first_message=msg1, 796 | optional_message=msg2) 797 | 798 | 799 | class TestArraysAndOtherComplexWFInputTypes(object): 800 | def test_array_bracket_notation(self, tmpdir): 801 | wf = setup_workflowgenerator(tmpdir) 802 | 803 | wf.add_input(arr='int[]') 804 | 805 | wf.validate() 806 | 807 | def test_array_dictionary(self, tmpdir): 808 | wf = setup_workflowgenerator(tmpdir) 809 | 810 | wf.add_input(arr=dict(type='array', items='int')) 811 | 812 | wf.validate() 813 | 814 | def test_array_of_arrays_of_strings(self, tmpdir): 815 | wf = setup_workflowgenerator(tmpdir) 816 | 817 | complex_input = dict(type='array', items=dict(type='array', 818 | items='string')) 819 | 
wf.add_input(my_array_of_array_of_strings=complex_input) 820 | 821 | wf.validate() 822 | 823 | def test_array_dictionary_with_additional_type_declaration(self, tmpdir): 824 | wf = setup_workflowgenerator(tmpdir) 825 | 826 | wf.add_input(arr=dict(type=dict(type='array', items='int'))) 827 | 828 | with pytest.raises(ValidationException): 829 | wf.validate() 830 | -------------------------------------------------------------------------------- /tests/test_yamlutils.py: -------------------------------------------------------------------------------- 1 | from scriptcwl.yamlutils import is_multiline 2 | from scriptcwl import WorkflowGenerator 3 | 4 | import os 5 | 6 | 7 | def test_is_multiline(): 8 | assert not is_multiline('single line string') 9 | assert is_multiline('multi\nline\nstring') 10 | 11 | 12 | def test_multiline_output(tmpdir): 13 | wf = WorkflowGenerator() 14 | wf.set_documentation('Testing a multiline\ndocumentation string') 15 | tmpfile = os.path.join(str(tmpdir), 'test.cwl') 16 | wf.save(tmpfile, mode='abs') 17 | with open(tmpfile) as f: 18 | contents = f.readlines() 19 | assert len(contents) > 7 20 | --------------------------------------------------------------------------------