├── .coveragerc ├── .coveralls ├── .editorconfig ├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── conf.py ├── extending.md ├── getting_started.md ├── index.rst ├── make.bat ├── pipeline.gif ├── reporting.md ├── scripting.md ├── syntax.md └── what_is_metapipe.md ├── metapipe ├── __init__.py ├── __main__.py ├── app.py ├── models │ ├── __init__.py │ ├── command.py │ ├── command_template.py │ ├── command_template_factory.py │ ├── grammar.py │ ├── job.py │ ├── job_template.py │ ├── local_job.py │ ├── pbs_job.py │ ├── queue.py │ ├── reporting.py │ ├── sge_job.py │ └── tokens.py ├── parser.py ├── runtime.py └── templates │ ├── __init__.py │ ├── output_script.tmpl.sh │ └── progress-report.tmpl.html ├── requirements.txt ├── setup.py └── test ├── __init__.py ├── files ├── mp.1.1-1.output ├── mp.1.1.job ├── mp.1.1.job_stderr ├── mp.1.1.job_stdout ├── mp.1.1.output ├── mp.1.1.output.gz ├── mp.1.1.output.testing_file ├── mp.1.2.job ├── mp.1.2.job_stderr ├── mp.1.2.job_stdout ├── mp.1.2.output ├── mp.1.2.output.testing_file ├── mp.2.1.job ├── mp.2.1.job_stderr ├── mp.2.1.job_stdout ├── mp.2.1.output ├── mp.2.2.output ├── mp.3.1.output ├── mp.3.2.output ├── mp.3.3.output ├── somefile.1 ├── somefile.1.bam ├── somefile.1.counts ├── somefile.2 ├── somefile.2.bam ├── somefile.2.counts ├── somefile.3 ├── somefile.3.counts ├── somefile.4 ├── somefile.4.counts ├── somefile.5 ├── somefile.6 ├── somefile.bam └── star.my_output ├── fixtures.py ├── mocks.py ├── test_app.py ├── test_command.py ├── test_command_template.py ├── test_command_template_factory.py ├── test_grammar.py ├── test_job.py ├── test_local_job.py ├── test_parser.py ├── test_pbs_job.py ├── test_queue.py ├── test_runtime.py ├── test_sge_job.py ├── test_template.py └── test_tokens.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = test/* 3 | -------------------------------------------------------------------------------- /.coveralls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/.coveralls -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines with a newline ending every file 7 | [*] 8 | end_of_line = lf 9 | insert_final_newline = true 10 | 11 | [*.{js,py}] 12 | charset = utf-8 13 | indent_style = space 14 | indent_size = 4 15 | 16 | [{package.json,.travis.yml}] 17 | indent_style = space 18 | indent_size = 2 19 | 20 | [*.html] 21 | indent_size = 2 22 | indent_style = space 23 | 24 | [*.css] 25 | indent_size = 2 26 | indent_style = space 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | etc/ 2 | docs/_build 3 | cover/ 4 | .coverage 5 | *cache* 6 | *egg* 7 | build/ 8 | dist/ 9 | MANIFEST 10 | pipeline 11 | .metapipe 12 | **.mp 13 | **sample** 14 | *.sh 15 | notes/ 16 | *.pyc 17 | *.swp 18 | htmlcov/ 19 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | -
'2.7' 4 | - '3.4' 5 | - '3.5' 6 | before_script: 7 | - pip install -r requirements.txt 8 | script: nosetests --with-cov --cov-report html --cov-config .coveragerc --cov metapipe 9 | --cov test 10 | after_success: 11 | - coveralls 12 | deploy: 13 | provider: pypi 14 | user: sonicrocketman 15 | password: 16 | secure: cfDJKNv1BMDsJ3NyOpjwVQwBwO3ZcDMVnEDmcNoS4bwONo/pF7+UFbNm/4+AG2Oo9W5u63YNoR/b1MajbaLd9gBCf7uymrOnLxVtFwq1JDb5BWOsegJwPtlxrKxjKjsBAp5BY7cqOivAWEJZuCi6XjNCyG+QSt0vXKqw4U2xqjfx3KtfSea1Hu0aN1YvFn1otod9faXPK80T/4AZ1Ytmauq12vzla1bLJz7djYS2ApBM+pEJodOhw9V53CknrBpm9SfgFRs5xOkKB7FY8Tq208AxDvcufxkwUsqzoipOzfGcBFhVQdREOOLbWUKmExufCHhyXWVmp7yrkLwGX4REWI+unq6SFU61mknVizLfphJ0DSLGWzcoPxwP3vk39q3PHP1XKojEkCicIb5C6r3YJqYpFtF83YjuEmJXew+9GdP1KWyWS5G1xslhxZvklAdkSPsn65GmABFsSNrMLyVdCTllGpgnjrpcbf1jEMP8MTp6+qc8YVjdEDtzgeJ8aoSyC6K9dRg95qixb1COqzTrF0N4LDKRGKKJrHFg+JXUZDSPYdpju5oz1ohm3/96SmdYGqL+ilO1RT3gxhlFV1X30AymAGUcVKCCLpj9dauQALeA16sKvtcFYVxjunjzJwz+OzM7AYlvFd+ak618x4btnmybsT0Nc93enT9seI+LSlE= 17 | on: 18 | tags: true 19 | distributions: sdist bdist_wheel 20 | branch: master 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include metapipe/templates/* 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Metapipe 2 | 3 | *A pipeline generator and runtime system* 4 | 5 | [![Build Status](https://travis-ci.org/TorkamaniLab/metapipe.svg)](https://travis-ci.org/TorkamaniLab/metapipe) 6 | [![Coverage Status](https://coveralls.io/repos/github/TorkamaniLab/metapipe/badge.svg?branch=master)](https://coveralls.io/github/TorkamaniLab/metapipe?branch=master) 7 | [![Python 2.7 Status](https://img.shields.io/badge/Python-2.7-brightgreen.svg)](https://img.shields.io/badge/Python-2.7-blue.svg) 8 | [![Python 3.4 Status](https://img.shields.io/badge/Python-3.4-brightgreen.svg)](https://img.shields.io/badge/Python-3.4-blue.svg) 9 | [![Python 3.5 Status](https://img.shields.io/badge/Python-3.5-brightgreen.svg)](https://img.shields.io/badge/Python-3.5-blue.svg) 10 | [![Packagist](https://img.shields.io/packagist/l/doctrine/orm.svg)](https://github.com/TorkamaniLab/metapipe/blob/master/LICENSE) 11 | 12 | Metapipe is a simple command line tool for building and running complex analysis pipelines. If you use a PBS/Torque queue for cluster computing, or if you have complex batch processing that you want simplified, metapipe is the tool for you. 13 | 14 | 15 | 16 | Metapipe's goal is to improve **readability** and **maintainability** when building complex pipelines. 17 | 18 | In addition to helping you generate and maintain complex pipelines, **metapipe also helps you debug them**! How? Well, metapipe watches your jobs execute and keeps tabs on them. This means that, unlike conventional batch queue systems like PBS/Torque alone, metapipe can give you accurate error information, and even resubmit failing jobs! Metapipe enhances the power of any PBS/Torque queue! 19 | 20 | - What if I [don't use PBS/Torque](#other-queue-systems), or [a queue system at all?](#no-queue-no-problem) 21 | 22 | 23 | ## How do I get it? 24 | 25 | It's super simple! 26 | 27 | `pip install metapipe` 28 | 29 | To make it easy, metapipe runs on Python 2.7, 3.4, and 3.5! 30 | 31 | 32 | ## What does it do? 33 | 34 | In the bad old days (before metapipe), if you wanted to make an analysis pipeline, you needed to know how to code. **Not anymore!** Metapipe makes it easy to build and run your analysis pipelines! **No more code, just commands!** This makes your pipelines easy to understand and change! 35 | 36 | 37 | ## Documentation & Help 38 | 39 | [Check out the full documentation at ReadTheDocs →](http://metapipe.readthedocs.org/en/latest/index.html) 40 | 41 | If you need help with Metapipe, or you'd like to chat about new features, get in touch by filing an issue or at `#metapipe` on freenode! 42 | 43 | 44 | ### Here's a sample! 45 | 46 | Let's say you have a few command-line tools that you want to string together into a pipeline. You used to have to know Python, Perl, Bash, or some other scripting language; now you can use Metapipe! 47 | 48 | ```bash 49 | [COMMANDS] 50 | # Let's get the first and third columns from each of 51 | # our files, and put the output in separate files. 52 | cut -f 1,3 {1||2||3} > {o} 53 | 54 | # Once that's done, we'll need to take the output and 55 | # run each through our custom processing script individually. 56 | # Here we can give a custom extension to the default output file. 57 | python3 my_script.py --output {o.processed.csv} -i {1.*||} 58 | 59 | # Finally, we want to collect each sample and analyze 60 | # them all together. We also need to use a custom version 61 | # of Python for this. 62 | custom_python analysis.py -o {o.results.txt} {2.*} 63 | 64 | [FILES] 65 | 1. controls.1.csv 66 | 2. controls.2.csv 67 | 3. controls.3.csv 68 | 69 | [PATHS] 70 | custom_python ~/path/to/my/custom/python/version 71 | ``` 72 | 73 | Excluding the comments, this entire analysis pipeline is 13 lines long and extremely readable! What's even better? If you want to change any steps, it's super easy! That's the power of Metapipe! 74 | 75 |
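To make the expansion concrete, here is roughly what metapipe generates for the first command above. This is a sketch: the `mp.1.x.output` names follow metapipe's default `mp.<step>.<iteration>` naming shown in the syntax docs, and the exact file names may differ:

```bash
cut -f 1,3 controls.1.csv > mp.1.1.output
cut -f 1,3 controls.2.csv > mp.1.2.output
cut -f 1,3 controls.3.csv > mp.1.3.output
```

Because `||` marks the inputs as independent, these three jobs can run in parallel.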
76 | ## No Queue? No Problem! 77 | 78 | Lots of people don't use a PBS/Torque queue system, or a queue system at all, and metapipe can help them as well! Metapipe runs locally and will give you all the same benefits of a batch queue system! It runs jobs in parallel, provides detailed feedback when jobs go wrong, and automatically re-runs jobs if they fail. 79 | 80 | To run metapipe locally, see the app's help menu! 81 | 82 | `metapipe --help` 83 | 84 | 85 | ## Other Queue Systems 86 | 87 | Metapipe is a very modular tool and is designed to support any execution backend. Right now we only support PBS, but if you know just a little bit of Python, you can add support for any queue easily! *More information coming soon!* 88 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make <target>' where <target> is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | @echo " coverage to run coverage check of the documentation (if enabled)" 49 | 50 | .PHONY: clean 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | .PHONY: html 55 | html: 56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 57 | @echo 58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 59 | 60 | .PHONY: dirhtml 61 | dirhtml: 62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 63 | @echo 64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 65 | 66 | .PHONY: singlehtml 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 71 | 72 | .PHONY: pickle 73 | pickle: 74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 75 | @echo 76 | @echo "Build finished; now you can process the pickle files." 77 | 78 | .PHONY: json 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | .PHONY: htmlhelp 85 | htmlhelp: 86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 87 | @echo 88 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 89 | ".hhp project file in $(BUILDDIR)/htmlhelp." 90 | 91 | .PHONY: qthelp 92 | qthelp: 93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 94 | @echo 95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/metapipe.qhcp" 98 | @echo "To view the help file:" 99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/metapipe.qhc" 100 | 101 | .PHONY: applehelp 102 | applehelp: 103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 104 | @echo 105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 106 | @echo "N.B.
You won't be able to view it unless you put it in" \ 107 | "~/Library/Documentation/Help or install it in your application" \ 108 | "bundle." 109 | 110 | .PHONY: devhelp 111 | devhelp: 112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 113 | @echo 114 | @echo "Build finished." 115 | @echo "To view the help file:" 116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/metapipe" 117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/metapipe" 118 | @echo "# devhelp" 119 | 120 | .PHONY: epub 121 | epub: 122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 123 | @echo 124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 125 | 126 | .PHONY: latex 127 | latex: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo 130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 132 | "(use \`make latexpdf' here to do that automatically)." 133 | 134 | .PHONY: latexpdf 135 | latexpdf: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo "Running LaTeX files through pdflatex..." 138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 140 | 141 | .PHONY: latexpdfja 142 | latexpdfja: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through platex and dvipdfmx..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: text 149 | text: 150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 151 | @echo 152 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 153 | 154 | .PHONY: man 155 | man: 156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 157 | @echo 158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 159 | 160 | .PHONY: texinfo 161 | texinfo: 162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 163 | @echo 164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 165 | @echo "Run \`make' in that directory to run these through makeinfo" \ 166 | "(use \`make info' here to do that automatically)." 167 | 168 | .PHONY: info 169 | info: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo "Running Texinfo files through makeinfo..." 172 | make -C $(BUILDDIR)/texinfo info 173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 174 | 175 | .PHONY: gettext 176 | gettext: 177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 178 | @echo 179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 180 | 181 | .PHONY: changes 182 | changes: 183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 184 | @echo 185 | @echo "The overview file is in $(BUILDDIR)/changes." 186 | 187 | .PHONY: linkcheck 188 | linkcheck: 189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 190 | @echo 191 | @echo "Link check complete; look for any errors in the above output " \ 192 | "or in $(BUILDDIR)/linkcheck/output.txt." 193 | 194 | .PHONY: doctest 195 | doctest: 196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 197 | @echo "Testing of doctests in the sources finished, look at the " \ 198 | "results in $(BUILDDIR)/doctest/output.txt." 
199 | 200 | .PHONY: coverage 201 | coverage: 202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 203 | @echo "Testing of coverage in the sources finished, look at the " \ 204 | "results in $(BUILDDIR)/coverage/python.txt." 205 | 206 | .PHONY: xml 207 | xml: 208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 209 | @echo 210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 211 | 212 | .PHONY: pseudoxml 213 | pseudoxml: 214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 215 | @echo 216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 217 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # metapipe documentation build configuration file, created by 5 | # sphinx-quickstart on Mon Jan 25 16:10:38 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | from recommonmark.parser import CommonMarkParser 20 | 21 | source_parsers = { 22 | '.md': CommonMarkParser, 23 | } 24 | 25 | # Read the Docs theme 26 | 27 | # on_rtd is whether we are on readthedocs.org, this line of code grabbed from docs.readthedocs.org 28 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 29 | 30 | if not on_rtd: # only import and set the theme if we're building docs locally 31 | import sphinx_rtd_theme 32 | html_theme = 'sphinx_rtd_theme' 33 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 34 | # otherwise, readthedocs.org uses their theme by default, so no need to specify it 35 | 36 | # If extensions (or modules to document with autodoc) are in another directory, 37 | # add these directories to sys.path here. If the directory is relative to the 38 | # documentation root, use os.path.abspath to make it absolute, like shown here. 39 | #sys.path.insert(0, os.path.abspath('.')) 40 | 41 | # -- General configuration ------------------------------------------------ 42 | 43 | # If your documentation needs a minimal Sphinx version, state it here. 44 | #needs_sphinx = '1.0' 45 | 46 | # Add any Sphinx extension module names here, as strings. They can be 47 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 48 | # ones. 49 | extensions = [] 50 | 51 | # Add any paths that contain templates here, relative to this directory. 52 | #templates_path = ['_templates'] 53 | 54 | # The suffix(es) of source filenames. 55 | # You can specify multiple suffix as a list of string: 56 | # source_suffix = ['.rst', '.md'] 57 | source_suffix = ['.rst', '.md'] 58 | 59 | # The encoding of source files. 60 | #source_encoding = 'utf-8-sig' 61 | 62 | # The master toctree document. 63 | master_doc = 'index' 64 | 65 | # General information about the project. 66 | project = 'metapipe' 67 | copyright = '2016, Brian Schrader' 68 | author = 'Brian Schrader' 69 | 70 | # The version info for the project you're documenting, acts as replacement for 71 | # |version| and |release|, also used in various other places throughout the 72 | # built documents. 73 | # 74 | # The short X.Y version. 
75 | version = '0.1' 76 | # The full version, including alpha/beta/rc tags. 77 | release = '0.1' 78 | 79 | # The language for content autogenerated by Sphinx. Refer to documentation 80 | # for a list of supported languages. 81 | # 82 | # This is also used if you do content translation via gettext catalogs. 83 | # Usually you set "language" from the command line for these cases. 84 | language = None 85 | 86 | # There are two options for replacing |today|: either, you set today to some 87 | # non-false value, then it is used: 88 | #today = '' 89 | # Else, today_fmt is used as the format for a strftime call. 90 | #today_fmt = '%B %d, %Y' 91 | 92 | # List of patterns, relative to source directory, that match files and 93 | # directories to ignore when looking for source files. 94 | exclude_patterns = ['_build'] 95 | 96 | # The reST default role (used for this markup: `text`) to use for all 97 | # documents. 98 | #default_role = None 99 | 100 | # If true, '()' will be appended to :func: etc. cross-reference text. 101 | #add_function_parentheses = True 102 | 103 | # If true, the current module name will be prepended to all description 104 | # unit titles (such as .. function::). 105 | #add_module_names = True 106 | 107 | # If true, sectionauthor and moduleauthor directives will be shown in the 108 | # output. They are ignored by default. 109 | #show_authors = False 110 | 111 | # The name of the Pygments (syntax highlighting) style to use. 112 | pygments_style = 'sphinx' 113 | 114 | # A list of ignored prefixes for module index sorting. 115 | #modindex_common_prefix = [] 116 | 117 | # If true, keep warnings as "system message" paragraphs in the built documents. 118 | #keep_warnings = False 119 | 120 | # If true, `todo` and `todoList` produce output, else they produce nothing. 121 | todo_include_todos = False 122 | 123 | 124 | # -- Options for HTML output ---------------------------------------------- 125 | 126 | # The theme to use for HTML and HTML Help pages. See the documentation for 127 | # a list of builtin themes. 128 | #html_theme = 'alabaster' 129 | 130 | # Theme options are theme-specific and customize the look and feel of a theme 131 | # further. For a list of options available for each theme, see the 132 | # documentation. 133 | #html_theme_options = {} 134 | 135 | # Add any paths that contain custom themes here, relative to this directory. 136 | #html_theme_path = [] 137 | 138 | # The name for this set of Sphinx documents. If None, it defaults to 139 | # " v documentation". 140 | #html_title = None 141 | 142 | # A shorter title for the navigation bar. Default is the same as html_title. 143 | #html_short_title = None 144 | 145 | # The name of an image file (relative to this directory) to place at the top 146 | # of the sidebar. 147 | #html_logo = None 148 | 149 | # The name of an image file (within the static path) to use as favicon of the 150 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 151 | # pixels large. 152 | #html_favicon = None 153 | 154 | # Add any paths that contain custom static files (such as style sheets) here, 155 | # relative to this directory. They are copied after the builtin static files, 156 | # so a file named "default.css" will overwrite the builtin "default.css". 157 | html_static_path = ['_static'] 158 | 159 | # Add any extra paths that contain custom files (such as robots.txt or 160 | # .htaccess) here, relative to this directory. These files are copied 161 | # directly to the root of the documentation. 
162 | #html_extra_path = [] 163 | 164 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 165 | # using the given strftime format. 166 | #html_last_updated_fmt = '%b %d, %Y' 167 | 168 | # If true, SmartyPants will be used to convert quotes and dashes to 169 | # typographically correct entities. 170 | #html_use_smartypants = True 171 | 172 | # Custom sidebar templates, maps document names to template names. 173 | #html_sidebars = {} 174 | 175 | # Additional templates that should be rendered to pages, maps page names to 176 | # template names. 177 | #html_additional_pages = {} 178 | 179 | # If false, no module index is generated. 180 | #html_domain_indices = True 181 | 182 | # If false, no index is generated. 183 | #html_use_index = True 184 | 185 | # If true, the index is split into individual pages for each letter. 186 | #html_split_index = False 187 | 188 | # If true, links to the reST sources are added to the pages. 189 | #html_show_sourcelink = True 190 | 191 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 192 | #html_show_sphinx = True 193 | 194 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 195 | #html_show_copyright = True 196 | 197 | # If true, an OpenSearch description file will be output, and all pages will 198 | # contain a tag referring to it. The value of this option must be the 199 | # base URL from which the finished HTML is served. 200 | #html_use_opensearch = '' 201 | 202 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 203 | #html_file_suffix = None 204 | 205 | # Language to be used for generating the HTML full-text search index. 206 | # Sphinx supports the following languages: 207 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 208 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' 209 | #html_search_language = 'en' 210 | 211 | # A dictionary with options for the search language support, empty by default. 212 | # Now only 'ja' uses this config value 213 | #html_search_options = {'type': 'default'} 214 | 215 | # The name of a javascript file (relative to the configuration directory) that 216 | # implements a search results scorer. If empty, the default will be used. 217 | #html_search_scorer = 'scorer.js' 218 | 219 | # Output file base name for HTML help builder. 220 | htmlhelp_basename = 'metapipedoc' 221 | 222 | # -- Options for LaTeX output --------------------------------------------- 223 | 224 | latex_elements = { 225 | # The paper size ('letterpaper' or 'a4paper'). 226 | #'papersize': 'letterpaper', 227 | 228 | # The font size ('10pt', '11pt' or '12pt'). 229 | #'pointsize': '10pt', 230 | 231 | # Additional stuff for the LaTeX preamble. 232 | #'preamble': '', 233 | 234 | # Latex figure (float) alignment 235 | #'figure_align': 'htbp', 236 | } 237 | 238 | # Grouping the document tree into LaTeX files. List of tuples 239 | # (source start file, target name, title, 240 | # author, documentclass [howto, manual, or own class]). 241 | latex_documents = [ 242 | (master_doc, 'metapipe.tex', 'metapipe Documentation', 243 | 'Brian Schrader', 'manual'), 244 | ] 245 | 246 | # The name of an image file (relative to this directory) to place at the top of 247 | # the title page. 248 | #latex_logo = None 249 | 250 | # For "manual" documents, if this is true, then toplevel headings are parts, 251 | # not chapters. 252 | #latex_use_parts = False 253 | 254 | # If true, show page references after internal links. 
#latex_show_pagerefs = False 256 | 257 | # If true, show URL addresses after external links. 258 | #latex_show_urls = False 259 | 260 | # Documents to append as an appendix to all manuals. 261 | #latex_appendices = [] 262 | 263 | # If false, no module index is generated. 264 | #latex_domain_indices = True 265 | 266 | 267 | # -- Options for manual page output --------------------------------------- 268 | 269 | # One entry per manual page. List of tuples 270 | # (source start file, name, description, authors, manual section). 271 | man_pages = [ 272 | (master_doc, 'metapipe', 'metapipe Documentation', 273 | [author], 1) 274 | ] 275 | 276 | # If true, show URL addresses after external links. 277 | #man_show_urls = False 278 | 279 | 280 | # -- Options for Texinfo output ------------------------------------------- 281 | 282 | # Grouping the document tree into Texinfo files. List of tuples 283 | # (source start file, target name, title, author, 284 | # dir menu entry, description, category) 285 | texinfo_documents = [ 286 | (master_doc, 'metapipe', 'metapipe Documentation', 287 | author, 'metapipe', 'One line description of project.', 288 | 'Miscellaneous'), 289 | ] 290 | 291 | # Documents to append as an appendix to all manuals. 292 | #texinfo_appendices = [] 293 | 294 | # If false, no module index is generated. 295 | #texinfo_domain_indices = True 296 | 297 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 298 | #texinfo_show_urls = 'footnote' 299 | 300 | # If true, do not generate a @detailmenu in the "Top" node's menu. 301 | #texinfo_no_detailmenu = False 302 | -------------------------------------------------------------------------------- /docs/extending.md: -------------------------------------------------------------------------------- 1 | # Extending Metapipe 2 | 3 | Metapipe provides two extension points for developers to extend its functionality: custom queues and custom job types. In most cases, custom queues are an advanced feature that most users and developers will not need to worry about, but the hook is there if you need it. 4 | 5 | To add support for a queue system not included with metapipe, all you need to do is add a job type. 6 | 7 | 8 | ## Custom Job types 9 | 10 | All job types are subclasses of the `metapipe.models.Job` class. The base job class implements a lot of the functionality that is common between all job types, and has method stubs for the required functionality that needs to be implemented by any subclass. This section will cover what duties job subclasses have, how to subclass the main `Job` class, and what to fill in. 11 | 12 | 13 | ### The Root Job Class 14 | 15 | The code for the main job class can be found [here][job]. To create your own job type, simply subclass it as follows: 16 | 17 | ```python 18 | from metapipe.models import Job 19 | 20 | class MyCustomJob(Job): 21 | 22 | def __repr__(self): 23 | return '<MyCustomJob: {}>'.format(self.cmd) 24 | ``` 25 | 26 | There are six methods you need to fill in to have a complete job class. Your full job subclass should have the following form: 27 | 28 | ```python 29 | class MyCustomJob(Job): 30 | 31 | def __repr__(self): 32 | return '<MyCustomJob: {}>'.format(self.cmd) 33 | 34 | # Override these... 35 | 36 | @property 37 | def cmd(self): 38 | """ Returns the command needed to submit the calculations. 39 | Normally, this is just the command itself; however, if 40 | using a queue system, then this should return the command to 41 | submit the job to the queue. 42 | """ 43 | pass 44 | 45 | def submit(self): 46 | """ Submits the job to be run. If an external queue system is used, 47 | this method submits itself to that queue. Else it runs the job itself. 48 | :see: call 49 | """ 50 | pass 51 | 52 | def is_running(self): 53 | """ Returns whether the job is running or not. """ 54 | pass 55 | 56 | def is_queued(self): 57 | """ Returns whether the job is queued or not. 58 | This function is only used if jobs are submitted to an external queue. 59 | """ 60 | pass 61 | 62 | def is_complete(self): 63 | """ Returns whether the job is complete or not. """ 64 | pass 65 | 66 | def is_error(self): 67 | """ Checks to see if the job errored out. """ 68 | pass 69 | ``` 70 | 71 | The duty of the job types is to submit the jobs when asked by the queue, and to inform the queue about the status of jobs. The queue needs to know when a job is running, queued, complete, or when an error has occurred. 72 | 73 | Each of the `is_*` callbacks should return a boolean value, and the `cmd` property should return the bash command (as an array of strings) that can be called to run the job. The job class has an attribute `filename` that contains the path of the bash script containing the job command (i.e. `['bash', self.filename]`). 74 | 75 | **IMPORTANT:** All of the above handlers are required for custom job types to function properly. 76 | 77 | Here is the code for the `cmd` property of the `PBSJob` class: 78 | 79 | ```python 80 | class PBSJob(Job): 81 | #... 82 | @property 83 | def cmd(self): 84 | return ['qsub', self.filename] 85 | #... 86 | ``` 87 | 88 | The `submit` call should do any logic pertaining to submitting the job or tracking the number of total submissions. For example, here is the code for submitting a job to the PBS queue: 89 | 90 | ```python 91 | class PBSJob(Job): 92 | #... 93 | def submit(self, job): 94 | if self.attempts == 0: 95 | job.make() 96 | self.attempts += 1 97 | out = call(job.cmd) 98 | self.waiting = False 99 | self.id = out[:out.index('.')] 100 | #... 101 | ``` 102 | 103 | As you can see, it keeps track of the number of times the job was submitted, and then calls the `call` function, provided in the root job module, to execute the job. Since PBS assigns job ids to each job at submission time, it also captures that information and saves it for later use. 104 |
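To tie the pieces together, here is a sketch of a job type that runs each job as a plain local subprocess. It assumes the base class behaves as described above (in particular, that `make()` writes the job script to `self.filename`, and that `self.attempts` starts at 0); treat it as a starting point, not a drop-in implementation:

```python
import subprocess

from metapipe.models import Job


class LocalShellJob(Job):
    """ A sketch of a job type that runs its script as a local subprocess. """

    _proc = None

    def __repr__(self):
        return '<LocalShellJob: {}>'.format(self.cmd)

    @property
    def cmd(self):
        # No queue involved: the command is just the job script itself.
        return ['bash', self.filename]

    def submit(self):
        if self.attempts == 0:
            self.make()  # assumed to write the job script, per the base class
        self.attempts += 1
        # Popen returns immediately; the queue polls the is_* methods below.
        self._proc = subprocess.Popen(self.cmd)

    def is_running(self):
        return self._proc is not None and self._proc.poll() is None

    def is_queued(self):
        # Local subprocesses never wait in an external queue.
        return False

    def is_complete(self):
        return self._proc is not None and self._proc.poll() == 0

    def is_error(self):
        returncode = self._proc.poll() if self._proc is not None else None
        return returncode is not None and returncode != 0
```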
105 | [job]: https://github.com/TorkamaniLab/metapipe/blob/master/metapipe/models/job.py#L20 106 | 107 | 108 | ## Custom Queues 109 | 110 | In the event that your analysis requires more control over the submission process for jobs, the metapipe module also allows for the customization of queue logic by subclassing `metapipe.models.Queue`. This section will cover how to subclass the root queue, but it is left to the reader to determine why you might want to do this. From personal experience, customizing the queue should be a very rare requirement. 111 | 112 | 113 | ### The Root Queue class 114 | 115 | As is the case for custom job types, all queues inherit from the root Queue in `metapipe.models.Queue`, including the main `JobQueue` that is used by the metapipe command line tool. 116 | 117 | To customize how the queue responds to various types of events, subclass it and fill in the following methods. All of the methods are optional, so just omit any handlers that you don't need. 118 | 119 | ```python 120 | class MyCustomQueue(Queue): 121 | 122 | def __repr__(self): 123 | return '<MyCustomQueue: %d>' % len(self.queue) 124 | 125 | # Callbacks... 126 | 127 | def on_start(self): 128 | """ Called when the queue is starting up. """ 129 | pass 130 | 131 | def on_end(self): 132 | """ Called when the queue is shutting down. """ 133 | pass 134 | 135 | def on_locked(self): 136 | """ Called when the queue is locked and no jobs can proceed. 137 | If this callback returns True, then the queue will be restarted, 138 | else it will be terminated. 139 | """ 140 | return True 141 | 142 | def on_tick(self): 143 | """ Called when a tick of the queue is complete. """ 144 | pass 145 | 146 | def on_ready(self, job): 147 | """ Called when a job is ready to be submitted. 148 | :param job: The given job that is ready. 149 | """ 150 | pass 151 | 152 | def on_submit(self, job): 153 | """ Called when a job has been submitted. 154 | :param job: The given job that has been submitted. 155 | """ 156 | pass 157 | 158 | def on_complete(self, job): 159 | """ Called when a job has completed. 160 | :param job: The given job that has completed. 161 | """ 162 | pass 163 | 164 | def on_error(self, job): 165 | """ Called when a job has errored. 166 | :param job: The given job that has errored. 167 | """ 168 | pass 169 | ```
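As a minimal example, here is a sketch of a queue subclass that logs failures and refuses to restart when locked. The callback names and the `on_locked` return semantics come from the skeleton above; the logging setup is illustrative:

```python
import logging

from metapipe.models import Queue

log = logging.getLogger('metapipe')


class StrictQueue(Queue):
    """ A queue that logs job failures and terminates when locked. """

    def on_error(self, job):
        log.error('Job %s failed.', job)

    def on_locked(self):
        # Returning False terminates the queue instead of restarting it.
        log.error('Queue is locked; no jobs can proceed. Shutting down.')
        return False
```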
170 | 171 | 172 | ## Using Your Custom Code 173 | 174 | Once you have subclassed and filled in the required code for your custom job type or queue, it is time to use your code. If your code adapts metapipe to work on a common computing platform or system, then please consider contributing it to the metapipe project. This helps the rest of the community use a broader range of hardware to solve our problems! 175 | 176 | 177 | ### Building your custom pipeline 178 | 179 | Use the following code to build your pipeline. This code is adapted from [metapipe's app.py][app], the command line tool that metapipe uses to build pipelines. 180 | 181 | ```python 182 | from my_custom_job import MyCustomJob  # wherever your job type lives 183 | 184 | JOB_TYPES = { 185 | 'my_custom_job_type': MyCustomJob 186 | } 187 | 188 | parser = Parser(config) 189 | try: 190 | command_templates = parser.consume() 191 | except ValueError as e: 192 | raise SyntaxError('Invalid config file. \n%s' % e) 193 | 194 | pipeline = Runtime(command_templates, JOB_TYPES, 'my_custom_job_type') 195 | ``` 196 | 197 | **IMPORTANT:** Adding custom queues is coming soon! 198 | 199 | For more information on how to script metapipe once you have custom jobs, see [Scripting Metapipe](scripting.html). 200 | 201 | [app]: https://github.com/TorkamaniLab/metapipe/blob/master/metapipe/app.py 202 | -------------------------------------------------------------------------------- /docs/getting_started.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This section contains a quick guide to installing and using metapipe. For the detailed syntax guide, see the [Metapipe Syntax][syntax]. 4 | 5 | [syntax]: syntax.html 6 | 7 | 8 | ## Installation 9 | 10 | Metapipe is available on PyPi, so installing is easy. 11 | 12 | ```bash 13 | $ pip install metapipe 14 | ``` 15 | 16 | 17 | To make it easy, metapipe runs on Python 2.7, 3.4, and 3.5! 18 | 19 | 20 | ## Using metapipe 21 | 22 | By default, metapipe is both a command line tool and a Python module that can be used to build and run pipelines in code. This means that whether you're a user or a developer, Metapipe can be adapted to fit your needs. 23 | 24 | To see metapipe's help menu, type the following, just as you'd expect. 25 | 26 | ```bash 27 | $ metapipe --help 28 | ``` 29 | 30 | ## Sample Pipeline 31 | 32 | Here's a simple pipeline you can use to test metapipe. Typically, more complex pipelines are used for things like bioinformatics or batch processing. 34 | 35 | But first, we need some sample files to work with. Run these commands to generate them. 36 | 37 | ```bash 38 | $ echo "SAMPLE DATA 1" > test_file.1.txt 39 | $ echo "SAMPLE DATA 2" > test_file.2.txt 40 | $ echo "SAMPLE DATA 3" > test_file.3.txt 41 | ``` 42 | 43 | Now that we have our data, let's analyze it! Here's our sample pipeline: 44 | 45 | ```bash 46 | [COMMANDS] 47 | # Remove the ending number from each of our data files. 48 | cut -f 1-2 -d ' ' {1||2||3} > {o} 49 | 50 | # Paste each of the files together and save it to a final output. 51 | # Since this is our last step and it has only one output, there's no need to have 52 | # metapipe name the output file. We'll name it ourselves. 53 | paste {1.1,1.2,1.3} > final_output.txt 54 | 55 | [FILES] 56 | 1. test_file.1.txt 57 | 2. test_file.2.txt 58 | 3. test_file.3.txt 59 | ``` 60 | 61 | Save that as `sample_pipeline.mp`, open a terminal, and `cd` to that directory. 62 | 63 | 64 | ### Run the sample pipeline locally 65 | 66 | Local execution is the default for metapipe, so you just need to specify your metapipe file and an output destination. 67 | 68 | ```bash 69 | $ metapipe -o pipeline.sh sample_pipeline.mp 70 | ``` 71 | 72 | This will generate an output script named `pipeline.sh`, which runs the pipeline. Simply run it to start your pipeline! 73 | 74 | ```bash 75 | $ sh pipeline.sh 76 | ``` 77 | 78 | That's it! Metapipe will run in the foreground, watching your jobs until everything finishes.
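Once it finishes, you can sanity-check the result. Given the sample inputs above, `cut -f 1-2 -d ' '` keeps the first two space-separated fields of each file, and `paste` joins the three intermediate outputs, so `final_output.txt` should contain one line with three tab-separated columns:

```bash
$ cat final_output.txt
SAMPLE DATA	SAMPLE DATA	SAMPLE DATA
```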
79 | 80 | 81 | ### Run the sample pipeline on PBS 82 | 83 | Simply change the metapipe command to the following: 84 | 85 | ```bash 86 | $ metapipe -o pipeline.sh -j pbs sample_pipeline.mp 87 | ``` 88 | 89 | Then simply submit metapipe as a job: 90 | 91 | ```bash 92 | $ qsub pipeline.sh 93 | ``` 94 | 95 | Metapipe will run as a job on the PBS/Torque queue and submit other jobs to the same queue! It will keep tabs on the running jobs, submit new ones when they're ready, and exit when all jobs finish. -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. metapipe documentation master file, created by 2 | sphinx-quickstart on Mon Jan 25 16:10:38 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to metapipe's documentation! 7 | ==================================== 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | what_is_metapipe.md 15 | getting_started.md 16 | syntax.md 17 | scripting.md 18 | extending.md 19 | reporting.md 20 | 21 | 22 | Indices and tables 23 | ================== 24 | 25 | * :ref:`genindex` 26 | * :ref:`modindex` 27 | * :ref:`search` 28 | 29 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | echo. coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fallback to Python version if any 52 | %SPHINXBUILD% 1>NUL 2>NUL 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 
112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\metapipe.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\metapipe.ghc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 
233 | echo.Testing of doctests in the sources finished, look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished, look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /docs/pipeline.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/docs/pipeline.gif -------------------------------------------------------------------------------- /docs/reporting.md: -------------------------------------------------------------------------------- 1 | # Measuring Pipeline Progress 2 | 3 | While Metapipe runs your pipeline, it writes updates to `stdout`. These updates can be useful on their own, but it is often more convenient to get additional information in a friendlier format. 4 | 5 | Metapipe provides a few different methods of visualizing the progress of your pipeline. These options are specified by the `--report-type` option. 6 | 7 | 8 | ## Text based reporting 9 | 10 | ``` 11 | --report-type text 12 | ``` 13 | 14 | This option is the default. Metapipe will write to `stdout`, and this output can be redirected to a file. 15 | 16 | 17 | ## HTML based reporting 18 | 19 | ``` 20 | --report-type html 21 | ``` 22 | 23 | Using this option, Metapipe will generate an HTML report of the pipeline as it runs. This static report represents the current state of the pipeline and shows which steps have already been completed. The report also includes a progress bar that gives a rough visualization of the pipeline's progress. 24 | 25 | **Important:** This progress indicator is based on the overall number of steps to be completed and represents the number of steps remaining. It does not correlate with the amount of time remaining, since that depends on how long each step takes. 26 | -------------------------------------------------------------------------------- /docs/scripting.md: -------------------------------------------------------------------------------- 1 | # Scripting Metapipe 2 | 3 | In addition to being a command line tool, metapipe is also a Python module. You can use this module to extend or script metapipe to fit your specific needs. This section will discuss scripting metapipe, and building/running jobs using Python. For information on how to extend metapipe's builtin job types or queue system, see [Extending Metapipe](extending.html). 4 | 5 | 6 | ## The Run Interface 7 | 8 | The first and easiest way to script Metapipe is to invoke it via the Python interface. 9 | 10 | 11 | ```python 12 | from metapipe import run 13 | 14 | config_text = get_config_text() 15 | run(config_text) 16 | ```
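For instance, a minimal self-contained script might look like the sketch below. The inline config is illustrative, and it assumes `run`'s default options (local execution) are acceptable; see the docstring linked below for the full signature:

```python
from metapipe import run

# An inline pipeline; in practice this would usually be read
# from a .mp file instead.
config_text = '''
[COMMANDS]
cat {1} | tr 'a-z' 'A-Z' > {o}

[FILES]
1. input.txt
'''

run(config_text)
```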
17 | 18 | For detailed information, see the [run method's docstring](https://github.com/TorkamaniLab/metapipe/blob/master/metapipe/app.py#L90). 19 | 20 | -------------------------------------------------------------------------------- /docs/syntax.md: -------------------------------------------------------------------------------- 1 | # Metapipe Syntax 2 | 3 | The syntax for Pipeline Config files is as follows. 4 | 5 | 6 | ## Section Definitions 7 | 8 | In each Metapipe file, there are a number of different sections you can specify. Each has its own purpose and function. Each section is denoted with a header in brackets at the top of the section. 9 | 10 | All sections support comments, and in most sections, they are not parsed as input. 11 | 12 | 13 | ### Commands 14 | 15 | The commands section is the only required Metapipe config section. Specified by the `[COMMANDS]` header, this is where the various steps of the pipeline are specified. Commands are very similar to normal shell commands, and most shell commands are valid. The only difference is in the input/output of each command. For these parts, use Metapipe's input/output syntax to indicate the location of the desired input and output. 16 | 17 | **Example:** 18 | 19 | ```bash 20 | [COMMANDS] 21 | # Here we cat a hardcoded input file into sed 22 | # and redirect the output to a metapipe output token. 23 | cat somefile.txt | sed 's/replace me/with me/' > {o} 24 | ``` 25 | 26 | Metapipe automatically creates a filename for the given output token and assigns that file an alias. The alias structure is `command_number.command_iteration-output_number`, where the output number is optional. 27 | 28 | **Important:** Commands are *NOT* run sequentially. As commands are parsed, they are evaluated based on what inputs they take in and what outputs they generate. For more information, see [Command Structure](#command-structure). Commands are run as soon as they are deemed ready, and any command that does not specify inputs via Metapipe's input patterns will be run immediately. 29 | 30 | 31 | ### Paths 32 | 33 | The paths section allows users to simplify their commands by creating aliases, or short names, for binaries. Paths are structured as a single-word alias followed by a space; the rest of the line is the path. The paths section is denoted by the `[PATHS]` header. 34 | 35 | ```bash 36 | [COMMANDS] 37 | # Here we've aliased Python. When the script is generated, 38 | # the hardcoded path will be substituted in. 39 | python2 my_script.py 40 | 41 | # Here we're using the builtin python and using paths 42 | # to simplify the arguments. 43 | python my_script.py somefile 44 | 45 | [PATHS] 46 | python2 /usr/local/bin/python2.7.4 47 | somefile /a/long/file/path 48 | ``` 49 | 50 | Paths can also be used to create pseudo-variables for long configuration options. When doing this, it's recommended to use a bash-variable-like syntax because it reminds the reader that the variable is not a literal in the command. 51 | 52 | **Reminder**: Paths are substituted in after the inputs have been processed. This means that `{}` characters are treated as literals and not as input markers. 53 | 54 | ```bash 55 | [COMMANDS] 56 | # Here, the braces after -o represent an output token, 57 | # but the {} inside the $OPTIONS variable is 58 | # treated as a literal. 59 | python my_script.py -o {o} $OPTIONS 60 | 61 | [PATHS] 62 | $OPTIONS -rfg --do-something --no-save --get --no-get -I {} 63 | ```
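After path substitution, the command above would come out roughly as follows (the `mp.1.1.output` name is just illustrative of the default output naming):

```bash
python my_script.py -o mp.1.1.output -rfg --do-something --no-save --get --no-get -I {}
```

Note that the `{}` inside `$OPTIONS` survives as a literal, exactly as the reminder above describes.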
53 | 
54 | ```bash
55 | [COMMANDS]
56 | # Here, {o} is a metapipe output token, but the {}
57 | # inside $OPTIONS is substituted in afterward and
58 | # treated as a literal.
59 | python my_script.py -o {o} $OPTIONS
60 | 
61 | [PATHS]
62 | $OPTIONS -rfg --do-something --no-save --get --no-get -I {}
63 | ```
64 | 
65 | 
66 | ### Files
67 | 
68 | For a given pipeline, there is usually a set of input or auxiliary files. These files feed into the analysis, and later steps often require the output of one command as the input to another. This is where most of the power of Metapipe's syntax comes into play. The files section is denoted by `[FILES]`.
69 | 
70 | Files are specified using a number followed by a period, and then the path to the given file. The number is the file's alias, and once that alias is assigned, it can be used in commands.
71 | 
72 | ```bash
73 | [COMMANDS]
74 | cat {1} | sed 's/replace me/with me' > {o}
75 | cat {2} | cut -f 1 | sort | uniq > {o}
76 | 
77 | [FILES]
78 | 1. somefile.1
79 | 2. /path/to/somefile.2
80 | 
81 | ```
82 | 
83 | In this example, we use the aliases of files 1 and 2 to perform a different analysis on each file. Then, when the input files need to change, only the `[FILES]` section changes and the pipeline itself remains the same.
84 | 
85 | 
86 | ### Job Options
87 | 
88 | The job options section, denoted by `[JOB_OPTIONS]`, allows the user to specify a global set of options for all jobs. This helps reduce pipeline redundancy.
89 | 
90 | ```bash
91 | # Each of the commands in this pipeline needs to
92 | # be working in a scratch directory.
93 | [COMMANDS]
94 | cat somefile.1.txt | sed 's/replace me/with me' > {o}
95 | cat somefile.2.txt | sed 's/replace me/with you' > {o}
96 | cat somefile.3.txt | sed 's/replace you/with me' > {o}
97 | 
98 | [JOB_OPTIONS]
99 | set -e
100 | cd /var/my_project/
101 | 
102 | # This config will result in the following:
103 | # ------- Job 1 ---------
104 | set -e
105 | cd /var/my_project/
106 | cat somefile.1.txt | sed 's/replace me/with me' > {o}
107 | ```
108 | 
109 | The set of commands in Job Options will be carried over to every job in the pipeline. This can be extremely useful for setting scheduler directives, written as comments, for a queue system.
110 | 
111 | ```bash
112 | # Each of the commands needs 4GB of RAM
113 | [COMMANDS]
114 | cat somefile.1.txt | sed 's/replace me/with me' > {o}
115 | cat somefile.2.txt | sed 's/replace me/with you' > {o}
116 | cat somefile.3.txt | sed 's/replace you/with me' > {o}
117 | 
118 | [JOB_OPTIONS]
119 | #PBS -l mem=4096mb
120 | ```
121 | 
122 | Job Options make pipelines clearer and less redundant by letting users follow the [DRY][dry] principle.
123 | 
124 | [dry]: https://en.wikipedia.org/wiki/Don%27t_repeat_yourself
125 | 
126 | 
127 | ## Command Structure
128 | 
129 | Now that all of the concepts and supported sections have been explained, it's time to take a look at the command structure and how to take advantage of Metapipe's advanced features.
130 | 
131 | 
132 | ### Input Patterns
133 | 
134 | Consider the following command:
135 | 
136 | ```bash
137 | [COMMANDS]
138 | python somescript {1||2||3}
139 | 
140 | [FILES]
141 | 1. some_file1.txt
142 | 2. some_file2.txt
143 | 3. some_file3.txt
144 | ```
145 | 
146 | This command will run the python script 3 times in parallel, once with each
147 | file specified.
The output will look something like this:
148 | 
149 | ```bash
150 | # Output
151 | # ------
152 | 
153 | python somescript some_file1.txt
154 | python somescript some_file2.txt
155 | python somescript some_file3.txt
156 | ```
157 | 
158 | #### Running a script with multiple inputs
159 | 
160 | Let's say that you have a script that takes multiple files as input. In this
161 | case the syntax becomes:
162 | 
163 | ```bash
164 | [COMMANDS]
165 | python somescript {1,2,3}
166 | 
167 | [FILES]
168 | 1. some_file1.txt
169 | 2. some_file2.txt
170 | 3. some_file3.txt
171 | 
172 | # Output
173 | # ------
174 | 
175 | python somescript some_file1.txt some_file2.txt some_file3.txt
176 | ```
177 | 
178 | 
179 | ### Output Patterns
180 | 
181 | Whenever a script takes an explicit output filename, you can use the output
182 | pattern syntax to tell metapipe where the output should go and what it should be called.
183 | 
184 | ```bash
185 | [COMMANDS]
186 | python somescript -o {o} {1||2||3}
187 | 
188 | [FILES]
189 | 1. some_file1.txt
190 | 2. some_file2.txt
191 | 3. some_file3.txt
192 | 
193 | # Output
194 | # ------
195 | 
196 | python somescript -o mp.1.1.output some_file1.txt
197 | python somescript -o mp.1.2.output some_file2.txt
198 | python somescript -o mp.1.3.output some_file3.txt
199 | ```
200 | 
201 | Metapipe will generate the filename with the command's alias embedded in it. An upcoming feature will provide more useful output names.
202 | 
203 | 
204 | #### Implicit or Hardcoded output
205 | 
206 | In a case where the script or command you want to use generates an output that
207 | is not passed on the command line, but that you need for another step in the
208 | pipeline, you can use output patterns to tell metapipe what to look for.
209 | 
210 | Consider this:
211 | 
212 | ```bash
213 | [COMMANDS]
214 | # This command doesn't provide an output filename
215 | # so metapipe can't automatically track it.
216 | ./do_count {1||2}
217 | ./analyze.sh {1.*}
218 | 
219 | [FILES]
220 | 1. foo.txt
221 | 2. bar.txt
222 | ```
223 | 
224 | This set of commands is invalid because the second command (`./analyze.sh`)
225 | cannot know what the output of command 1 is: it isn't specified anywhere,
226 | since `./do_count` generates its output names from the input filenames it is given.
227 | 
228 | Since we wrote the `./do_count` script, we know that it generates files with a
229 | `.counts` extension. But because the files are never explicitly specified,
230 | Metapipe cannot infer the file names generated by step 1, and this
231 | config file is invalid.
232 | 
233 | We can tell metapipe what the output should look like by using an output pattern.
234 | 
235 | ```bash
236 | [COMMANDS]
237 | # We've now told Metapipe what the output file name
238 | # will look like. It can now track the file as normal.
239 | ./do_count {1||2} #{o:*.counts}
240 | ./analyze.sh {1.*}
241 | 
242 | [FILES]
243 | 1. foo.txt
244 | 2. bar.txt
245 | ```
246 | 
247 | The above example tells metapipe that the output of command 1, which is
248 | hardcoded in the script, will end in `.counts`. Now that
249 | the output of command 1 is known, command 2 will wait until command 1 finishes.
250 | 
251 | When the output marker has the form `{o}`, metapipe will insert a
252 | pregenerated filename into the command. The output marker `{o:pattern}` means
253 | that the output of the script is *not* inserted into the command,
254 | but it *will* match the given pattern. This means that later commands will be able
255 | to reference the files by name.
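To make the ordering concrete, here is a rough sketch of the jobs metapipe would generate for the example above. The `.counts` filenames are illustrative assumptions; the real names are whatever `./do_count` writes, so long as they match the pattern.

```bash
# Output (a sketch; assumes ./do_count writes foo.counts and bar.counts)
# ------

./do_count foo.txt
./do_count bar.txt

# Runs only once command 1 has finished:
./analyze.sh foo.counts bar.counts
```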
256 | 
257 | 
258 | ### Multiple Inputs and Outputs
259 | 
260 | Oftentimes a given shell command will either take multiple dynamic files as input, or generate multiple files as output. In either case, metapipe provides a way to manage and track these files.
261 | 
262 | For multiple input groups, metapipe expects each group to contain the same number of inputs, and will iterate over them in order.
263 | 
264 | **Example:**
265 | 
266 | ```bash
267 | # Given the following:
268 | [COMMANDS]
269 | bash somescript {1||2||3} --conf {4||5||6} > {o}
270 | 
271 | [FILES]
272 | 1. somefile.1
273 | 2. somefile.2
274 | 3. somefile.3
275 | 4. somefile.4
276 | 5. somefile.5
277 | 6. somefile.6
278 | 
279 | # Metapipe will return this:
280 | bash somescript somefile.1 --conf somefile.4 > mp.1.1.output
281 | bash somescript somefile.2 --conf somefile.5 > mp.1.2.output
282 | bash somescript somefile.3 --conf somefile.6 > mp.1.3.output
283 | ```
284 | 
285 | Metapipe will name the multiple output files as follows (in order from left to right):
286 | 
287 | `mp.{command_number}.{sub_command_number}-{output_number}`
288 | 
289 | **Example:**
290 | 
291 | ```bash
292 | # Given an input like the one below:
293 | [COMMANDS]
294 | bash somescript {1||2||3} --log {o} -r {o}
295 | 
296 | [FILES]
297 | 1. somefile.1
298 | 2. somefile.2
299 | 3. somefile.3
300 | 
301 | # metapipe will generate the following:
302 | bash somescript somefile.1 --log mp.1.1-1.output -r mp.1.1-2.output
303 | bash somescript somefile.2 --log mp.1.2-1.output -r mp.1.2-2.output
304 | bash somescript somefile.3 --log mp.1.3-1.output -r mp.1.3-2.output
305 | ```
306 | 
307 | 
308 | 
309 | ## Sample config.mp file
310 | 
311 | ```bash
312 | [COMMANDS]
313 | # Here we run our analysis script on every gzipped file
314 | # in the current directory and output the results to a file.
315 | python my_custom_script.py -o {o} {*.gz||}
316 | 
317 | # Take all the outputs of step 1 and feed them to cut.
318 | cut -f 1 {1.*||} > {o}
319 | 
320 | # Oh no! You hardcoded the output name? No problem! Just tell metapipe
321 | # what the filename is.
322 | python my_other_custom_code.py {2.*} #{o:hardcoded_output.csv}
323 | 
324 | # Now you want to compare your results to some controls? Ok!
325 | # Metapipe will compare your hardcoded_output to all 3
326 | # controls at the same time!
327 | python my_compare_script.py -o {o} $OPTIONS --compare {1||2||3} {3.1}
328 | 
329 | # Finally, you want to make some pretty graphs? No problem!
330 | # But wait! You want R 2.0 for this code? Just create an alias for R!
331 | Rscript my_cool_graphing_code.r {4.*} > {o}
332 | 
333 | [FILES]
334 | 1. controls.1.csv
335 | 2. controls.2.csv
336 | 3. controls.3.csv
337 | 
338 | [PATHS]
339 | Rscript ~/path/to/my/custom/R/version
340 | $OPTIONS -rne --get --no-get -v --V --log-level 1
341 | ```
--------------------------------------------------------------------------------
/docs/what_is_metapipe.md:
--------------------------------------------------------------------------------
1 | # Metapipe
2 | 
3 | *A pipeline for building analysis pipelines.*
4 | 
5 | Metapipe is a simple command line tool for building and running complex analysis pipelines. If you use a PBS/Torque queue for cluster computing, or if you have complex batch processing that you want simplified, metapipe is the tool for you.
6 | 
7 | Metapipe's goal is to improve **readability** and **maintainability** when building complex pipelines.
8 | 
9 | In addition to helping you generate and maintain complex pipelines, **metapipe also helps you debug them**! How?
Well, metapipe watches your jobs execute and keeps tabs on them. This means that, unlike a conventional batch queue system like PBS/Torque alone, metapipe can give you accurate error information and even resubmit failing jobs! Metapipe enhances the power of any PBS/Torque queue!
10 | 
11 | - What if I [don't use PBS/Torque](#other-queue-systems), or [a queue system at all?](#no-queue-no-problem)
12 | 
13 | 
14 | ## What does it do?
15 | 
16 | In the bad old days (before metapipe), if you wanted to make an analysis pipeline, you needed to know how to code. **Not anymore!** Metapipe makes it easy to build and run your analysis pipelines! **No more code, just commands!** This makes your pipelines easy to understand and change!
17 | 
18 | A sample metapipe file can be found in [Metapipe Syntax](syntax.html).
19 | 
20 | 
21 | ## No Queue? No Problem!
22 | 
23 | Lots of people don't use a PBS/Torque queue system, or a queue system at all, and metapipe can help them as well! Metapipe runs locally and gives you all the same benefits of a batch queue system! It runs jobs in parallel, provides detailed feedback when jobs go wrong, and automatically re-runs jobs if they fail.
24 | 
25 | To run metapipe locally, see the app's help menu!
26 | 
27 | `metapipe --help`
28 | 
29 | 
30 | ## Other Queue Systems
31 | 
32 | Metapipe is a very modular tool, and is designed to support any execution backend. Right now PBS and SGE are supported out of the box, but if you know just a little bit of Python, you can add support for any queue easily! *More information coming soon!*
33 | 
34 | 
--------------------------------------------------------------------------------
/metapipe/__init__.py:
--------------------------------------------------------------------------------
1 | from .app import run
--------------------------------------------------------------------------------
/metapipe/__main__.py:
--------------------------------------------------------------------------------
1 | from .app import main
2 | main()
--------------------------------------------------------------------------------
/metapipe/app.py:
--------------------------------------------------------------------------------
1 | """ A pipeline that generates analysis pipelines.
2 | 
3 | author: Brian Schrader
4 | since: 2015-12-22
5 | """
6 | 
7 | from __future__ import print_function
8 | 
9 | import argparse, pickle, os, sys
10 | 
11 | from multiprocessing import cpu_count
12 | 
13 | import pyparsing
14 | 
15 | from .parser import Parser
16 | from .models import Command, LocalJob, PBSJob, SGEJob, \
17 | HtmlReportingJobQueue, TextReportingJobQueue
18 | from .runtime import Runtime
19 | from metapipe.templates import env
20 | 
21 | 
22 | __version__ = '1.2-1'
23 | 
24 | 
25 | PIPELINE_ALIAS = "metapipe.queue.job"
26 | 
27 | JOB_TYPES = {
28 | 'local': LocalJob,
29 | 'pbs': PBSJob,
30 | 'sge': SGEJob,
31 | }
32 | 
33 | QUEUE_TYPES = {
34 | 'text': TextReportingJobQueue,
35 | 'html': HtmlReportingJobQueue,
36 | }
37 | 
38 | 
39 | def main():
40 | """ Parses the command-line args, and calls run. """
41 | parser = argparse.ArgumentParser(
42 | description='A pipeline that generates analysis pipelines.')
43 | parser.add_argument('input', nargs='?',
44 | help='A valid metapipe configuration file.')
45 | parser.add_argument('-o', '--output',
46 | help='An output destination. If none is provided, the '
47 | 'results will be printed to stdout.',
48 | default=sys.stdout)
49 | parser.add_argument('-t', '--temp',
50 | help='A desired metapipe binary file. 
This is used to store ' 51 | 'temp data between generation and execution. ' 52 | '(Default: "%(default)s")', default='.metapipe') 53 | parser.add_argument('-s', '--shell', 54 | help='The path to the shell to be used when executing the ' 55 | 'pipeline. (Default: "%(default)s)"', 56 | default='/bin/bash') 57 | parser.add_argument('-r', '--run', 58 | help='Run the pipeline as soon as it\'s ready.', 59 | action='store_true') 60 | parser.add_argument('-n', '--name', 61 | help='A name for the pipeline.', 62 | default='') 63 | parser.add_argument('-j', '--job-type', 64 | help='The destination for calculations (i.e. local, a PBS ' 65 | 'queue on a cluster, etc).\nOptions: {}. ' 66 | '(Default: "%(default)s)"'.format(JOB_TYPES.keys()), 67 | default='local') 68 | parser.add_argument('-p', '--max-jobs', 69 | help='The maximum number of concurrent jobs allowed. ' 70 | 'Defaults to maximum available cores.', 71 | default=None) 72 | parser.add_argument('--report-type', 73 | help='The output report type. By default metapipe will ' 74 | 'print updates to the console. \nOptions: {}. ' 75 | '(Default: "%(default)s)"'.format(QUEUE_TYPES.keys()), 76 | default='text') 77 | parser.add_argument('-v','--version', 78 | help='Displays the current version of the application.', 79 | action='store_true') 80 | args = parser.parse_args() 81 | 82 | if args.version: 83 | print('Version: {}'.format(__version__)) 84 | sys.exit(0) 85 | 86 | try: 87 | with open(args.input) as f: 88 | config = f.read() 89 | except IOError: 90 | print('No valid config file found.') 91 | return -1 92 | 93 | run(config, args.max_jobs, args.output, args.job_type, args.report_type, 94 | args.shell, args.temp, args.run) 95 | 96 | 97 | def run(config, max_jobs, output=sys.stdout, job_type='local', 98 | report_type='text', shell='/bin/bash', temp='.metapipe', run_now=False): 99 | """ Create the metapipe based on the provided input. """ 100 | if max_jobs == None: 101 | max_jobs = cpu_count() 102 | 103 | parser = Parser(config) 104 | try: 105 | command_templates = parser.consume() 106 | except ValueError as e: 107 | raise SyntaxError('Invalid config file. \n%s' % e) 108 | options = '\n'.join(parser.global_options) 109 | 110 | queue_type = QUEUE_TYPES[report_type] 111 | pipeline = Runtime(command_templates,queue_type,JOB_TYPES,job_type,max_jobs) 112 | 113 | template = env.get_template('output_script.tmpl.sh') 114 | with open(temp, 'wb') as f: 115 | pickle.dump(pipeline, f, 2) 116 | script = template.render(shell=shell, 117 | temp=os.path.abspath(temp), options=options) 118 | 119 | if run_now: 120 | output = output if output != sys.stdout else PIPELINE_ALIAS 121 | submit_job = make_submit_job(shell, output, job_type) 122 | submit_job.submit() 123 | 124 | try: 125 | f = open(output, 'w') 126 | output = f 127 | except TypeError: 128 | pass 129 | 130 | output.write(script) 131 | f.close() 132 | 133 | 134 | def make_submit_job(shell, output, job_type): 135 | """ Preps the metapipe main job to be submitted. 
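Wraps the shell invocation of the generated script in a Command and
builds a job of the requested type, ready for submission.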
""" 136 | run_cmd = [shell, output] 137 | submit_command = Command(alias=PIPELINE_ALIAS, cmds=run_cmd) 138 | submit_job = get_job(submit_command, job_type) 139 | submit_job.make() 140 | return submit_job 141 | 142 | 143 | if __name__ == '__main__': 144 | main() 145 | -------------------------------------------------------------------------------- /metapipe/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .command import Command 2 | from .command_template import CommandTemplate 3 | from .command_template_factory import * 4 | from .job import Job, call 5 | from .job_template import JobTemplate 6 | from .local_job import LocalJob 7 | from .grammar import Grammar 8 | from .pbs_job import PBSJob 9 | from .sge_job import SGEJob 10 | from .queue import HtmlReportingJobQueue, TextReportingJobQueue, \ 11 | ReportingJobQueue, BaseQueue 12 | from .tokens import FileToken, Input, Output, PathToken, CommentToken 13 | -------------------------------------------------------------------------------- /metapipe/models/command.py: -------------------------------------------------------------------------------- 1 | """ A command model that can be easily transformed into jobs. 2 | 3 | author: Brian Schrader 4 | since: 2015-12-21 5 | """ 6 | 7 | from .tokens import Input, Output, FileToken, PathToken, alias_pattern 8 | 9 | 10 | class Command(object): 11 | 12 | def __init__(self, alias, parts=[]): 13 | self.alias = alias 14 | self.parts = parts 15 | if len(self.output_parts) > 1: 16 | for i, output in enumerate(self.output_parts): 17 | output.alias = alias_pattern.format(command=self.alias, 18 | output_number=i+1) 19 | else: 20 | for output in self.output_parts: 21 | output.alias = self.alias 22 | 23 | def __repr__(self): 24 | return ''.format(self.alias) 25 | 26 | @property 27 | def depends_on(self): 28 | """ Returns a list of command template aliases that the given command 29 | template depends on. 30 | """ 31 | return [part.command_alias for part in self.input_parts 32 | if part.command_alias is not None] 33 | 34 | @property 35 | def input_parts(self): 36 | """ Returns a list of the input tokens in the list of parts. """ 37 | return [part for part in self.file_parts 38 | if isinstance(part, Input)] 39 | 40 | @property 41 | def output_parts(self): 42 | """ Returns a list of the output tokens in the list of parts. """ 43 | return [part for part in self.file_parts 44 | if isinstance(part, Output)] 45 | 46 | @property 47 | def file_parts(self): 48 | """ Returns a list of the file tokens in the list of parts. """ 49 | file_parts = [] 50 | for part in self.parts: 51 | try: 52 | for sub_part in part: 53 | if isinstance(sub_part, FileToken): 54 | file_parts.append(sub_part) 55 | except TypeError: 56 | if isinstance(part, FileToken): 57 | file_parts.append(part) 58 | return file_parts 59 | 60 | @property 61 | def path_parts(self): 62 | """ Returns a list of the path tokens in the list of parts. """ 63 | return [part for part in self.parts 64 | if isinstance(part, PathToken)] 65 | 66 | def update_dependent_files(self, prev_commands=[]): 67 | """ Update the command's dependencies based on the evaluated input and 68 | output of previous commands. 
69 | """ 70 | for command in prev_commands: 71 | for my_input in self.input_parts: 72 | for their_output in command.output_parts: 73 | if their_output == my_input: 74 | my_input.filename = their_output.eval() 75 | 76 | def eval(self): 77 | """ Evaluate the given job and return a complete shell script to be run 78 | by the job manager. 79 | """ 80 | eval = [] 81 | for part in self.parts: 82 | try: 83 | result = part.eval() 84 | except AttributeError: 85 | result = part 86 | if result[-1] != '\n': 87 | result += ' ' 88 | eval.append(result) 89 | return ''.join(eval).strip() 90 | 91 | 92 | -------------------------------------------------------------------------------- /metapipe/models/command_template.py: -------------------------------------------------------------------------------- 1 | """ A template for creating commands. 2 | 3 | author: Brian Schrader 4 | since: 2016-01-13 5 | """ 6 | 7 | import copy, collections 8 | 9 | from .tokens import Input, Output, FileToken, PathToken, CommentToken 10 | from .command import Command 11 | 12 | 13 | class Ticker(object): 14 | 15 | def __init__(self, maxlen, value=0): 16 | self.maxlen = maxlen 17 | self.value = value 18 | 19 | def tick(self, n=1): 20 | self.value += n 21 | if self.value >= self.maxlen: 22 | self.value -= self.maxlen 23 | 24 | 25 | class CommandTemplate(Command): 26 | 27 | def __init__(self, alias, parts=[], dependencies=[]): 28 | self.alias = alias 29 | self.parts = parts 30 | self._dependencies = dependencies 31 | 32 | def __repr__(self): 33 | return ''.format(self.alias, 34 | len(self.parts), len(self._dependencies)) 35 | 36 | @property 37 | def depends_on(self): 38 | """ Returns a list of command template aliases that the given command 39 | template depends on. 40 | """ 41 | return [dep.alias for dep in self._dependencies] 42 | 43 | @property 44 | def file_parts(self): 45 | """ Returns a list of the file tokens in the list of parts. """ 46 | return _search_for_files(self.parts) 47 | 48 | def eval(self): 49 | """ Returns a list of Command objects that can be evaluated as their 50 | string values. Each command will track it's preliminary dependencies, 51 | but these values should not be depended on for running commands. 52 | """ 53 | max_size = _get_max_size(self.parts) 54 | parts_list = _grow([[]], max_size-1) 55 | 56 | counter = Ticker(max_size) 57 | parts = self.parts[:] 58 | while len(parts) > 0: 59 | parts_list, counter = _get_parts_list(parts, 60 | parts_list, counter) 61 | 62 | commands = [] 63 | for i, parts in enumerate(parts_list): 64 | alias = self._get_alias(i+1) 65 | new_parts = copy.deepcopy(parts) 66 | commands.append(Command(alias=alias, parts=new_parts)) 67 | return commands 68 | 69 | def _get_alias(self, index): 70 | """ Given an index, return the string alias for that command. """ 71 | return '{}.{}'.format(self.alias, index) 72 | 73 | 74 | def _get_parts_list(to_go, so_far=[[]], ticker=None): 75 | """ Iterates over to_go, building the list of parts. To provide 76 | items for the beginning, use so_far. 
77 | """ 78 | try: 79 | part = to_go.pop(0) 80 | except IndexError: 81 | return so_far, ticker 82 | 83 | # Lists of input groups 84 | if isinstance(part, list) and any(isinstance(e, list) for e in part): 85 | while len(part) > 0: 86 | so_far, ticker = _get_parts_list(part, so_far, ticker) 87 | ticker.tick() 88 | # Input Group 89 | elif isinstance(part, list) and any(isinstance(e, Input) for e in part): 90 | while len(part) > 0: 91 | so_far, ticker = _get_parts_list(part, so_far, ticker) 92 | # Magic Inputs 93 | elif isinstance(part, Input) and part.is_magic: 94 | inputs = part.eval() 95 | while len(inputs) > 0: 96 | so_far, ticker = _get_parts_list(inputs, so_far, ticker) 97 | ticker.tick() 98 | # Normal inputs 99 | elif isinstance(part, Input) and not part.is_magic: 100 | so_far[ticker.value].append(part) 101 | # Everything else 102 | else: 103 | so_far = _append(so_far, part) 104 | 105 | return so_far, ticker 106 | 107 | 108 | def _get_max_size(parts, size=1): 109 | """ Given a list of parts, find the maximum number of commands 110 | contained in it. 111 | """ 112 | max_group_size = 0 113 | for part in parts: 114 | if isinstance(part, list): 115 | group_size = 0 116 | for input_group in part: 117 | group_size += 1 118 | 119 | if group_size > max_group_size: 120 | max_group_size = group_size 121 | 122 | magic_size = _get_magic_size(parts) 123 | return max_group_size * magic_size 124 | 125 | 126 | def _get_magic_size(parts, size=1): 127 | for part in parts: 128 | if isinstance(part, Input) and part.is_magic: 129 | magic_size = len(part.eval()) 130 | if magic_size > size: 131 | return magic_size 132 | elif isinstance(part, list): 133 | size = _get_magic_size(part, size) 134 | return size 135 | 136 | 137 | def _append(so_far, item): 138 | """ Appends an item to all items in a list of lists. """ 139 | for sub_list in so_far: 140 | sub_list.append(item) 141 | return so_far 142 | 143 | 144 | def _grow(list_of_lists, num_new): 145 | """ Given a list of lists, and a number of new lists to add, copy the 146 | content of the first list into the new ones, and add them to the list 147 | of lists. 148 | """ 149 | first = list_of_lists[0] 150 | for i in range(num_new): 151 | list_of_lists.append(copy.deepcopy(first)) 152 | return list_of_lists 153 | 154 | 155 | def _search_for_files(parts): 156 | """ Given a list of parts, return all of the nested file parts. """ 157 | file_parts = [] 158 | for part in parts: 159 | if isinstance(part, list): 160 | file_parts.extend(_search_for_files(part)) 161 | elif isinstance(part, FileToken): 162 | file_parts.append(part) 163 | return file_parts 164 | 165 | 166 | -------------------------------------------------------------------------------- /metapipe/models/command_template_factory.py: -------------------------------------------------------------------------------- 1 | """ A factory for building individual commands based on the full list 2 | of commands and inputs. 3 | 4 | author: Brian Schrader 5 | since: 2016-01-12 6 | """ 7 | 8 | 9 | from .tokens import Input, Output, PathToken, CommentToken 10 | from .command import Command 11 | from .command_template import CommandTemplate 12 | from .grammar import OR_TOKEN, AND_TOKEN 13 | 14 | 15 | def get_command_templates(command_tokens, file_tokens=[], path_tokens=[], 16 | job_options=[]): 17 | """ Given a list of tokens from the grammar, return a 18 | list of commands. 
19 | """ 20 | files = get_files(file_tokens) 21 | paths = get_paths(path_tokens) 22 | job_options = get_options(job_options) 23 | 24 | templates = _get_command_templates(command_tokens, files, paths, 25 | job_options) 26 | 27 | for command_template in templates: 28 | command_template._dependencies = _get_prelim_dependencies( 29 | command_template, templates) 30 | return templates 31 | 32 | 33 | def get_files(file_tokens, cwd=None): 34 | """ Given a list of parser file tokens, return a list of input objects 35 | for them. 36 | """ 37 | if not file_tokens: 38 | return [] 39 | 40 | token = file_tokens.pop() 41 | try: 42 | filename = token.filename 43 | except AttributeError: 44 | filename = '' 45 | 46 | if cwd: 47 | input = Input(token.alias, filename, cwd=cwd) 48 | else: 49 | input = Input(token.alias, filename) 50 | 51 | return [input] + get_files(file_tokens) 52 | 53 | 54 | def get_paths(path_tokens): 55 | """ Given a list of parser path tokens, return a list of path objects 56 | for them. 57 | """ 58 | if len(path_tokens) == 0: 59 | return [] 60 | 61 | token = path_tokens.pop() 62 | path = PathToken(token.alias, token.path) 63 | return [path] + get_paths(path_tokens) 64 | 65 | 66 | def get_options(options): 67 | """ Given a list of options, tokenize them. """ 68 | return _get_comments(options) 69 | 70 | 71 | # Internal Implementation 72 | 73 | 74 | def _get_command_templates(command_tokens, files=[], paths=[], job_options=[], 75 | count=1): 76 | """ Reversivly create command templates. """ 77 | if not command_tokens: 78 | return [] 79 | 80 | comment_tokens, command_token = command_tokens.pop() 81 | parts = [] 82 | 83 | parts += job_options + _get_comments(comment_tokens) 84 | for part in command_token[0]: 85 | # Check for file 86 | try: 87 | parts.append(_get_file_by_alias(part, files)) 88 | continue 89 | except (AttributeError, ValueError): 90 | pass 91 | 92 | # Check for path/string 93 | for cut in part.split(): 94 | try: 95 | parts.append(_get_path_by_name(cut, paths)) 96 | continue 97 | except ValueError: 98 | pass 99 | 100 | parts.append(cut) 101 | 102 | command_template = CommandTemplate(alias=str(count), parts=parts) 103 | [setattr(p, 'alias', command_template.alias) 104 | for p in command_template.output_parts] 105 | return [command_template] + _get_command_templates(command_tokens, 106 | files, paths, job_options, count+1) 107 | 108 | 109 | def _get_prelim_dependencies(command_template, all_templates): 110 | """ Given a command_template determine which other templates it 111 | depends on. This should not be used as the be-all end-all of 112 | dependencies and before calling each command, ensure that it's 113 | requirements are met. 114 | """ 115 | deps = [] 116 | for input in command_template.input_parts: 117 | if '.' not in input.alias: 118 | continue 119 | for template in all_templates: 120 | for output in template.output_parts: 121 | if input.fuzzy_match(output): 122 | deps.append(template) 123 | break 124 | return list(set(deps)) 125 | 126 | 127 | def _get_file_by_alias(part, files): 128 | """ Given a command part, find the file it represents. If not found, 129 | then returns a new token representing that file. 130 | :throws ValueError: if the value is not a command file alias. 
131 | """ 132 | # Make Output 133 | if _is_output(part): 134 | return Output.from_string(part.pop()) 135 | 136 | # Search/Make Input 137 | else: 138 | inputs = [[]] 139 | 140 | if part.magic_or: 141 | and_or = 'or' 142 | else: 143 | and_or = 'and' 144 | 145 | for cut in part.asList(): 146 | if cut == OR_TOKEN: 147 | inputs.append([]) 148 | continue 149 | if cut == AND_TOKEN: 150 | continue 151 | 152 | input = Input(cut, filename=cut, and_or=and_or) 153 | for file in files: 154 | if file.alias == cut: 155 | # Override the filename 156 | input.filename = file.filename 157 | inputs[-1].append(input) 158 | break 159 | else: 160 | inputs[-1].append(input) 161 | 162 | 163 | return [input for input in inputs if input] 164 | 165 | 166 | def _get_path_by_name(part, paths): 167 | """ Given a command part, find the path it represents. 168 | :throws ValueError: if no valid file is found. 169 | """ 170 | for path in paths: 171 | if path.alias == part: 172 | return path 173 | raise ValueError 174 | 175 | def _get_comments(parts): 176 | """ Given a list of parts representing a list of comments, return the list 177 | of comment tokens 178 | """ 179 | return [CommentToken(part) for part in parts] 180 | 181 | 182 | def _is_output(part): 183 | """ Returns whether the given part represents an output variable. """ 184 | if part[0].lower() == 'o': 185 | return True 186 | elif part[0][:2].lower() == 'o:': 187 | return True 188 | elif part[0][:2].lower() == 'o.': 189 | return True 190 | else: 191 | return False 192 | 193 | -------------------------------------------------------------------------------- /metapipe/models/grammar.py: -------------------------------------------------------------------------------- 1 | """ Grammars for various parts of the input file. """ 2 | 3 | from pyparsing import * 4 | 5 | 6 | approved_printables = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`|~' 7 | 8 | lbrack = Literal('[').suppress() 9 | rbrack = Literal(']').suppress() 10 | pound = Literal('#') 11 | 12 | OR_TOKEN = '<>' 13 | AND_TOKEN = '<>' 14 | 15 | class classproperty(property): 16 | def __get__(self, cls, owner): 17 | return self.fget.__get__(None, owner)() 18 | 19 | 20 | class Grammar(object): 21 | """ A container class for the various grammars in the input files. """ 22 | 23 | _section = lbrack + Word(alphas+'_') + rbrack 24 | line = ~lbrack + Word(printables) + restOfLine 25 | _non_comment_line = ~pound + Group(Word(printables) + restOfLine) 26 | 27 | __command_input_output = ( 28 | Suppress('{') + 29 | OneOrMore( 30 | Group(OneOrMore( 31 | Combine( 32 | Word(alphanums+'.*:/_-') + 33 | Optional('.' + Word(nums)) 34 | ) + 35 | Optional(( 36 | Suppress(',' + FollowedBy('}')) ^ 37 | Suppress(',') 38 | ).addParseAction(replaceWith(AND_TOKEN)).setResultsName('_and')) + 39 | Optional( 40 | ('||' + FollowedBy('}')).addParseAction( 41 | replaceWith(OR_TOKEN)).setResultsName('magic_or') ^ 42 | Suppress('||').addParseAction( 43 | replaceWith(OR_TOKEN)).setResultsName('_or') 44 | ) 45 | ))) + 46 | Suppress('}') 47 | ) 48 | 49 | @classproperty 50 | @staticmethod 51 | def overall(): 52 | """ The overall grammer for pulling apart the main input files. 
""" 53 | return ZeroOrMore(Grammar.comment) + Dict(ZeroOrMore(Group( 54 | Grammar._section + ZeroOrMore(Group(Grammar.line))) 55 | )) 56 | 57 | @classproperty 58 | @staticmethod 59 | def comment(): 60 | return ('#' + Optional(restOfLine)) 61 | 62 | @classproperty 63 | @staticmethod 64 | def file(): 65 | """ Grammar for files found in the overall input files. """ 66 | return ( 67 | Optional(Word(alphanums).setResultsName('alias') + 68 | Suppress(Literal('.'))) + Suppress(White()) + 69 | Word(approved_printables).setResultsName('filename') 70 | ) 71 | 72 | @classproperty 73 | @staticmethod 74 | def path(): 75 | """ Grammar for paths found in the overall input files. """ 76 | return ( 77 | Word(approved_printables).setResultsName('alias') + 78 | Suppress(White()) + 79 | restOfLine.setResultsName('path') 80 | ) 81 | 82 | @classproperty 83 | @staticmethod 84 | def command_lines(): 85 | """ Grammar for commands found in the overall input files. """ 86 | return ZeroOrMore(Group( 87 | Group(ZeroOrMore(Group(Grammar.comment))) + Grammar._non_comment_line 88 | )) 89 | 90 | @classproperty 91 | @staticmethod 92 | def command(): 93 | """ Grammar for commands found in the overall input files. """ 94 | return ( 95 | OneOrMore( 96 | Word(approved_printables+' ').setResultsName('command', 97 | listAllMatches=True) ^ 98 | Grammar.__command_input_output.setResultsName('_in', 99 | listAllMatches=True) 100 | ) 101 | ) 102 | -------------------------------------------------------------------------------- /metapipe/models/job.py: -------------------------------------------------------------------------------- 1 | """ A basic job model, and local job implementation. 2 | 3 | author: Brian Schrader 4 | since: 2016-01-04 5 | """ 6 | 7 | import os 8 | from subprocess import Popen, PIPE 9 | 10 | 11 | def call(args, stdout=PIPE, stderr=PIPE): 12 | """ Calls the given arguments in a seperate process 13 | and returns the contents of standard out. 14 | """ 15 | p = Popen(args, stdout=stdout, stderr=stderr) 16 | out, err = p.communicate() 17 | 18 | try: 19 | return out.decode(sys.stdout.encoding), err.decode(sys.stdout.encoding) 20 | except Exception: 21 | return out, err 22 | 23 | 24 | class Job(object): 25 | """ A template job class that just runs the given command script locally. 26 | To make your own custom jobs, subclass this Job and override the status methods, the submit method, and cmd property. 27 | 28 | Submitting a job cannot block execution. The submit call should return 29 | immediately so that other jobs can be executed, and tracked. 30 | """ 31 | 32 | JOB_FILE_PATTERN = 'metapipe.{}.job' 33 | MAX_RETRY = 5 34 | 35 | def __init__(self, alias, command, depends_on=[]): 36 | """ Create an new job with the given name, and command. """ 37 | self.command = command 38 | self.depends_on = depends_on 39 | self.alias = alias 40 | self.attempts = 0 41 | self.filename = self.JOB_FILE_PATTERN.format(self.alias) 42 | 43 | def __repr__(self): 44 | return ''.format(self.cmd) 45 | 46 | def __cmp__(self, other): 47 | return cmp(self.alias, other.alias) 48 | 49 | def make(self): 50 | """ Evaluate the command, and write it to a file. """ 51 | eval = self.command.eval() 52 | with open(self.filename, 'w') as f: 53 | f.write(eval) 54 | 55 | @property 56 | def should_retry(self): 57 | return self.attempts < self.MAX_RETRY 58 | 59 | # Override these... 60 | 61 | @property 62 | def cmd(self): 63 | """ Returns the command needed to submit the calculations. 
64 | Normally, this is just the command itself; however, if using a
65 | queue system, this should return the command that submits the
66 | job to the queue.
67 | """
68 | pass
69 | 
70 | def submit(self):
71 | """ Submits the job to be run. If an external queue system is used,
72 | this method submits itself to that queue. Else it runs the job itself.
73 | :see: call
74 | """
75 | pass
76 | 
77 | def is_running(self):
78 | """ Returns whether the job is running or not. """
79 | pass
80 | 
81 | def is_queued(self):
82 | """ Returns whether the job is queued or not.
83 | This function is only used if jobs are submitted to an external queue.
84 | """
85 | pass
86 | 
87 | def is_complete(self):
88 | """ Returns whether the job is complete or not. """
89 | pass
90 | 
91 | def is_error(self):
92 | """ Checks to see if the job errored out. """
93 | pass
94 | 
95 | def is_fail(self):
96 | """ Checks to see if the job has failed; a failed job
97 | should not be resubmitted.
98 | """
99 | pass
100 | 
101 | 
--------------------------------------------------------------------------------
/metapipe/models/job_template.py:
--------------------------------------------------------------------------------
1 | """ A template that evaluates to multiple jobs and places them back on the queue.
2 | author: Brian Schrader
3 | since: 2016-02-19
4 | """
5 | 
6 | from .job import Job
7 | 
8 | 
9 | class JobTemplate(Job):
10 | 
11 | def __init__(self, alias, command_template, depends_on, queue, job_class):
12 | super(JobTemplate, self).__init__(alias, command_template, depends_on)
13 | self.command_template = command_template
14 | self.queue = queue
15 | self.job_class = job_class
16 | self.jobs = []
17 | 
18 | def __repr__(self):
19 | return ''.format(self.alias)
20 | 
21 | def submit(self):
22 | jobs = self._get_jobs_from_template(self.command_template, self.job_class)
23 | for job in jobs:
24 | self.queue.push(job)
25 | self.jobs = jobs
26 | 
27 | def is_running(self):
28 | if len(self.jobs) > 0:
29 | return any(job.is_running() for job in self.jobs)
30 | return False
31 | 
32 | def is_queued(self):
33 | return False
34 | 
35 | def is_complete(self):
36 | if len(self.jobs) > 0:
37 | return all(job.is_complete() for job in self.jobs)
38 | return False
39 | 
40 | def is_error(self):
41 | if len(self.jobs) > 0:
42 | return all(job.is_error() for job in self.jobs)
43 | return False
44 | 
45 | def is_fail(self):
46 | return self.attempts > self.MAX_RETRY
47 | 
48 | def _get_jobs_from_template(self, template, job_class):
49 | """ Given a template and a job class, construct jobs from
50 | the given template.
51 | """
52 | jobs = []
53 | for command in template.eval():
54 | alias = command.alias
55 | depends_on = [job.alias
56 | for job in self.queue.all_jobs
57 | for deps in command.depends_on
58 | if deps == job.alias]
59 | command.update_dependent_files([job.command
60 | for job in self.queue.all_jobs
61 | if not isinstance(job, JobTemplate)])
62 | 
63 | job = job_class(alias, command, depends_on)
64 | jobs.append(job)
65 | return jobs
66 | 
--------------------------------------------------------------------------------
/metapipe/models/local_job.py:
--------------------------------------------------------------------------------
1 | import threading
2 | 
3 | from . import Job, call
4 | 
5 | 
6 | LOCAL_LOG_FORMAT = '{}_{}'
7 | 
8 | 
9 | class LocalJobCallThread(threading.Thread):
10 | """ A class that handles calling subprocesses in separate threads.
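The callable's stdout and stderr are captured on the instance once
run() completes.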
""" 11 | 12 | def __init__(self, callable, *args, **kwargs): 13 | self.stdout = None 14 | self.stderr = None 15 | self.callable = callable 16 | self.args = args 17 | self.kwargs = kwargs 18 | threading.Thread.__init__(self) 19 | self.stdout = None 20 | self.stderr = None 21 | 22 | def run(self): 23 | self.stdout, self.stderr = self.callable(*self.args, **self.kwargs) 24 | 25 | 26 | class LocalJob(Job): 27 | """ A subclass of job for local calculations. """ 28 | 29 | def __init__(self, alias, command, depends_on=[], shell='bash'): 30 | super(LocalJob, self).__init__(alias, command, depends_on) 31 | self.shell = shell 32 | self._task = None 33 | self._err = False 34 | 35 | def __repr__(self): 36 | return ''.format(self.cmd) 37 | 38 | @property 39 | def cmd(self): 40 | return [self.shell, self.filename] 41 | 42 | def submit(self): 43 | self.make() 44 | self.attempts += 1 45 | self._task = LocalJobCallThread(call, self.cmd) 46 | self._task.start() 47 | 48 | def is_running(self): 49 | try: 50 | return self._task.is_alive() 51 | except AttributeError: 52 | return False 53 | 54 | def is_queued(self): 55 | """ Returns False since local jobs are not submitted to an 56 | external queue. 57 | """ 58 | return False 59 | 60 | def is_complete(self): 61 | try: 62 | if not self._task.is_alive(): 63 | self._task.join() 64 | self._write_log() 65 | return True 66 | except AttributeError: 67 | pass 68 | return False 69 | 70 | def is_error(self): 71 | """ Checks to see if the job errored out. """ 72 | try: 73 | if self._task.is_alive(): 74 | if len(self._task.stderr.readlines()) > 0: 75 | self._task.join() 76 | self._write_log() 77 | return True 78 | except AttributeError: 79 | pass 80 | return False 81 | 82 | def is_fail(self): 83 | return not self.should_retry 84 | 85 | def _write_log(self): 86 | alias = Job.JOB_FILE_PATTERN.format(self.alias) 87 | outlog, errlog = (LOCAL_LOG_FORMAT.format(alias, 'stdout'), 88 | LOCAL_LOG_FORMAT.format(alias, 'stderr')) 89 | 90 | with open(outlog, 'w+') as f: 91 | f.write(str(self._task.stdout)) 92 | with open(errlog, 'w+') as f: 93 | f.write(str(self._task.stderr)) 94 | 95 | -------------------------------------------------------------------------------- /metapipe/models/pbs_job.py: -------------------------------------------------------------------------------- 1 | from . import Job, call 2 | 3 | 4 | class PBSJob(Job): 5 | """ A job subclass for running tasks on a PBS queue. """ 6 | 7 | def __init__(self, alias, command, depends_on=[], queue='work'): 8 | super(PBSJob, self).__init__(alias, command, depends_on) 9 | self.queue = queue 10 | self.id = None 11 | self.waiting = True # The job has yet to be submitted. 12 | 13 | def submit(self): 14 | if self.attempts == 0: 15 | self.make() 16 | self.attempts += 1 17 | out, err = call(self.cmd) 18 | self.waiting = False 19 | self.id = out[:out.index('.')] 20 | 21 | @property 22 | def cmd(self): 23 | return ['qsub', self.filename] 24 | 25 | def is_running(self): 26 | """ Checks to see if the job is running. """ 27 | qstat = self._grep_qstat('running') 28 | if qstat: 29 | return True 30 | return False 31 | 32 | def is_queued(self): 33 | """ Checks to see if the job is queued. """ 34 | qstat = self._grep_qstat('queued') 35 | if qstat: 36 | return True 37 | return False 38 | 39 | def is_complete(self): 40 | """ Checks the job's output or log file to determing if 41 | the completion criteria was met. 
42 | """ 43 | qstat = self._grep_qstat('complete') 44 | comp = self._grep_status('complete') 45 | if qstat and comp: 46 | return True 47 | return False 48 | 49 | def is_fail(self): 50 | return not self.should_retry 51 | 52 | def is_error(self): 53 | """ Checks to see if the job errored out. """ 54 | qstat = self._grep_qstat('error') 55 | err = self._grep_status('error') 56 | if qstat and err: 57 | return True 58 | return False 59 | 60 | def _grep_qstat(self, status_type='complete'): 61 | """ Greps qstat -e for information from the queue. 62 | :paramsstatus_type: complete, queued, running, error, gone 63 | """ 64 | args = "qstat -e {}".format(self.id).split() 65 | res, _ = call(args) 66 | if res == '': return False 67 | res = res.split('\n')[2].split()[4] 68 | 69 | if status_type == 'complete' and res == 'C': 70 | return True 71 | elif status_type == 'error' and (res == 'E' or res == 'C'): 72 | return True 73 | elif status_type == 'running' and res == 'R': 74 | return True 75 | elif status_type == 'queued' and res == 'Q': 76 | return True 77 | elif status_type == 'gone' and 'unknown job id' in str(res).lower(): 78 | return True 79 | else: 80 | return False 81 | 82 | def _grep_status(self, status_type): 83 | """ Greps through the job's current status to see if 84 | it returned with the requested status. 85 | status_type: complete, error 86 | """ 87 | args = "qstat -f {}".format(self.id).split() 88 | res, _ = call(args) 89 | exit_status = [line for line in res.split('\n') 90 | if 'exit_status' in line] 91 | try: 92 | _, __, code = exit_status[0].split() 93 | except IndexError: 94 | code = None 95 | 96 | if status_type == 'complete' and code == '0': 97 | return True 98 | elif status_type == 'error' and code != '0': 99 | return True 100 | else: 101 | return False 102 | 103 | -------------------------------------------------------------------------------- /metapipe/models/queue.py: -------------------------------------------------------------------------------- 1 | """ A simple manager for a task queue. 2 | 3 | The manager handles creating, submitting, and managing 4 | running jobs, and can even resubmit jobs that have failed. 5 | 6 | author: Brian Schrader 7 | since: 2015-08-27 8 | """ 9 | from .reporting import BaseReportingMixin, HtmlReportingMixin, TextReportingMixin 10 | from .job_template import JobTemplate 11 | 12 | class BaseQueue(object): 13 | """ An abstract class for managing a queue of jobs. To use this class, 14 | subclass it and fill in the callbacks you need. 15 | """ 16 | 17 | MAX_CONCURRENT_JOBS = 10 18 | def __init__(self, name=''): 19 | self.name = name 20 | self.queue = [] 21 | self.running = [] 22 | self.failed = [] 23 | self.complete = [] 24 | 25 | def __repr__(self): 26 | return '' % str(len(self.active_jobs)) 27 | 28 | @property 29 | def is_empty(self): 30 | return len(self.active_jobs) == 0 31 | 32 | @property 33 | def active_jobs(self): 34 | """ Returns a list of all jobs submitted to the queue, 35 | or in progress. 36 | """ 37 | return list(set(self.queue + self.running)) 38 | 39 | @property 40 | def all_jobs(self): 41 | """ Returns a list of all jobs submitted to the queue, complete, 42 | in-progess or failed. 43 | """ 44 | return list(set(self.complete + self.failed + self.queue + self.running)) 45 | 46 | @property 47 | def progress(self): 48 | """ Returns the percentage, current and total number of 49 | jobs in the queue. 
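(Currently only the percentage is returned.)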
50 | """ 51 | total = len(self.all_jobs) 52 | remaining = total - len(self.active_jobs) if total > 0 else 0 53 | percent = int(100 * (float(remaining) / total)) if total > 0 else 0 54 | return percent 55 | 56 | def ready(self, job): 57 | """ Determines if the job is ready to be sumitted to the 58 | queue. It checks if the job depends on any currently 59 | running or queued operations. 60 | """ 61 | no_deps = len(job.depends_on) == 0 62 | all_complete = all(j.is_complete() for j in self.active_jobs 63 | if j.alias in job.depends_on) 64 | none_failed = not any(True for j in self.failed 65 | if j.alias in job.depends_on) 66 | queue_is_open = len(self.running) < self.MAX_CONCURRENT_JOBS 67 | return queue_is_open and (no_deps or (all_complete and none_failed)) 68 | 69 | def locked(self): 70 | """ Determines if the queue is locked. """ 71 | if len(self.failed) == 0: 72 | return False 73 | for fail in self.failed: 74 | for job in self.active_jobs: 75 | if fail.alias in job.depends_on: 76 | return True 77 | 78 | def push(self, job): 79 | """ Push a job onto the queue. This does not submit the job. """ 80 | self.queue.append(job) 81 | 82 | def tick(self): 83 | """ Submits all the given jobs in the queue and watches their 84 | progress as they proceed. This function yields at the end of 85 | each iteration of the queue. 86 | :raises RuntimeError: If queue is locked. 87 | """ 88 | self.on_start() 89 | while not self.is_empty: 90 | cruft = [] 91 | for job in self.queue: 92 | if not self.ready(job): 93 | continue 94 | self.on_ready(job) 95 | try: 96 | job.submit() 97 | except ValueError: 98 | if job.should_retry: 99 | self.on_error(job) 100 | job.attempts += 1 101 | else: 102 | self.on_fail(job) 103 | cruft.append(job) 104 | self.failed.append(job) 105 | else: 106 | self.running.append(job) 107 | self.on_submit(job) 108 | cruft.append(job) 109 | 110 | self.queue = [job for job in self.queue if job not in cruft] 111 | 112 | cruft = [] 113 | for job in self.running: 114 | if job.is_running() or job.is_queued(): 115 | pass 116 | elif job.is_complete(): 117 | self.on_complete(job) 118 | cruft.append(job) 119 | self.complete.append(job) 120 | elif job.is_fail(): 121 | self.on_fail(job) 122 | cruft.append(job) 123 | self.failed.append(job) 124 | elif job.is_error(): 125 | self.on_error(job) 126 | cruft.append(job) 127 | else: 128 | pass 129 | self.running = [job for job in self.running if job not in cruft] 130 | 131 | if self.locked() and self.on_locked(): 132 | raise RuntimeError 133 | self.on_tick() 134 | yield 135 | self.on_end() 136 | 137 | # Callbacks... 138 | 139 | def on_start(self): 140 | """ Called when the queue is starting up. """ 141 | pass 142 | 143 | def on_end(self): 144 | """ Called when the queue is shutting down. """ 145 | pass 146 | 147 | def on_locked(self): 148 | """ Called when the queue is locked and no jobs can proceed. 149 | If this callback returns True, then the queue will be restarted, 150 | else it will be terminated. 151 | """ 152 | return True 153 | 154 | def on_tick(self): 155 | """ Called when a tick of the queue is complete. """ 156 | pass 157 | 158 | def on_ready(self, job): 159 | """ Called when a job is ready to be submitted. 160 | :param job: The given job that is ready. 161 | """ 162 | pass 163 | 164 | def on_submit(self, job): 165 | """ Called when a job has been submitted. 166 | :param job: The given job that has been submitted. 167 | """ 168 | pass 169 | 170 | def on_complete(self, job): 171 | """ Called when a job has completed. 
172 | :param job: The given job that has completed. 173 | """ 174 | pass 175 | 176 | def on_error(self, job): 177 | """ Called when a job has errored. By default, the job 178 | is resubmitted until some max threshold is reached. 179 | :param job: The given job that has errored. 180 | """ 181 | pass 182 | 183 | def on_fail(self, job): 184 | """ Called when a job has failed after multiple resubmissions. The 185 | given job will be removed from the queue. 186 | :param job: The given job that has errored. 187 | """ 188 | pass 189 | 190 | 191 | class ReportingJobQueue(BaseReportingMixin, BaseQueue): 192 | """ An abstract subclass of the Queue which reports on progress. """ 193 | 194 | @property 195 | def real_jobs(self): 196 | """ Returns all jobs that represent work. """ 197 | return [j for j in self.all_jobs if not isinstance(j, JobTemplate)] 198 | 199 | def on_locked(self): 200 | self.render('The queue is locked. Please check the logs.', 201 | self.progress) 202 | return True 203 | 204 | def on_submit(self, job): 205 | if not isinstance(job, JobTemplate): 206 | self.render('Submitted: %s' % job.alias, self.progress) 207 | 208 | def on_complete(self, job): 209 | if not isinstance(job, JobTemplate): 210 | self.render('Complete: %s' % job.alias, self.progress) 211 | 212 | def on_error(self, job): 213 | if not isinstance(job, JobTemplate): 214 | self.render('Error: Job %s has failed, retrying (%s/%s)' 215 | % (job.alias, str(job.attempts), str(job.MAX_RETRY)), self.progress) 216 | 217 | def on_fail(self, job): 218 | if not isinstance(job, JobTemplate): 219 | self.render('Error: Job %s has failed. Retried %s times.' 220 | % (job.alias, str(job.attempts)), self.progress) 221 | 222 | def on_end(self): 223 | self.render('All jobs are complete.', self.progress) 224 | 225 | 226 | class HtmlReportingJobQueue(HtmlReportingMixin, ReportingJobQueue): 227 | """ A queue that generates HTML reports. """ 228 | pass 229 | 230 | 231 | class TextReportingJobQueue(TextReportingMixin, ReportingJobQueue): 232 | """ A queue that generates textual reports. """ 233 | pass 234 | -------------------------------------------------------------------------------- /metapipe/models/reporting.py: -------------------------------------------------------------------------------- 1 | """ A series of mixins for reporting. """ 2 | from datetime import datetime as dt 3 | 4 | from metapipe.templates import env 5 | template = env.get_template('progress-report.tmpl.html') 6 | 7 | 8 | class BaseReportingMixin(object): 9 | """ An abstract mixin for reporting. """ 10 | 11 | message_format = '%Y-%m-%d %H:%M:%S' 12 | 13 | def render(self, message, progress): 14 | """ Render the output of the report. """ 15 | pass 16 | 17 | 18 | class HtmlReportingMixin(BaseReportingMixin): 19 | """ A reporting mixin that writes progress to an HTML report. """ 20 | 21 | messages = [] 22 | output = 'metapipe.report.html' 23 | 24 | def render(self, message, progress): 25 | msg = Message(dt.strftime(dt.now(), self.message_format), message) 26 | self.messages.insert(0, msg) 27 | with open(self.output, 'w') as f: 28 | f.write(self.template.render( 29 | name=self.name, 30 | messages=self.messages, progress=progress, jobs=sorted(self.real_jobs))) 31 | 32 | 33 | class TextReportingMixin(BaseReportingMixin): 34 | """ A reporting mixin that prints any progress to the console. 
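Each message is printed prefixed with the overall progress
percentage and a timestamp.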
""" 35 | 36 | def render(self, message, progress): 37 | print('[{}%] {} {}'.format(progress, dt.strftime(dt.now(), 38 | self.message_format), message)) 39 | 40 | 41 | class Message(object): 42 | def __init__(self, time, text): 43 | self.time = time 44 | self.text = text 45 | -------------------------------------------------------------------------------- /metapipe/models/sge_job.py: -------------------------------------------------------------------------------- 1 | from . import Job, call 2 | 3 | 4 | class SGEJob(Job): 5 | """ A job subclass for running tasks on a SGE queue. """ 6 | 7 | def __init__(self, alias, command, depends_on=[], queue='work'): 8 | super(SGEJob, self).__init__(alias, command, depends_on) 9 | self.queue = queue 10 | self.id = None 11 | self.waiting = True # The job has yet to be submitted. 12 | 13 | def submit(self): 14 | if self.attempts == 0: 15 | self.make() 16 | self.attempts += 1 17 | out, err = call(self.cmd) 18 | self.waiting = False 19 | self.id = out.split()[2] 20 | 21 | @property 22 | def cmd(self): 23 | return ['qsub', '-cwd', '-V', self.filename] 24 | 25 | def is_running(self): 26 | """ Checks to see if the job is running. """ 27 | qstat = self._grep_qstat('running') 28 | if qstat: 29 | return True 30 | return False 31 | 32 | def is_queued(self): 33 | """ Checks to see if the job is queued. """ 34 | qstat = self._grep_qstat('queued') 35 | if qstat: 36 | return True 37 | return False 38 | 39 | def is_complete(self): 40 | """ Checks the job's output or log file to determing if 41 | the completion criteria was met. 42 | """ 43 | qstat = self._grep_qstat('complete') 44 | comp = self._grep_status('complete') 45 | if qstat and comp: 46 | return True 47 | return False 48 | 49 | def is_error(self): 50 | """ Checks to see if the job errored out. """ 51 | qstat = self._grep_qstat('error') 52 | err = self._grep_status('error') 53 | if qstat and err: 54 | return True 55 | return False 56 | 57 | def _grep_qstat(self, status_type='complete'): 58 | """ Greps qstat -e for information from the queue. 59 | :paramsstatus_type: complete, queued, running, error, gone 60 | """ 61 | args = ("qstat -e %s" % self.id).split() 62 | res, _ = call(args) 63 | if res == '': return False 64 | res = res.split('\n')[2].split()[4] 65 | 66 | if status_type == 'complete' and res == 'c': 67 | return True 68 | elif status_type == 'error' and (res == 'e' or res == 'c'): 69 | return True 70 | elif status_type == 'running' and res == 'r': 71 | return True 72 | elif status_type == 'queued' and res == 'qw': 73 | return True 74 | elif status_type == 'gone' and 'unknown job id' in str(res).lower(): 75 | return True 76 | else: 77 | return False 78 | 79 | def _grep_status(self, status_type): 80 | """ Greps through the job's current status to see if 81 | it returned with the requested status. 
82 | status_type: complete, error 83 | """ 84 | args = ("qstat -f %s" % self.id).split() 85 | res, _ = call(args) 86 | exit_status = [line for line in res.split('\n') 87 | if 'exit_status' in line] 88 | try: 89 | _, __, code = exit_status[0].split() 90 | except IndexError: 91 | code = None 92 | 93 | if status_type == 'complete' and code == '0': 94 | return True 95 | elif status_type == 'error' and code != '0': 96 | return True 97 | else: 98 | return False 99 | 100 | -------------------------------------------------------------------------------- /metapipe/models/tokens.py: -------------------------------------------------------------------------------- 1 | """ A set of tokens and convienence functions for input/output files. 2 | 3 | author: Brian Schrader 4 | since: 2015-12-28 5 | """ 6 | 7 | from __future__ import print_function 8 | from collections import namedtuple 9 | import glob, re 10 | 11 | 12 | file_pattern = 'mp.{}.output{}' 13 | alias_pattern = '{command}-{output_number}' 14 | 15 | 16 | class PathToken(object): 17 | """ A model for a given path. """ 18 | 19 | def __init__(self, alias, path): 20 | self.alias = alias 21 | self.path = path 22 | 23 | def __repr__(self): 24 | return ''.format(self.alias, self.path) 25 | 26 | def __eq__(self, other): 27 | try: 28 | return (self.alias == other.alias or 29 | self.path == other.path) 30 | except AttributeError: 31 | return False 32 | 33 | def eval(self): 34 | return self.path 35 | 36 | 37 | class CommentToken(object): 38 | 39 | def __init__(self, parts): 40 | self.parts = parts 41 | 42 | def __repr__(self): 43 | return ''.format(''.join(self.parts)) 44 | 45 | def __eq__(self, other): 46 | return ''.join(self.parts) == ''.join(other.parts) 47 | 48 | def eval(self): 49 | return '{}\n'.format(''.join(self.parts)) 50 | 51 | 52 | class FileToken(object): 53 | """ An abc for input/output data classes. Provides various common 54 | methods. 55 | Warning: This class should not be used directly. 56 | """ 57 | 58 | def __init__(self, alias, filename='', cwd=''): 59 | self.alias = alias 60 | self.filename = filename 61 | 62 | if len(cwd) > 0 and cwd[-1] != '/': 63 | cwd += '/' 64 | self.cwd = cwd 65 | 66 | def __eq__(self, other): 67 | try: 68 | return (self.alias == other.alias or 69 | self.filename == other.filename) 70 | except AttributeError: 71 | return False 72 | 73 | def __hash__(self): 74 | return hash(self.alias) 75 | 76 | @property 77 | def path(self): 78 | return '{}{}'.format(self.cwd, self.filename) 79 | 80 | 81 | class Input(FileToken): 82 | """ A model of a single input to a given command. Input tokens can be 83 | evaluated to obtain their actual filename(s). 84 | """ 85 | 86 | def __init__(self, alias, filename='', cwd='', and_or=''): 87 | super(Input, self).__init__(alias, filename, cwd) 88 | self.and_or = and_or 89 | 90 | def __repr__(self): 91 | try: 92 | eval = self.eval() 93 | except Exception: 94 | eval = '?' 95 | return '[{}]{}>'.format(self.alias, eval, 96 | ' _{}_'.format(self.and_or) if self.and_or else '') 97 | 98 | def fuzzy_match(self, other): 99 | """ Given another token, see if either the major alias identifier 100 | matches the other alias, or if magic matches the alias. 101 | """ 102 | magic, fuzzy = False, False 103 | try: 104 | magic = self.alias == other.magic 105 | except AttributeError: 106 | pass 107 | 108 | if '.' 
109 | major = self.alias.split('.')[0]
110 | fuzzy = major == other.alias
111 | return magic or fuzzy
112 | 
113 | def eval(self):
114 | """ Evaluates the given input and returns a string containing the
115 | actual filenames represented. If the input token represents multiple
116 | independent files, then eval will return a list of all the input files
117 | needed, otherwise it returns the filenames in a string.
118 | """
119 | if self.and_or == 'or':
120 | return [Input(self.alias, file, self.cwd, 'and')
121 | for file in self.files]
122 | return ' '.join(self.files)
123 | 
124 | @property
125 | def command_alias(self):
126 | """ Returns the command alias for a given input. In most cases this
127 | is just the input's alias but if the input is one of many, then
128 | `command_alias` returns just the beginning of the alias corresponding to
129 | the command's alias.
130 | """
131 | if '.' in self.alias:
132 | return self.alias.split('-')[0]
133 | return None
134 | 
135 | @property
136 | def is_magic(self):
137 | try:
138 | return isinstance(self.eval(), list)
139 | except ValueError:
140 | return False
141 | 
142 | @property
143 | def is_glob(self):
144 | return '*' in self.filename
145 | 
146 | @property
147 | def magic_path(self):
148 | match = file_pattern.format(self.alias, '*')
149 | return '{}{}'.format(self.cwd, match)
150 | 
151 | @property
152 | def files(self):
153 | """ Returns a list of all the files that match the given
154 | input token.
155 | """
156 | res = None
157 | if not res:
158 | res = glob.glob(self.path)
159 | if not res and self.is_glob:
160 | res = glob.glob(self.magic_path)
161 | if not res:
162 | res = glob.glob(self.alias)
163 | if not res:
164 | raise ValueError('No files match. %s' % self)
165 | return res
166 | 
167 | @staticmethod
168 | def from_string(string, _or=''):
169 | """ Parse a given string and turn it into an input token. """
170 | if _or:
171 | and_or = 'or'
172 | else:
173 | and_or = ''
174 | return Input(string, and_or=and_or)
175 | 
176 | 
177 | class Output(FileToken):
178 | """ A model of a single output to a given command. Output tokens can be
179 | evaluated to obtain their actual filename(s).
180 | """
181 | 
182 | def __init__(self, alias, filename='', cwd='', magic=''):
183 | super(Output, self).__init__(alias, filename, cwd)
184 | self.ext = ''
185 | self.magic = ''
186 | self._clean(magic)
187 | 
188 | def __repr__(self):
189 | return '<Output: [{}]{}{} {}>'.format(self.alias, self.eval(),
190 | (' ' + self.magic) if self.magic else '', self.ext)
191 | 
192 | def __eq__(self, other):
193 | """ Overrides the token eq to allow for magic : alias comparison for
194 | magic inputs. Defaults to the super() eq otherwise.
195 | """
196 | try:
197 | return (self.magic == other.alias or
198 | super(Output, self).__eq__(other))
199 | except AttributeError:
200 | return False
201 | 
202 | def eval(self):
203 | """ Returns a filename to be used for script output. """
204 | if self.magic:
205 | return self.magic
206 | if not self.filename:
207 | return file_pattern.format(self.alias, self.ext)
208 | return self.path
209 | 
210 | def as_input(self):
211 | """ Returns an input token for the given output. """
212 | return Input(self.alias, self.eval())
213 | 
214 | def _clean(self, magic):
215 | """ Given a magic string, remove the output tag designator.
""" 216 | if magic.lower() == 'o': 217 | self.magic = '' 218 | elif magic[:2].lower() == 'o:': 219 | self.magic = magic[2:] 220 | elif magic[:2].lower() == 'o.': 221 | self.ext = magic[1:] 222 | 223 | @staticmethod 224 | def from_string(string): 225 | """ Parse a given string and turn it into an output token. """ 226 | return Output('', magic=string) 227 | -------------------------------------------------------------------------------- /metapipe/parser.py: -------------------------------------------------------------------------------- 1 | """ A parser and other parser related classes. """ 2 | 3 | import pyparsing 4 | 5 | from .models import Command, Input, Output, Grammar 6 | from .models import command_template_factory as ctf 7 | 8 | 9 | class Parser(object): 10 | 11 | def __init__(self, string): 12 | self.string = string 13 | self.commands = [] 14 | self.paths = [] 15 | self.files = [] 16 | 17 | def consume(self, cwd=None): 18 | """ Converts the lexer tokens into valid statements. This process 19 | also checks command syntax. 20 | """ 21 | first_pass = Grammar.overall.parseString(self.string) 22 | lowered = { key.lower(): val for key, val in first_pass.iteritems() } 23 | 24 | self.commands = ['\n'.join(self._get('commands', lowered))] 25 | self.job_options = self._get('job_options', lowered) 26 | self.global_options = self._get('options', lowered) 27 | 28 | self.files = self._get('files', lowered) 29 | self.paths = self._get('paths', lowered) 30 | 31 | self.files = self._parse(self.files, Grammar.file, True) 32 | self.paths = self._parse(self.paths, Grammar.path, True) 33 | self.job_options = self._parse(self.job_options, Grammar.line) 34 | 35 | try: 36 | command_lines = self._parse(self.commands, Grammar.command_lines)[0] 37 | except IndexError: 38 | raise ValueError('Did you write any commands?') 39 | 40 | self.commands = [] 41 | for command_line in command_lines: 42 | comments, command = command_line 43 | self.commands.append([comments.asList(), 44 | self._parse([''.join(command)], Grammar.command)]) 45 | 46 | self.job_options = [opt.asList() for opt in self.job_options] 47 | 48 | self.paths = ctf.get_paths(self.paths) 49 | self.files = ctf.get_files(self.files) 50 | 51 | self.paths.reverse() 52 | self.files.reverse() 53 | self.commands.reverse() 54 | 55 | return ctf.get_command_templates(self.commands, self.files[:], 56 | self.paths[:], self.job_options) 57 | 58 | def _get(self, key, parser_result): 59 | """ Given a type and a dict of parser results, return 60 | the items as a list. 61 | """ 62 | try: 63 | list_data = parser_result[key].asList() 64 | if any(isinstance(obj, str) for obj in list_data): 65 | txt_lines = [''.join(list_data)] 66 | else: 67 | txt_lines = [''.join(f) for f in list_data] 68 | except KeyError: 69 | txt_lines = [] 70 | return txt_lines 71 | 72 | def _parse(self, lines, grammar, ignore_comments=False): 73 | """ Given a type and a list, parse it using the more detailed 74 | parse grammar. 75 | """ 76 | results = [] 77 | for c in lines: 78 | if c != '' and not (ignore_comments and c[0] == '#'): 79 | try: 80 | results.append(grammar.parseString(c)) 81 | except pyparsing.ParseException as e: 82 | raise ValueError('Invalid syntax. Verify line {} is ' 83 | 'correct.\n{}\n\n{}'.format(e.lineno, c, e)) 84 | return results 85 | -------------------------------------------------------------------------------- /metapipe/runtime.py: -------------------------------------------------------------------------------- 1 | """ The metapipe runtime. 
2 | 
3 | author: Brian Schrader
4 | since: 2015-01-13
5 | """
6 | 
7 | from time import sleep
8 | 
9 | from metapipe.models import JobTemplate
10 | 
11 | 
12 | class Runtime(object):
13 | 
14 | def __init__(self, command_templates, queue_type, job_types,
15 | job_type='local', sleep_time=1, max_jobs=10):
16 | self.complete_jobs = []
17 | self.queue = queue_type()
18 | self.sleep_time = sleep_time
19 | 
20 | self.queue.MAX_CONCURRENT_JOBS = max_jobs
21 | 
22 | job_templates = []
23 | for command_template in command_templates:
24 | self.add(command_template, job_types[job_type])
25 | 
26 | def add(self, command_template, job_class):
27 | """ Given a command template, add it as a job to the queue. """
28 | job = JobTemplate(command_template.alias,
29 | command_template=command_template,
30 | depends_on=command_template.depends_on, queue=self.queue,
31 | job_class=job_class)
32 | self.queue.push(job)
33 | 
34 | def run(self):
35 | """ Begins the runtime execution. """
36 | iterations = 0
37 | queue = self.queue.tick()
38 | while True:
39 | try:
40 | next(queue)
41 | except StopIteration:
42 | break
43 | 
44 | iterations += 1
45 | sleep(self.sleep_time)
46 | return iterations
47 | 
-------------------------------------------------------------------------------- /metapipe/templates/__init__.py: --------------------------------------------------------------------------------
1 | from jinja2 import Environment, PackageLoader
2 | env = Environment(loader=PackageLoader('metapipe', 'templates'))
3 | 
-------------------------------------------------------------------------------- /metapipe/templates/output_script.tmpl.sh: --------------------------------------------------------------------------------
1 | #! {{shell}}
2 | set -e;
3 | 
4 | {{options}}
5 | 
6 | python - <
[remainder of this template was lost when markup was stripped during extraction]
-------------------------------------------------------------------------------- /metapipe/templates/progress-report.tmpl.html: --------------------------------------------------------------------------------
[HTML markup stripped during extraction. Recoverable structure: a Jinja2 page titled "{{name}} Pipeline Progress Report | Metapipe", a progress bar rendering {{progress}}%, a "Jobs" section that iterates {% for job in jobs %} and prints {{job.command.eval()}} for each job, and a "Log" section that iterates {% for msg in messages %} and prints each {{msg.text}} with its {{msg.time}}.]
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | cov-core==1.15.0
2 | coverage==4.0.3
3 | mock==1.3.0
4 | nose==1.3.7
5 | nose-cov==1.6
6 | pbr==1.8.1
7 | pyparsing==2.0.6
8 | python-coveralls==2.6.0
9 | PyYAML==3.11
10 | requests==2.9.1
11 | sh==1.11
12 | six==1.10.0
13 | sure==1.2.24
14 | wheel==0.24.0
15 | Jinja2==2.8
16 | 
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | import re
2 | from setuptools import setup, find_packages
3 | 
4 | 
5 | setup(
6 | name='metapipe',
7 | version='1.3-1',
8 | packages=find_packages(),
9 | description='A pipeline for building analysis pipelines.',
10 | url='https://github.com/TorkamaniLab/metapipe',
11 | entry_points = {
12 | "console_scripts": ['metapipe = metapipe.app:main']
13 | },
14 | install_requires = ['pyparsing', 'Jinja2', 'mock'],
15 | author='Brian Schrader',
16 | author_email='brian@brianschrader.com',
17 | include_package_data = True,
18 | )
19 | 
-------------------------------------------------------------------------------- /test/__init__.py: --------------------------------------------------------------------------------
1 | import glob, os, shutil
2 | 
3 | 
4 | TEST_FILE_DIR = 'test/files/'
5 | 
6 | 
7 | def setup():
8 | """ Copy the testing files to the current working dir. """
9 | for file in glob.glob('{}*'.format(TEST_FILE_DIR)):
10 | new_dest = file.replace(TEST_FILE_DIR, '')
11 | shutil.copy(file, new_dest)
12 | 
13 | 
14 | def teardown():
15 | """ Delete the files. """
16 | for file in glob.glob('{}*'.format(TEST_FILE_DIR)):
17 | new_dest = file.replace(TEST_FILE_DIR, '')
18 | os.remove(new_dest)
19 | 
20 | for file in glob.glob('metapipe.*.job'):
21 | os.remove(file)
22 | 
23 | for file in glob.glob('metapipe.*.output*'):
24 | os.remove(file)
25 | 
26 | for file in glob.glob('metapipe.*_stdout'):
27 | os.remove(file)
28 | 
29 | for file in glob.glob('metapipe.*_stderr'):
30 | os.remove(file)
31 | 
-------------------------------------------------------------------------------- /test/files/mp.1.1-1.output: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.1.1-1.output -------------------------------------------------------------------------------- /test/files/mp.1.1.job: --------------------------------------------------------------------------------
1 | # This is a test of the comments
2 | sh long_run_task.sh somefile.1 > metapipe.1.1.output.testing_file
-------------------------------------------------------------------------------- /test/files/mp.1.1.job_stderr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.1.1.job_stderr -------------------------------------------------------------------------------- /test/files/mp.1.1.job_stdout: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.1.1.job_stdout -------------------------------------------------------------------------------- /test/files/mp.1.1.output:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.1.1.output -------------------------------------------------------------------------------- /test/files/mp.1.1.output.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.1.1.output.gz -------------------------------------------------------------------------------- /test/files/mp.1.1.output.testing_file: -------------------------------------------------------------------------------- 1 | somefile.1 2 | -------------------------------------------------------------------------------- /test/files/mp.1.2.job: -------------------------------------------------------------------------------- 1 | # This is a test of the comments 2 | sh long_run_task.sh somefile.2 > metapipe.1.2.output.testing_file -------------------------------------------------------------------------------- /test/files/mp.1.2.job_stderr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.1.2.job_stderr -------------------------------------------------------------------------------- /test/files/mp.1.2.job_stdout: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.1.2.job_stdout -------------------------------------------------------------------------------- /test/files/mp.1.2.output: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.1.2.output -------------------------------------------------------------------------------- /test/files/mp.1.2.output.testing_file: -------------------------------------------------------------------------------- 1 | somefile.2 2 | -------------------------------------------------------------------------------- /test/files/mp.2.1.job: -------------------------------------------------------------------------------- 1 | ## This is another test 2 | #PBS_O_WORKDIR ~/bhuvan 3 | sh long_run_task_err.sh metapipe.1.1.output.testing_file metapipe.1.2.output.testing_file -------------------------------------------------------------------------------- /test/files/mp.2.1.job_stderr: -------------------------------------------------------------------------------- 1 | THERE WAS AN ERROR 2 | long_run_task_err.sh: 7: exit: Illegal number: -1 3 | -------------------------------------------------------------------------------- /test/files/mp.2.1.job_stdout: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.2.1.job_stdout -------------------------------------------------------------------------------- /test/files/mp.2.1.output: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.2.1.output -------------------------------------------------------------------------------- /test/files/mp.2.2.output: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.2.2.output -------------------------------------------------------------------------------- /test/files/mp.3.1.output: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.3.1.output -------------------------------------------------------------------------------- /test/files/mp.3.2.output: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.3.2.output -------------------------------------------------------------------------------- /test/files/mp.3.3.output: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/mp.3.3.output -------------------------------------------------------------------------------- /test/files/somefile.1: -------------------------------------------------------------------------------- 1 | . 2 | -------------------------------------------------------------------------------- /test/files/somefile.1.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/somefile.1.bam -------------------------------------------------------------------------------- /test/files/somefile.1.counts: -------------------------------------------------------------------------------- 1 | . 
2 | -------------------------------------------------------------------------------- /test/files/somefile.2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/somefile.2 -------------------------------------------------------------------------------- /test/files/somefile.2.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/somefile.2.bam -------------------------------------------------------------------------------- /test/files/somefile.2.counts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/somefile.2.counts -------------------------------------------------------------------------------- /test/files/somefile.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/somefile.3 -------------------------------------------------------------------------------- /test/files/somefile.3.counts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/somefile.3.counts -------------------------------------------------------------------------------- /test/files/somefile.4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/somefile.4 -------------------------------------------------------------------------------- /test/files/somefile.4.counts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/somefile.4.counts -------------------------------------------------------------------------------- /test/files/somefile.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/somefile.5 -------------------------------------------------------------------------------- /test/files/somefile.6: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/somefile.6 -------------------------------------------------------------------------------- /test/files/somefile.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/somefile.bam -------------------------------------------------------------------------------- /test/files/star.my_output: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/test/files/star.my_output -------------------------------------------------------------------------------- /test/fixtures.py: 
-------------------------------------------------------------------------------- 1 | from metapipe.models import * 2 | 3 | # Snippets 4 | 5 | basic_cmd = { 6 | 'text': """python somescript.py -i {1,2,3||4,5,6} -o {o} -fgh \ 7 | somefile.txt 8 | """, 9 | 'template_parts': [ 10 | 'python', 'somescript.py', '-i', 11 | [[Input('1'), Input('2'), Input('3')], 12 | [Input('4'), Input('5'), Input('6')]], 13 | '-o', Output('1.1'), '-fgh' 14 | ], 15 | 'command_parts': [ 16 | ['python', 'somescript.py', '-i', 17 | [Input('1'), Input('2'), Input('3')], 18 | '-o', Output('1.1'), '-fgh'], 19 | ['python', 'somescript', '-i', 20 | [Input('4'), Input('5'), Input('6')], 21 | '-o', Output('1.1'), '-fgh'] 22 | ] 23 | } 24 | 25 | magic_cmd = { 26 | 'text': """python somescript.py {*.counts||} > someout 27 | """, 28 | 'template_parts': [ 29 | ['python', 'somescript.py', '-i', 30 | [Input('*.counts', and_or='||'), 'someout'], 31 | ] 32 | ] 33 | 34 | } 35 | 36 | cmd_magic1 = """python somescript.py {*.counts||} > someout 37 | """ 38 | 39 | cmd_magic2 = """python somescript.py {*.counts,} > someout 40 | """ 41 | 42 | cmd_compound1 = """./somescript {1,2,3,4||test/files/*.counts||} 43 | """ 44 | 45 | cmd_compound2 = """./somescript {1,2,3,4||test/files/*.counts,} 46 | """ 47 | 48 | cmd_multiple_inputs = """bash somescript {1,2,3} --conf {4,5,6} > {o} 49 | """ 50 | 51 | cmd_suggest_output = """bash somescript {1,2,3} > {o.gz} 52 | """ 53 | 54 | cmd_comment = """# Some comment 55 | #Some other comment 56 | bash somescript {1,2,3} > {o.gz} 57 | """ 58 | 59 | cmd_multiple_close_inputs = """ 60 | java -jar trimmomatic PE {*R1_001.fastq.gz||} {*R2_001.fastq.gz||} \ 61 | {o} {o} {o} {o} \ 62 | ILLUMINACLIP:Trimmomatic-0.35/adapters/TruSeq3-PE.fa:2:30:10:2:true \ 63 | LEADING:3 TRAILING:3 64 | """ 65 | 66 | cmd_using_multiple_out = """ 67 | gzip --stdout -d {1.1-1||1.1-3} > {o} 68 | """ 69 | 70 | file = """1. somedir/somefile.ext""" 71 | 72 | path = """python /usr/bin/python""" 73 | 74 | 75 | # Full input files. 76 | 77 | 78 | overall = """ 79 | # Some top comment 80 | # Another top comment 81 | # A third top comment 82 | # Woo! 83 | 84 | [COMMANDS] 85 | python somescript.py -i {1,2,3||4,5,6} -o {o} -fgh somefile.txt 86 | bash somescript.sh -i {1.1||1.2} -o {o} -fgh somefile.txt 87 | rb somescript.rb -i {2.1||2.2||1.1,1.2} >> somefile 88 | cut -f *.counts > something.file 89 | paste *.counts > some.file #{o:some.file} 90 | 91 | ./somescript {1,2,3,4||*.counts,} 92 | 93 | rb somescript.rb -i {*.counts||} 94 | python somescript.py -i {*.counts,} #{o:*.bam} 95 | cat {*.bam,} 96 | 97 | cat {2.1} > something.my_output #{o:*.my_output} 98 | cat {*.my_output,} 99 | 100 | [FILES] 101 | 1. somefile.1 102 | 2. somefile.2 103 | 3. somefile.3 104 | #THIS IS A COMMENT 105 | 4. somefile.4 106 | 5. somefile.5 107 | 6. 
somefile.6 108 | 109 | [PATHS] 110 | python /usr/bin/python 111 | # THIS IS also A COMMENT 112 | bash /usr/bin/bash 113 | rb /usr/bin/ruby 114 | cat2 module load cat2; cat2 115 | 116 | [JOB_OPTIONS] 117 | #PBS_O_WORKDIR=~/someuser 118 | set -e; 119 | module load python 120 | # do something 121 | 122 | [OPTIONS] 123 | module load python; 124 | set -e 125 | """ 126 | 127 | 128 | overall_cmd_templates = [ 129 | CommandTemplate('1', [ 130 | PathToken('python', '/usr/bin/python'), 131 | 'somescript.py', 132 | '-i', 133 | [[ 134 | Input('1', filename='somefile.1'), 135 | Input('2', filename='somefile.2'), 136 | Input('3', filename='somefile.3'), 137 | ], 138 | [ 139 | Input('4', filename='somefile.4'), 140 | Input('5', filename='somefile.5'), 141 | Input('6', filename='somefile.6'), 142 | ]], 143 | '-o', 144 | Output('1'), 145 | '-fgh', 146 | 'somefile.txt', 147 | ]), 148 | ] 149 | 150 | 151 | no_paths = """ 152 | [COMMANDS] 153 | python somescript.py -i {1,2,3||4,5,6} -o {o} -fgh somefile.txt 154 | bash somescript.sh -i {1.1||1.2} -o {o} -fgh somefile.txt 155 | rb somescript.rb -i {2.1||2.2||1.1,1.2} >> somefile 156 | cut -f *.counts > something.file 157 | paste *.counts > some.file #{o:some.file} 158 | 159 | ./somescript {1,2,3,4||*.counts,} 160 | 161 | rb somescript.rb -i {*.counts||} 162 | python somescript.py -i {*.counts,} #{o:*.bam} 163 | cat {*.bam,} 164 | 165 | cat {2.1} > something.my_output #{o:*.my_output} 166 | cat {*.my_output,} 167 | 168 | [FILES] 169 | 1. somefile.1 170 | 2. somefile.2 171 | 3. somefile.3 172 | 4. somefile.4 173 | 5. somefile.5 174 | 6. somefile.6 175 | """ 176 | 177 | 178 | no_files = """ 179 | [COMMANDS] 180 | python somescript.py -i {1,2,3||4,5,6} -o {o} -fgh somefile.txt 181 | bash somescript.sh -i {1.1||1.2} -o {o} -fgh somefile.txt 182 | rb somescript.rb -i {2.1||2.2||1.1,1.2} >> somefile 183 | cut -f *.counts > something.file 184 | paste *.counts > some.file #{o:some.file} 185 | 186 | ./somescript {1,2,3,4||*.counts,} 187 | 188 | rb somescript.rb -i {*.counts||} 189 | python somescript.py -i {*.counts,} #{o:*.bam} 190 | cat {*.bam,} 191 | 192 | cat {2.1} > something.my_output #{o:*.my_output} 193 | cat {*.my_output,} 194 | 195 | [PATHS] 196 | python /usr/bin/python 197 | bash /usr/bin/bash 198 | rb /usr/bin/ruby 199 | """ 200 | 201 | 202 | no_cmds = """ 203 | [PATHS] 204 | python /usr/bin/python 205 | bash /usr/bin/bash 206 | rb /usr/bin/ruby 207 | """ 208 | 209 | multiple_inputs = """ 210 | [COMMANDS] 211 | bash somescript {1||2||3} --conf {4||5||6} > {o} 212 | python somescript.py {1,2,3} --conf {4,5,6} > {o} 213 | 214 | [FILES] 215 | 1. somefile.1 216 | 2. somefile.2 217 | 3. somefile.3 218 | 4. somefile.4 219 | 5. somefile.5 220 | 6. somefile.6 221 | """ 222 | 223 | multiple_input_vals = ['bash', 'somescript', 224 | [[Input('1', 'somefile.1')], [Input('2', 'somefile.2')], 225 | [Input('3', 'somefile.3')]], '--conf', 226 | [[Input('4', 'somefile.4')], [Input('5', 'somefile.5')], 227 | [Input('6', 'somefile.6')]], 228 | '>', Output('1', 'metapipe.1.output')] 229 | 230 | 231 | 232 | multiple_outputs = """ 233 | [COMMANDS] 234 | bash somescript {1||2||3} --log {o} -r {o} 235 | python somescript.py {4,5,6} --log {o} -r {o} --output {o} 236 | 237 | [FILES] 238 | 1. somefile.1 239 | 2. somefile.2 240 | 3. somefile.3 241 | 4. somefile.4 242 | 5. somefile.5 243 | 6. 
somefile.6 244 | """ 245 | 246 | multiple_output_vals = ['bash', 'somescript', 247 | [[Input('1', 'somefile.1')], [Input('2', 'somefile.2')], 248 | [Input('3', 'somefile.3')]], '--log', 249 | Output('1', 'metapipe.1.output'), '-r', 250 | Output('1', 'metapipe.1.output')] 251 | 252 | 253 | 254 | magic_inputs = """ 255 | [COMMANDS] 256 | bash somescript {*.counts||} > {o} 257 | bash togetherness {*.counts} > {o} 258 | python somescript.py {*.counts||} --conf {*.counts||} > {o} 259 | 260 | [FILES] 261 | 1. somefile.1 262 | 2. somefile.2 263 | 3. somefile.3 264 | 4. somefile.4 265 | 5. somefile.5 266 | 6. somefile.6 267 | """ 268 | 269 | full_sample_pipeline = """ 270 | [COMMANDS] 271 | # Trimmomatic 272 | java -jar trimmomatic PE {*R1_001.fastq.gz} {*R2_001.fastq.gz} \ 273 | {o} {o} {o} {o} illuminaclip LEADING:3 TRAILING:3 274 | 275 | # Unzip the outputs from trimmomatic 276 | gzip --stdout -d {*.1.*-2.output||} > {o} 277 | gzip --stdout -d {*.1.*-4.output||} > {o} 278 | 279 | # Cutadapt 280 | # cutadapt needs unzipped fastq files 281 | cutadapt --cut 7 -o {o} {*.2.output||} 282 | cutadapt --cut 7 -o {o} {*.3.output||} 283 | 284 | # BowTie 285 | module load bowtie/2.2.3; \ 286 | bowtie2 --very-sensitive -N 1 -p 8 -x HG_19 -q -1 {*.4.*.output||} -2 \ 287 | {*.5.*.output||} -S {o} 288 | 289 | # HTSeq 290 | module load python; \ 291 | htseq-count {*.7.*.output||} gene_list > {o} 292 | 293 | # Summary 294 | head --lines -5 {*.8.*.output} > {o} 295 | 296 | [PATHS] 297 | trimmomatic Trimmomatic-0.35/trimmomatic-0.35.jar 298 | cutadapt ~/.local/bin/cutadapt 299 | illuminaclip ILLUMINACLIP:/gpfs/home/bhuvan/Programs/Trimmomatic-0.32/adapters/TruSeq3-PE.fa:2:30:10:2:true 300 | HG_19 /gpfs/group/stsi/data/bschrader/hg19/hg19_ucsc 301 | gene_list /gpfs/home/atorkama/iGenomes/Homo_sapiens/UCSC/hg19/Annotation/Archives/archive-2011-08-30-21-45-18/Genes/genes.gtf 302 | """ 303 | 304 | another_sample = """ 305 | [COMMANDS] 306 | # Trimmomatic 307 | java -jar trimmomatic PE {1} {2} {o} {o} {o} {o} ILLUMINACLIP:Trimmomatic-0.35/adapters/TruSeq3-PE.fa:2:30:10:2:true LEADING:3 TRAILING:3 308 | 309 | # Unzip the outputs from trimmomatic 310 | gzip --stdout -d {1.1-1||1.1-3} > {o} 311 | 312 | # Cutadapt 313 | # cutadapt needs unzipped fastq files 314 | cutadapt --cut 7 -o {o} {2.*||} 315 | 316 | # BowTie 317 | bowtie2 --very-sensitive -N 1 -p 8 -x HG_19 -q -1 {3.1} -2 {3.2} -S {o} 318 | 319 | # HTSeq 320 | htseq-count {4.1} gene_list > {o} 321 | 322 | # Summary 323 | head --lines -5 {5.1} > {o} 324 | 325 | [PATHS] 326 | trimmomatic Trimmomatic-0.35/trimmomatic-0.35.jar 327 | cutadapt ~/.local/bin/cutadapt 328 | HG_19 hg19_ucsc.1.bt2 329 | gene_list genes.gtf 330 | 331 | 332 | [FILES] 333 | 1. somefile.1 334 | 2. somefile.2 335 | """ 336 | 337 | long_running = """ 338 | [COMMANDS] 339 | cat {1||2||3||4} > {o} && sleep 1 340 | cat {1.1||1.2} && sleep 1 341 | 342 | [FILES] 343 | 1. somefile.1 344 | 2. somefile.2 345 | 3. somefile.3 346 | 4. somefile.4 347 | 5. somefile.5 348 | 6. somefile.6 349 | """ 350 | 351 | full_output_file_name = """ 352 | [COMMANDS] 353 | gzip --stdout {1} > {o.gz} 354 | cat {1.1} > {o.gz} 355 | cat {2.1} > {o.gz} 356 | cat {2.1} > {o.gz} 357 | 358 | [FILES] 359 | 1. somefile.1 360 | 2. somefile.2 361 | 3. somefile.3 362 | 4. somefile.4 363 | 5. somefile.5 364 | 6. 
somefile.6
365 | """
366 | 
367 | one_step_pipeline = """
368 | [COMMANDS]
369 | cut somefile > anotherfile
370 | """
371 | 
372 | concurrent = """
373 | [COMMANDS]
374 | # Each one has 10
375 | cat {1||2||3||4||1||2||3||4||1||2} > {o}
376 | cat {1||2||3||4||1||2||3||4||1||2} > {o}
377 | cat {1||2||3||4||1||2||3||4||1||2} > {o}
378 | 
379 | [FILES]
380 | 1. somefile.1
381 | 2. somefile.2
382 | 3. somefile.3
383 | 4. somefile.4
384 | """
385 | 
386 | 
387 | magical_glob = """
388 | [COMMANDS]
389 | split -o breakdown {1} #{o:breakdown/*}
390 | cat {1.*} > {o}
391 | diff {2.*} {1}
392 | 
393 | [FILES]
394 | 1. somefile.1
395 | 2. somefile.2
396 | """
397 | 
398 | magical_glob2 = """
399 | [COMMANDS]
400 | split -o breakdown {1} #{o:breakdown/*}
401 | cat {1.*||} > {o}
402 | diff {2.*} {1}
403 | 
404 | [FILES]
405 | 1. somefile.1
406 | 2. somefile.2
407 | """
408 | 
409 | 
410 | # Job Fixtures
411 | 
412 | pbs_job_qstat_queued = ("""Job id Name User Time Use S Queue
413 | ---------------- ---------------- ---------------- -------- - -----
414 | 4807 scatter user01 12:56:34 Q batch
415 | """, None)
416 | 
417 | pbs_job_qstat_running = ("""Job id Name User Time Use S Queue
418 | ---------------- ---------------- ---------------- -------- - -----
419 | 4807 scatter user01 12:56:34 R batch
420 | """, None)
421 | 
422 | pbs_job_qsub = ("""9974279.garibaldi01-adm.cluster.net""", None)
423 | 
424 | 
425 | 
426 | sge_job_qstat_queued = ("""job-ID prior name user state submit/start at queue slots ja-task-ID
427 | -------------------------------------------------------------------
428 | 1 0.00000 hostname sgeadmin qw 09/09/2009 14:58:00 1
429 | """, None)
430 | 
431 | sge_job_qstat_running = ("""job-ID prior name user state submit/start at queue slots ja-task-ID
432 | -------------------------------------------------------------------
433 | 6 0.55500 jobscript. sgeadmin r 09/09/2009 16:18:57 all.q@node001.c 1
434 | """, None)
435 | 
436 | sge_job_qsub = ("""Your job 1 ("hostname") has been submitted""", None)
437 | 
-------------------------------------------------------------------------------- /test/mocks.py: --------------------------------------------------------------------------------
1 | """ A series of mocks for metapipe. """
2 | 
3 | from metapipe.models import Job
4 | 
5 | 
6 | class MockJob(Job):
7 | 
8 | def __init__(self, alias, command, depends_on=[]):
9 | super(MockJob, self).__init__(alias, command, depends_on)
10 | self._submitted = False
11 | self._done = False
12 | self._step = 0
13 | 
14 | def __repr__(self):
15 | return '<MockJob: {}>'.format(self.alias)
16 | 
17 | def submit(self):
18 | self._step += 1
19 | 
20 | def is_running(self):
21 | self._step += 1
22 | return self._step > 1 and self._step < 10
23 | 
24 | def is_queued(self):
25 | return False
26 | 
27 | def is_complete(self):
28 | return self._step > 10
29 | 
30 | def is_fail(self):
31 | return False
32 | 
-------------------------------------------------------------------------------- /test/test_app.py: --------------------------------------------------------------------------------
1 | import sure
2 | 
3 | from metapipe.app import *
4 | 
5 | 
6 | def test_app():
7 | pass
8 | 
9 | 
-------------------------------------------------------------------------------- /test/test_command.py: --------------------------------------------------------------------------------
1 | """ Tests for the command class.
""" 2 | try: 3 | from unittest.mock import Mock, PropertyMock, patch 4 | except ImportError: 5 | from mock import Mock, PropertyMock, patch 6 | 7 | import sure 8 | 9 | from .fixtures import * 10 | 11 | from metapipe.parser import Parser 12 | from metapipe.models import * 13 | 14 | 15 | def test_eval_1(): 16 | parser = Parser(overall) 17 | 18 | cmds = parser.consume() 19 | cmds[0].eval()[0].eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;' 20 | '\nmodule load python\n# do something\n' 21 | '/usr/bin/python somescript.py -i ' 22 | 'somefile.1 somefile.2 somefile.3 -o mp.1.1.output ' 23 | '-fgh somefile.txt') 24 | 25 | 26 | def test_eval_2(): 27 | parser = Parser(overall) 28 | cmds = parser.consume() 29 | 30 | cmds[0].eval()[1].eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;' 31 | '\nmodule load python\n# do something\n' 32 | '/usr/bin/python somescript.py -i ' 33 | 'somefile.4 somefile.5 somefile.6 -o mp.1.2.output ' 34 | '-fgh somefile.txt') 35 | 36 | 37 | def test_eval_3(): 38 | parser = Parser(overall) 39 | cmds = parser.consume() 40 | old_commands = [] 41 | for cmd in cmds[0:1]: 42 | old_commands.extend(cmd.eval()) 43 | 44 | cmd = cmds[1].eval()[0] 45 | cmd.update_dependent_files(old_commands) 46 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;' 47 | '\nmodule load python\n# do something\n' 48 | '/usr/bin/bash somescript.sh -i mp.1.1.output' 49 | ' -o mp.2.1.output -fgh somefile.txt') 50 | 51 | 52 | def test_eval_4(): 53 | parser = Parser(overall) 54 | cmds = parser.consume() 55 | old_commands = [] 56 | for cmd in cmds[0:1]: 57 | old_commands.extend(cmd.eval()) 58 | 59 | cmd = cmds[1].eval()[1] 60 | cmd.update_dependent_files(old_commands) 61 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;' 62 | '\nmodule load python\n# do something\n' 63 | '/usr/bin/bash somescript.sh -i mp.1.2.output' 64 | ' -o mp.2.2.output -fgh somefile.txt') 65 | 66 | 67 | def test_eval_5(): 68 | parser = Parser(overall) 69 | cmds = parser.consume() 70 | old_commands = [] 71 | for cmd in cmds[0:2]: 72 | old_commands.extend(cmd.eval()) 73 | 74 | cmd = cmds[2].eval()[0] 75 | cmd.update_dependent_files(old_commands) 76 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;' 77 | '\nmodule load python\n# do something\n' 78 | '/usr/bin/ruby somescript.rb -i mp.2.1.output' 79 | ' >> somefile') 80 | 81 | 82 | def test_eval_6(): 83 | parser = Parser(overall) 84 | cmds = parser.consume() 85 | old_commands = [] 86 | for cmd in cmds[0:2]: 87 | old_commands.extend(cmd.eval()) 88 | 89 | cmd = cmds[2].eval()[1] 90 | cmd.update_dependent_files(old_commands) 91 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;' 92 | '\nmodule load python\n# do something\n' 93 | '/usr/bin/ruby somescript.rb -i mp.2.2.output' 94 | ' >> somefile') 95 | 96 | 97 | def test_eval_7(): 98 | parser = Parser(overall) 99 | cmds = parser.consume() 100 | old_commands = [] 101 | for cmd in cmds[0:2]: 102 | old_commands.extend(cmd.eval()) 103 | 104 | cmd = cmds[2].eval()[2] 105 | cmd.update_dependent_files(old_commands) 106 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;' 107 | '\nmodule load python\n# do something\n/usr/bin/ruby somescript.rb -i ' 108 | 'mp.1.1.output mp.1.2.output >> somefile') 109 | 110 | 111 | def test_eval_8(): 112 | parser = Parser(overall) 113 | cmds = parser.consume() 114 | old_commands = [] 115 | for cmd in cmds[0:3]: 116 | old_commands.extend(cmd.eval()) 117 | 118 | cmd = cmds[3].eval()[0] 119 | cmd.update_dependent_files(old_commands) 120 | 
cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;'
121 | '\nmodule load python\n# do something\n'
122 | 'cut -f *.counts > something.file')
123 | 
124 | 
125 | def test_eval_9():
126 | parser = Parser(overall)
127 | cmds = parser.consume()
128 | old_commands = []
129 | for cmd in cmds[0:4]:
130 | old_commands.extend(cmd.eval())
131 | 
132 | cmd = cmds[4].eval()[0]
133 | cmd.update_dependent_files(old_commands)
134 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;'
135 | '\nmodule load python\n# do something\n'
136 | 'paste *.counts > some.file # some.file')
137 | 
138 | 
139 | def test_eval_10():
140 | parser = Parser(overall)
141 | cmds = parser.consume()
142 | old_commands = []
143 | for cmd in cmds[0:5]:
144 | old_commands.extend(cmd.eval())
145 | 
146 | cmd = cmds[5].eval()[0]
147 | cmd.update_dependent_files(old_commands)
148 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;'
149 | '\nmodule load python\n# do something\n'
150 | './somescript somefile.1 somefile.2 '
151 | 'somefile.3 somefile.4')
152 | 
153 | 
154 | def test_eval_11():
155 | parser = Parser(overall)
156 | cmds = parser.consume()
157 | old_commands = []
158 | for cmd in cmds[0:5]:
159 | old_commands.extend(cmd.eval())
160 | 
161 | cmd = cmds[5].eval()[1]
162 | cmd.update_dependent_files(old_commands)
163 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;'
164 | '\nmodule load python\n# do something\n'
165 | './somescript somefile.1.counts somefile.2.counts '
166 | 'somefile.3.counts somefile.4.counts')
167 | 
168 | 
169 | def test_eval_12():
170 | parser = Parser(overall)
171 | cmds = parser.consume()
172 | old_commands = []
173 | for cmd in cmds[0:6]:
174 | old_commands.extend(cmd.eval())
175 | 
176 | cmd = cmds[6].eval()[0]
177 | cmd.update_dependent_files(old_commands)
178 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;'
179 | '\nmodule load python\n# do something\n'
180 | '/usr/bin/ruby somescript.rb -i somefile.1.counts')
181 | 
182 | 
183 | def test_eval_13():
184 | parser = Parser(overall)
185 | cmds = parser.consume()
186 | old_commands = []
187 | for cmd in cmds[0:6]:
188 | old_commands.extend(cmd.eval())
189 | 
190 | cmd = cmds[6].eval()[1]
191 | cmd.update_dependent_files(old_commands)
192 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;'
193 | '\nmodule load python\n# do something\n'
194 | '/usr/bin/ruby somescript.rb -i somefile.2.counts')
195 | 
196 | 
197 | def test_eval_14():
198 | parser = Parser(overall)
199 | cmds = parser.consume()
200 | old_commands = []
201 | for cmd in cmds[0:6]:
202 | old_commands.extend(cmd.eval())
203 | 
204 | cmd = cmds[6].eval()[2]
205 | cmd.update_dependent_files(old_commands)
206 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;'
207 | '\nmodule load python\n# do something\n'
208 | '/usr/bin/ruby somescript.rb -i somefile.3.counts')
209 | 
210 | 
211 | def test_eval_14b():
212 | parser = Parser(overall)
213 | cmds = parser.consume()
214 | old_commands = []
215 | for cmd in cmds[0:6]:
216 | old_commands.extend(cmd.eval())
217 | 
218 | cmd = cmds[6].eval()[3]
219 | cmd.update_dependent_files(old_commands)
220 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;'
221 | '\nmodule load python\n# do something\n'
222 | '/usr/bin/ruby somescript.rb -i somefile.4.counts')
223 | 
224 | 
225 | def test_eval_15():
226 | parser = Parser(overall)
227 | cmds = parser.consume()
228 | old_commands = []
229 | for cmd in cmds[0:7]:
230 | old_commands.extend(cmd.eval())
231 | 
232 | cmd = cmds[7].eval()[0]
233 | 
cmd.update_dependent_files(old_commands) 234 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;' 235 | '\nmodule load python\n# do something\n' 236 | '/usr/bin/python somescript.py -i somefile.1.counts' 237 | ' somefile.2.counts somefile.3.counts somefile.4.counts # *.bam') 238 | 239 | 240 | def test_eval_16(): 241 | parser = Parser(overall) 242 | cmds = parser.consume() 243 | old_commands = [] 244 | for cmd in cmds[0:8]: 245 | old_commands.extend(cmd.eval()) 246 | 247 | cmd = cmds[8].eval()[0] 248 | cmd.update_dependent_files(old_commands) 249 | cmd.eval().should.equal('#PBS_O_WORKDIR=~/someuser\nset -e;' 250 | '\nmodule load python\n# do something\n' 251 | 'cat somefile.1.bam somefile.2.bam somefile.bam') 252 | 253 | 254 | def test_eval_16_deps(): 255 | parser = Parser(overall) 256 | cmds = parser.consume() 257 | old_commands = [] 258 | for cmd in cmds[0:8]: 259 | old_commands.extend(cmd.eval()) 260 | 261 | cmd = cmds[8].eval()[0] 262 | cmd.update_dependent_files(old_commands) 263 | cmd.depends_on.should.have.length_of(1) 264 | 265 | 266 | def test_eval_multiple_inputs(): 267 | parser = Parser(multiple_inputs) 268 | cmds = parser.consume() 269 | old_commands = [] 270 | 271 | cmd = cmds[0].eval()[0] 272 | print(cmd) 273 | cmd.update_dependent_files(old_commands) 274 | cmd.eval().should.equal('bash somescript somefile.1 --conf somefile.4 > ' 275 | 'mp.1.1.output') 276 | 277 | 278 | def test_multiple_outputs1(): 279 | parser = Parser(multiple_outputs) 280 | cmds = parser.consume() 281 | old_commands = [] 282 | 283 | cmd = cmds[0].eval()[0] 284 | cmd.update_dependent_files(old_commands) 285 | cmd.eval().should.equal('bash somescript somefile.1 --log' 286 | ' mp.1.1-1.output -r mp.1.1-2.output') 287 | 288 | 289 | def test_multiple_outputs2(): 290 | parser = Parser(multiple_outputs) 291 | cmds = parser.consume() 292 | old_commands = [] 293 | 294 | cmd = cmds[1].eval()[0] 295 | cmd.update_dependent_files(old_commands) 296 | cmd.eval().should.equal('python somescript.py somefile.4 somefile.5 ' 297 | 'somefile.6 --log mp.2.1-1.output -r mp.2.1-2.output ' 298 | '--output mp.2.1-3.output') 299 | 300 | 301 | def test_another_sample_pipeline(): 302 | parser = Parser(another_sample) 303 | 304 | cmds = parser.consume() 305 | 306 | old_commands = [] 307 | 308 | cmd = cmds[0].eval()[0] 309 | cmd.update_dependent_files(old_commands) 310 | cmd.eval().should.equal('# Trimmomatic\n' 311 | 'java -jar Trimmomatic-0.35/trimmomatic-0.35.jar ' 312 | 'PE somefile.1 somefile.2 mp.1.1-1.output mp.1.1-2.output ' 313 | 'mp.1.1-3.output mp.1.1-4.output ' 314 | 'ILLUMINACLIP:Trimmomatic-0.35/adapters/TruSeq3-PE.fa:2:30:10:2:true ' 315 | 'LEADING:3 TRAILING:3') 316 | 317 | 318 | def test_another_sample_pipeline_1(): 319 | parser = Parser(another_sample) 320 | 321 | cmds = parser.consume() 322 | 323 | old_commands = [] 324 | for cmd in cmds[0:1]: 325 | old_commands.extend(cmd.eval()) 326 | 327 | cmd = cmds[1].eval()[0] 328 | cmd.update_dependent_files(old_commands) 329 | cmd.eval().should.equal('# Unzip the outputs from trimmomatic\n' 330 | 'gzip --stdout -d mp.1.1-1.output > ' 331 | 'mp.2.1.output') 332 | 333 | 334 | def test_another_sample_pipeline_1_deps(): 335 | parser = Parser(another_sample) 336 | 337 | cmds = parser.consume() 338 | 339 | old_commands = [] 340 | for cmd in cmds[0:1]: 341 | old_commands.extend(cmd.eval()) 342 | 343 | cmd = cmds[1].eval()[0] 344 | cmd.update_dependent_files(old_commands) 345 | cmd.depends_on.should.have.length_of(1) 346 | cmd.depends_on[0].should.equal('1.1') 347 | 348 | 
349 | def test_another_sample_pipeline_2():
350 | parser = Parser(another_sample)
351 | 
352 | cmds = parser.consume()
353 | 
354 | old_commands = []
355 | for cmd in cmds[0:2]:
356 | old_commands.extend(cmd.eval())
357 | 
358 | cmd = cmds[2].eval()[0]
359 | cmd.update_dependent_files(old_commands)
360 | cmd.eval().should.equal('# Cutadapt\n# cutadapt needs unzipped fastq '
361 | 'files\n~/.local/bin/cutadapt --cut 7 -o '
362 | 'mp.3.1.output mp.2.1.output')
363 | 
364 | 
365 | def test_another_sample_pipeline_3():
366 | parser = Parser(another_sample)
367 | 
368 | cmds = parser.consume()
369 | 
370 | old_commands = []
371 | for cmd in cmds[0:2]:
372 | old_commands.extend(cmd.eval())
373 | 
374 | cmd = cmds[2].eval()[1]
375 | cmd.update_dependent_files(old_commands)
376 | cmd.eval().should.equal('# Cutadapt\n# cutadapt needs unzipped fastq '
377 | 'files\n~/.local/bin/cutadapt --cut 7 -o '
378 | 'mp.3.2.output mp.2.2.output')
379 | 
380 | 
381 | def test_long_running_1():
382 | parser = Parser(long_running)
383 | 
384 | old_commands = []
385 | 
386 | templates = parser.consume()
387 | 
388 | cmd = templates[0].eval()[0]
389 | cmd.update_dependent_files(old_commands)
390 | cmd.eval().should.equal('cat somefile.1 > mp.1.1.output && sleep 1')
391 | 
392 | 
393 | def test_long_running_2():
394 | parser = Parser(long_running)
395 | 
396 | templates = parser.consume()
397 | 
398 | old_commands = []
399 | 
400 | for cmd in templates[0:1]:
401 | old_commands.extend(cmd.eval())
402 | cmd = templates[1].eval()[0]
403 | 
404 | cmd.update_dependent_files(old_commands)
405 | cmd.eval().should.equal('cat mp.1.1.output && '
406 | 'sleep 1')
407 | 
408 | 
409 | def test_full_output_file_name():
410 | parser = Parser(full_output_file_name)
411 | 
412 | templates = parser.consume()
413 | 
414 | old_commands = []
415 | 
416 | cmd = templates[0].eval()[0]
417 | 
418 | cmd.update_dependent_files(old_commands)
419 | cmd.eval().should.equal('gzip --stdout somefile.1 > mp.1.1.output.gz')
420 | 
421 | 
422 | def test_full_output_file_name_2():
423 | parser = Parser(full_output_file_name)
424 | 
425 | templates = parser.consume()
426 | 
427 | old_commands = []
428 | 
429 | for cmd in templates[0:1]:
430 | old_commands.extend(cmd.eval())
431 | cmd = templates[1].eval()[0]
432 | 
433 | cmd.update_dependent_files(old_commands)
434 | cmd.eval().should.equal('cat mp.1.1.output.gz > mp.2.1.output.gz')
435 | 
436 | 
437 | def test_magical_glob():
438 | parser = Parser(magical_glob)
439 | templates = parser.consume()
440 | old_commands = []
441 | 
442 | for cmd in templates[0:1]:
443 | old_commands.extend(cmd.eval())
444 | 
445 | with patch('metapipe.models.Input.files', new_callable=PropertyMock) as mock_files:
446 | mock_files.return_value = ['mp.1.1.output', 'mp.1.2.output']
447 | cmd = templates[1].eval()[0]
448 | 
449 | cmd.update_dependent_files(old_commands)
450 | cmd.eval().should.equal('cat mp.1.1.output mp.1.2.output > mp.2.1.output')
451 | 
452 | 
453 | def test_magical_glob2():
454 | parser = Parser(magical_glob2)
455 | templates = parser.consume()
456 | old_commands = []
457 | 
458 | for cmd in templates[0:1]:
459 | old_commands.extend(cmd.eval())
460 | 
461 | with patch('metapipe.models.Input.files', new_callable=PropertyMock) as mock_files:
462 | mock_files.return_value = ['mp.1.1.output', 'mp.1.2.output']
463 | cmd = templates[1].eval()[0]
464 | 
465 | cmd.update_dependent_files(old_commands)
466 | cmd.eval().should.equal('cat mp.1.1.output > mp.2.1.output')
467 | --------------------------------------------------------------------------------
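Nearly every test in test_command.py above repeats a single evaluation pattern: parse a pipeline, evaluate all earlier command templates so that dependent {N.M} inputs can resolve, then evaluate one command and compare the resulting shell string. A minimal sketch of that pattern factored into a shared helper; `Parser`, `consume()`, `eval()`, and `update_dependent_files()` are the metapipe APIs used verbatim in the tests above, while the helper itself (`eval_command`) is hypothetical and not part of the repository:

from metapipe.parser import Parser


def eval_command(pipeline_text, template_index, command_index):
    """ Return the evaluated shell string for one command: evaluate every
    template before `template_index` first, then resolve the command at
    `command_index` of the chosen template against those results.
    (Illustrative helper, not part of the original test suite.)
    """
    templates = Parser(pipeline_text).consume()
    old_commands = []
    for template in templates[:template_index]:
        old_commands.extend(template.eval())
    cmd = templates[template_index].eval()[command_index]
    cmd.update_dependent_files(old_commands)
    return cmd.eval()

With such a helper, a test like test_eval_5 above reduces to a single assertion against eval_command(overall, 2, 0).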
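For orientation, here is how the pieces in this dump fit together end to end: the parser turns a pipeline file into command templates, and the runtime wraps each template in a JobTemplate and ticks the queue until every job finishes (see metapipe/runtime.py above). A sketch under one stated assumption: that `Queue` is the scheduler class metapipe.models exports from its queue.py, whose contents are not reproduced in this dump; `Parser`, `Runtime`, and `LocalJob` are used exactly as defined above.

from metapipe.models import LocalJob, Queue  # Queue's export and interface are an assumption
from metapipe.parser import Parser
from metapipe.runtime import Runtime

pipeline = """
[COMMANDS]
cat {1} > {o}

[FILES]
1. somefile.1
"""

# consume() returns command templates; Runtime pushes each one onto the
# queue as a JobTemplate, then run() ticks the queue until it drains.
templates = Parser(pipeline).consume()
runtime = Runtime(templates, Queue, {'local': LocalJob}, job_type='local')
runtime.run()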
/test/test_command_template.py: -------------------------------------------------------------------------------- 1 | """ Tests for the output of the command template. """ 2 | 3 | import sure 4 | 5 | from .fixtures import * 6 | 7 | from metapipe.parser import Parser 8 | from metapipe.models import * 9 | 10 | 11 | def test_eval_1(): 12 | parser = Parser(overall) 13 | 14 | templates = parser.consume() 15 | 16 | vals = [CommentToken(['#PBS_O_WORKDIR=~/someuser']), 17 | CommentToken(['set -e;']), 18 | CommentToken(['module load python']), 19 | CommentToken(['# do something']), 20 | PathToken('python', '/usr/bin/python'), 'somescript.py', '-i', 21 | Input('1', 'somefile.1'), 22 | Input('2', 'somefile.2'), 23 | Input('3', 'somefile.3'), 24 | '-o', Output('1.1', 'metapipe.1.1.output'), 25 | '-fgh', 'somefile.txt'] 26 | cmd = templates[0].eval()[0] 27 | for i, part in enumerate(cmd.parts): 28 | vals[i].should.equal(part) 29 | 30 | 31 | def test_eval_2(): 32 | parser = Parser(overall) 33 | templates = parser.consume() 34 | 35 | vals = [CommentToken(['#PBS_O_WORKDIR=~/someuser']), 36 | CommentToken(['set -e;']), 37 | CommentToken(['module load python']), 38 | CommentToken(['# do something']), 39 | PathToken('python', '/usr/bin/python'), 'somescript.py', '-i', 40 | Input('4', 'somefile.4'), 41 | Input('5', 'somefile.5'), 42 | Input('6', 'somefile.6'), 43 | '-o', Output('1.2', 'metapipe.1.2.output'), 44 | '-fgh', 'somefile.txt'] 45 | cmd = templates[0].eval()[1] 46 | for i, part in enumerate(cmd.parts): 47 | vals[i].should.equal(part) 48 | 49 | 50 | def test_eval_multiple_inputs1(): 51 | parser = Parser(multiple_inputs) 52 | 53 | templates = parser.consume() 54 | 55 | vals = ['bash', 'somescript', 56 | Input('1', 'somefile.1'), '--conf', 57 | Input('4', 'somefile.4'), 58 | '>', Output('1.1', 'metapipe.1.1.output')] 59 | cmd = templates[0].eval()[0] 60 | for i, part in enumerate(cmd.parts): 61 | vals[i].should.equal(part) 62 | 63 | 64 | def test_eval_multiple_inputs2(): 65 | parser = Parser(multiple_inputs) 66 | 67 | templates = parser.consume() 68 | 69 | vals = ['bash', 'somescript', 70 | Input('2', 'somefile.2'), '--conf', 71 | Input('5', 'somefile.5'), 72 | '>', Output('1.2', 'metapipe.1.2.output')] 73 | cmd = templates[0].eval()[1] 74 | for i, part in enumerate(cmd.parts): 75 | vals[i].should.equal(part) 76 | 77 | 78 | def test_eval_multiple_inputs3(): 79 | parser = Parser(multiple_inputs) 80 | 81 | templates = parser.consume() 82 | 83 | vals = ['bash', 'somescript', 84 | Input('3', 'somefile.3'), '--conf', 85 | Input('6', 'somefile.6'), 86 | '>', Output('1.3', 'metapipe.1.3.output')] 87 | cmd = templates[0].eval()[2] 88 | for i, part in enumerate(cmd.parts): 89 | vals[i].should.equal(part) 90 | 91 | 92 | def test_eval_multiple_inputs4(): 93 | parser = Parser(multiple_inputs) 94 | 95 | templates = parser.consume() 96 | 97 | vals = ['python', 'somescript.py', 98 | Input('1', 'somefile.1'), 99 | Input('2', 'somefile.2'), 100 | Input('3', 'somefile.3'), '--conf', 101 | Input('4', 'somefile.4'), 102 | Input('5', 'somefile.5'), 103 | Input('6', 'somefile.6'), 104 | '>', Output('2.1', 'metapipe.2.1.output')] 105 | cmd = templates[1].eval()[0] 106 | for i, part in enumerate(cmd.parts): 107 | vals[i].should.equal(part) 108 | 109 | 110 | def test_eval_magic_input(): 111 | parser = Parser(magic_inputs) 112 | 113 | templates = parser.consume() 114 | 115 | vals = ['bash', 'somescript', 116 | Input('*.counts', 'somefile.1'), 117 | '>', Output('1.1', 'metapipe.1.1.output')] 118 | cmd = templates[0].eval()[0] 119 
| for i, part in enumerate(cmd.parts): 120 | vals[i].should.equal(part) 121 | 122 | 123 | def test_multiple_outputs(): 124 | parser = Parser(multiple_outputs) 125 | 126 | templates = parser.consume() 127 | 128 | vals = ['bash', 'somescript', 129 | Input('1', 'somefile.1'), '--log', 130 | Output('1.1-1', 'metapipe.1.1-1.output'), '-r', 131 | Output('1.1-2', 'metapipe.1.1-2.output')] 132 | 133 | cmd = templates[0].eval()[0] 134 | for i, part in enumerate(cmd.parts): 135 | vals[i].should.equal(part) 136 | 137 | 138 | 139 | def test_another_sample_pipeline(): 140 | parser = Parser(another_sample) 141 | 142 | templates = parser.consume() 143 | 144 | 145 | vals = [CommentToken(['#', ' Trimmomatic']),'java', '-jar', 146 | PathToken('trimmomatic', 'Trimmomatic-0.35/trimmomatic-0.35.jar>'), 147 | 'PE', Input('1'), Input('2'), 148 | Output('1.1-1', 'metapipe.1.output'), Output('1.1-2', 'metapipe.1.output'), 149 | Output('1.1-3', 'metapipe.1.output'), Output('1.1-4', 'metapipe.1.output'), 150 | 'ILLUMINACLIP:Trimmomatic-0.35/adapters/TruSeq3-PE.fa:2:30:10:2:true', 151 | 'LEADING:3', 'TRAILING:3' 152 | ] 153 | 154 | cmd = templates[0].eval()[0] 155 | for i, part in enumerate(cmd.parts): 156 | vals[i].should.equal(part) 157 | 158 | 159 | def test_another_sample_pipeline_1(): 160 | parser = Parser(another_sample) 161 | 162 | templates = parser.consume() 163 | 164 | 165 | vals = [CommentToken(['#', ' Unzip the outputs from trimmomatic']), 166 | 'gzip', '--stdout', '-d', 167 | Input('1.1-1'), '>', 168 | Output('2.1', 'metapipe.2.1.output')] 169 | 170 | cmd = templates[1].eval()[0] 171 | for i, part in enumerate(cmd.parts): 172 | vals[i].should.equal(part) 173 | 174 | 175 | def test_another_sample_pipeline_2(): 176 | parser = Parser(another_sample) 177 | 178 | templates = parser.consume() 179 | 180 | 181 | vals = [CommentToken(['#', ' Cutadapt']), 182 | CommentToken(['#', ' cutadapt needs unzipped fastq files']), 183 | PathToken('cutadapt', '~/.local/bin/cutadapt'), '--cut', '7', 184 | '-o', Output('3.1', 'metapipe.3.1.output'), Input('2.*')] 185 | 186 | cmd = templates[2].eval()[0] 187 | for i, part in enumerate(cmd.parts): 188 | vals[i].should.equal(part) 189 | 190 | 191 | def test_long_running_1(): 192 | parser = Parser(long_running) 193 | 194 | templates = parser.consume() 195 | 196 | 197 | vals = ['cat', Input('1', 'somefile.1'), '>', 198 | Output('1.1', 'metapipe.1.1.output'), '&&', 'sleep', '1'] 199 | 200 | cmd = templates[0].eval()[0] 201 | for i, part in enumerate(cmd.parts): 202 | vals[i].should.equal(part) 203 | 204 | 205 | def test_long_running_2(): 206 | parser = Parser(long_running) 207 | 208 | templates = parser.consume() 209 | 210 | 211 | vals = ['cat', Input('1.1', 'metapipe.1.1.output'), '&&', 'sleep', '1'] 212 | 213 | cmd = templates[1].eval()[0] 214 | for i, part in enumerate(cmd.parts): 215 | vals[i].should.equal(part) 216 | 217 | 218 | def test_output_file_name(): 219 | parser = Parser(full_output_file_name) 220 | 221 | templates = parser.consume() 222 | 223 | 224 | vals = ['gzip', '--stdout', Input('1', 'somefile.1'), '>', 225 | Output('1.1', 'metapipe.1.1.output.gz')] 226 | 227 | cmd = templates[0].eval()[0] 228 | for i, part in enumerate(cmd.parts): 229 | vals[i].should.equal(part) 230 | 231 | def test_magical_glob(): 232 | parser = Parser(magical_glob) 233 | templates = parser.consume() 234 | 235 | vals = ['cat', Input('1.*', ''), '>', 236 | Output('2.1', 'mp.2.1.output')] 237 | 238 | cmd = templates[1].eval()[0] 239 | for i, part in enumerate(cmd.parts): 240 | 
vals[i].should.equal(part)
241 | 
242 | def test_magical_glob2():
243 | parser = Parser(magical_glob2)
244 | templates = parser.consume()
245 | 
246 | vals = ['cat', Input('1.*', ''), '>',
247 | Output('2.1', 'mp.2.1.output')]
248 | 
249 | cmd = templates[1].eval()[0]
250 | for i, part in enumerate(cmd.parts):
251 | vals[i].should.equal(part)
252 | 
-------------------------------------------------------------------------------- /test/test_command_template_factory.py: --------------------------------------------------------------------------------
1 | """ Tests for the output of the command template factory. """
2 | 
3 | import sure
4 | 
5 | from .fixtures import *
6 | 
7 | from metapipe.parser import Parser
8 | from metapipe.models import *
9 | 
10 | 
11 | def test_multiple_inputs():
12 | parser = Parser(multiple_inputs)
13 | 
14 | cmds = parser.consume()
15 | for i, part in enumerate(cmds[0].parts):
16 | multiple_input_vals[i].should.equal(part)
17 | 
18 | 
19 | def test_multiple_outputs():
20 | parser = Parser(multiple_outputs)
21 | 
22 | cmds = parser.consume()
23 | for i, part in enumerate(cmds[0].parts):
24 | multiple_output_vals[i].should.equal(part)
25 | 
26 | 
27 | def test_full_sample_pipeline():
28 | parser = Parser(full_sample_pipeline)
29 | 
30 | cmds = parser.consume()
31 | 
32 | vals = [CommentToken(['#', ' Trimmomatic']), 'java', '-jar',
33 | PathToken('trimmomatic', 'Trimmomatic-0.35/trimmomatic-0.35.jar>'),
34 | 'PE', [[Input('*R1_001.fastq.gz')]], [[Input('*R2_001.fastq.gz')]],
35 | Output('1', 'metapipe.1.output'), Output('1', 'metapipe.1.output'),
36 | Output('1', 'metapipe.1.output'), Output('1', 'metapipe.1.output'),
37 | PathToken('illuminaclip', 'ILLUMINACLIP:/gpfs/home/bhuvan/Programs/Trimmomatic-0.32/adapters/TruSeq3-PE.fa:2:30:10:2:true'),
38 | 'LEADING:3', 'TRAILING:3'
39 | ]
40 | 
41 | for i, part in enumerate(cmds[0].parts):
42 | vals[i].should.equal(part)
43 | 
44 | 
45 | def test_another_sample_pipeline():
46 | parser = Parser(another_sample)
47 | 
48 | cmds = parser.consume()
49 | 
50 | 
51 | vals = [CommentToken(['#', ' Trimmomatic']), 'java', '-jar',
52 | PathToken('trimmomatic', 'Trimmomatic-0.35/trimmomatic-0.35.jar>'),
53 | 'PE', [[Input('1')]], [[Input('2')]],
54 | Output('1', 'metapipe.1.output'), Output('1', 'metapipe.1.output'),
55 | Output('1', 'metapipe.1.output'), Output('1', 'metapipe.1.output'),
56 | 'ILLUMINACLIP:Trimmomatic-0.35/adapters/TruSeq3-PE.fa:2:30:10:2:true',
57 | 'LEADING:3', 'TRAILING:3'
58 | ]
59 | 
60 | for i, part in enumerate(cmds[0].parts):
61 | vals[i].should.equal(part)
62 | 
63 | 
64 | def test_another_sample_pipeline_1():
65 | parser = Parser(another_sample)
66 | 
67 | cmds = parser.consume()
68 | 
69 | 
70 | vals = [CommentToken(['#', ' Unzip the outputs from trimmomatic']),
71 | 'gzip', '--stdout', '-d',
72 | [[Input('1.1-1')], [Input('1.1-3')]], '>',
73 | Output('2', 'metapipe.2.output')]
74 | 
75 | for i, part in enumerate(cmds[1].parts):
76 | vals[i].should.equal(part)
77 | 
78 | 
79 | def test_another_sample_pipeline_2():
80 | parser = Parser(another_sample)
81 | 
82 | cmds = parser.consume()
83 | 
84 | 
85 | vals = [CommentToken(['#', ' Cutadapt']),
86 | CommentToken(['#', ' cutadapt needs unzipped fastq files']),
87 | PathToken('cutadapt', '~/.local/bin/cutadapt'), '--cut', '7', '-o',
88 | Output('3', 'metapipe.3.output'),
89 | [[Input('2.*')]]]
90 | 
91 | for i, part in enumerate(cmds[2].parts):
92 | vals[i].should.equal(part)
93 | 
94 | 
95 | def test_long_running_1():
96 | parser = Parser(long_running)
97 | 
98 | cmds = parser.consume()
99 | 
100 | 
101 | vals = ['cat', [[Input('1', 'somefile.1')], 102 | [Input('2', 'somefile.2')], [Input('3', 'somefile.3')], 103 | [Input('4', 'somefile.4')]], '>', 104 | Output('1', 'metapipe.1.output'), '&&', 'sleep', '1'] 105 | 106 | for i, part in enumerate(cmds[0].parts): 107 | vals[i].should.equal(part) 108 | 109 | 110 | def test_long_running_2(): 111 | parser = Parser(long_running) 112 | 113 | cmds = parser.consume() 114 | 115 | 116 | vals = ['cat', [[Input('1.1')], [Input('1.2')]], '&&', 'sleep', '1'] 117 | 118 | for i, part in enumerate(cmds[1].parts): 119 | vals[i].should.equal(part) 120 | 121 | 122 | def test_long_running_2_deps(): 123 | parser = Parser(long_running) 124 | 125 | cmds = parser.consume() 126 | cmds[1]._dependencies.should.have.length_of(1) 127 | 128 | 129 | def test_one_step_pipeline(): 130 | parser = Parser(one_step_pipeline) 131 | cmds = parser.consume() 132 | 133 | vals = ['cut', 'somefile', '>', 'anotherfile'] 134 | for i, part in enumerate(cmds[0].parts): 135 | vals[i].should.equal(part) 136 | 137 | 138 | def test_one_step_pipeline_2(): 139 | parser = Parser(one_step_pipeline) 140 | cmds = parser.consume() 141 | 142 | vals = ['cut', 'somefile', '>', 'anotherfile'] 143 | for i, part in enumerate(cmds[0].parts): 144 | vals[i].should.equal(part) 145 | 146 | -------------------------------------------------------------------------------- /test/test_grammar.py: -------------------------------------------------------------------------------- 1 | """ Tests for the metapipe grammar (built with PyParsing). """ 2 | 3 | import sure 4 | 5 | from metapipe.models.grammar import Grammar 6 | 7 | from .fixtures import * 8 | 9 | 10 | def test_cmd(): 11 | res = Grammar.command.parseString(basic_cmd['text']) 12 | val = ['python somescript.py -i ', '-o ', '-fgh somefile.txt'] 13 | 14 | for i, c in enumerate(res.command): 15 | c.should.equal(val[i]) 16 | 17 | res._in[0][0][0].should.equal('1') 18 | res._in[0][0][2].should.equal('2') 19 | res._in[0][0][4].should.equal('3') 20 | res._in[0][0][6].should.equal('4') 21 | res._in[0][0][8].should.equal('5') 22 | res._in[0][0][10].should.equal('6') 23 | res._in[1][0][0].should.equal('o') 24 | 25 | 26 | def test_cmd_output_name(): 27 | res = Grammar.command.parseString(cmd_suggest_output) 28 | val = ['bash somescript ', '> '] 29 | 30 | for i, c in enumerate(res.command): 31 | c.should.equal(val[i]) 32 | 33 | res._in[1][0][0].should.equal('o.gz') 34 | 35 | 36 | def test_cmd_magic1(): 37 | res = Grammar.command.parseString(cmd_magic1) 38 | val = ['python somescript.py ', '> someout'] 39 | 40 | for i, c in enumerate(res.command): 41 | c.should.equal(val[i]) 42 | 43 | res._in[0][0][0].should.equal('*.counts') 44 | 45 | 46 | def test_cmd_magic2(): 47 | res = Grammar.command.parseString(cmd_magic2) 48 | val = ['python somescript.py ', '> someout'] 49 | 50 | for i, c in enumerate(res.command): 51 | c.should.equal(val[i]) 52 | res._in[0][0][0].should.equal('*.counts') 53 | 54 | 55 | def test_cmd_compound1(): 56 | res = Grammar.command.parseString(cmd_compound1) 57 | val = ['./somescript ', ['1', '2', '3', '4'], ['test/files/*.counts'], '<>'] 58 | 59 | for i, c in enumerate(res.command): 60 | c.should.equal(val[i]) 61 | res._in[0][0][0].should.equal('1') 62 | res._in[0][0][2].should.equal('2') 63 | res._in[0][0][4].should.equal('3') 64 | res._in[0][0][6].should.equal('4') 65 | res._in[0][0][8].should.equal('test/files/*.counts') 66 | 67 | 68 | def test_cmd_compound2(): 69 | res = Grammar.command.parseString(cmd_compound2) 70 | val = ['./somescript ', ['1', '<>', '2', '<>', '3', '<>', '4', '<>',
'test/files/*.counts', '<>']] 71 | 72 | for i, c in enumerate(res.command): 73 | c.should.equal(val[i]) 74 | res._in[0][0][0].should.equal('1') 75 | res._in[0][0][2].should.equal('2') 76 | res._in[0][0][4].should.equal('3') 77 | res._in[0][0][6].should.equal('4') 78 | res._in[0][0][8].should.equal('test/files/*.counts') 79 | 80 | 81 | def test_file(): 82 | res = Grammar.file.parseString(file) 83 | res.alias.should.equal('1') 84 | res.filename.should.equal('somedir/somefile.ext') 85 | 86 | 87 | def test_path(): 88 | res = Grammar.path.parseString(path) 89 | res.alias.should.equal('python') 90 | res.path.should.equal('/usr/bin/python') 91 | 92 | 93 | def test_overall(): 94 | res = Grammar.overall.parseString(overall) 95 | 96 | res['COMMANDS'][0][0].should.equal('python') 97 | res['COMMANDS'][0][1].should.equal(' somescript.py -i {1,2,3||4,5,6} -o {o} -fgh somefile.txt') 98 | 99 | 100 | def test_full_sample_pipeline(): 101 | res = Grammar.overall.parseString(full_sample_pipeline) 102 | 103 | res['COMMANDS'][0][0].should.equal('#') 104 | res['COMMANDS'][0][1].should.equal(' Trimmomatic') 105 | res['COMMANDS'][1][0].should.equal('java') 106 | 107 | 108 | def test_multiple_inputs(): 109 | res = Grammar.command.parseString(cmd_multiple_inputs) 110 | res._in.should.have.length_of(3) 111 | 112 | 113 | def test_multiple_close_inputs(): 114 | res = Grammar.command.parseString(cmd_multiple_close_inputs) 115 | res._in.should.have.length_of(6) 116 | 117 | 118 | def test_full_pipeline_1(): 119 | res = Grammar.command.parseString(cmd_using_multiple_out) 120 | res._in.should.have.length_of(2) 121 | 122 | 123 | def test_multiple_word_paths(): 124 | res = Grammar.overall.parseString(overall) 125 | path = Grammar.path.parseString(''.join(res['PATHS'][4])) 126 | path.path.should.equal('module load cat2; cat2') 127 | -------------------------------------------------------------------------------- /test/test_job.py: -------------------------------------------------------------------------------- 1 | """ Tests for the Job ABC 2 | 3 | author: Brian Schrader 4 | since: 2016-01-27 5 | """ 6 | 7 | from __future__ import print_function 8 | 9 | import sure 10 | 11 | from metapipe.models import * 12 | 13 | from .fixtures import * 14 | 15 | 16 | def test_new_job(): 17 | alias, command, depends_on = 'test', Command([], []), [] 18 | job = Job(alias, command, depends_on) 19 | job.alias.should.equal(alias) 20 | job.command.should.equal(command) 21 | job.depends_on.should.equal(depends_on) 22 | 23 | 24 | -------------------------------------------------------------------------------- /test/test_local_job.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sure 4 | 5 | from metapipe.models import * 6 | 7 | from .fixtures import * 8 | 9 | 10 | def test_cmd(): 11 | alias = 'test' 12 | cmd = Command('testcmd', ['test', 'command']) 13 | job = LocalJob(alias, cmd) 14 | job.cmd.should.equal(['bash', 'metapipe.test.job']) 15 | -------------------------------------------------------------------------------- /test/test_parser.py: -------------------------------------------------------------------------------- 1 | """ Tests for the overall parser. 
""" 2 | 3 | from __future__ import print_function 4 | 5 | import sure 6 | 7 | from metapipe.parser import Parser 8 | from metapipe.models import Input, Output 9 | 10 | from .fixtures import * 11 | 12 | 13 | def test_no_paths(): 14 | parser = Parser(no_paths) 15 | res = parser.consume() 16 | 17 | parser.paths.should.have.length_of(0) 18 | 19 | 20 | def test_no_files(): 21 | parser = Parser(no_files) 22 | res = parser.consume() 23 | 24 | parser.files.should.have.length_of(0) 25 | 26 | 27 | def test_no_cmds(): 28 | parser = Parser(no_cmds) 29 | res = parser.consume.when.called.should.throw(ValueError) 30 | 31 | 32 | def test_consume_paths(): 33 | parser = Parser(overall) 34 | res = parser.consume() 35 | 36 | parser.paths[0].alias.should.equal('python') 37 | parser.paths[0].path.should.equal('/usr/bin/python') 38 | parser.paths[1].alias.should.equal('bash') 39 | parser.paths[1].path.should.equal('/usr/bin/bash') 40 | parser.paths[2].alias.should.equal('rb') 41 | parser.paths[2].path.should.equal('/usr/bin/ruby') 42 | 43 | 44 | def test_consume_files(): 45 | parser = Parser(overall) 46 | res = parser.consume() 47 | 48 | parser.files[0].alias.should.equal('1') 49 | parser.files[0].filename.should.equal('somefile.1') 50 | parser.files[1].alias.should.equal('2') 51 | parser.files[1].filename.should.equal('somefile.2') 52 | 53 | 54 | def test_consume_commands_1(): 55 | parser = Parser(overall) 56 | res = parser.consume() 57 | 58 | res[0].alias.should.equal('1') 59 | res[0].parts[4].should.equal(parser.paths[0]) 60 | res[0].parts[5].should.equal('somescript.py') 61 | res[0].parts[6].should.equal('-i') 62 | res[0].parts[7][0][0].should.equal(Input('1', filename='somefile.1')) 63 | res[0].parts[7][0][1].should.equal(Input('2', filename='somefile.2')) 64 | res[0].parts[7][0][2].should.equal(Input('3', filename='somefile.3')) 65 | res[0].parts[7][1][0].should.equal(Input('4', filename='somefile.4')) 66 | res[0].parts[7][1][1].should.equal(Input('5', filename='somefile.5')) 67 | res[0].parts[7][1][2].should.equal(Input('6', filename='somefile.6')) 68 | res[0].parts[8].should.equal('-o') 69 | res[0].parts[9].should.equal(Output('1')) 70 | res[0].parts[10].should.equal('-fgh') 71 | res[0].parts[11].should.equal('somefile.txt') 72 | res[0]._dependencies.should.have.length_of(0) 73 | 74 | 75 | def test_consume_commands_2(): 76 | parser = Parser(overall) 77 | res = parser.consume() 78 | 79 | res[1].alias.should.equal('2') 80 | res[1].parts[4].should.equal(parser.paths[1]) 81 | res[1].parts[5].should.equal('somescript.sh') 82 | res[1].parts[6].should.equal('-i') 83 | res[1].parts[7][0][0].should.equal(Input('1.1')) 84 | res[1].parts[7][1][0].should.equal(Input('1.2')) 85 | res[1].parts[8].should.equal('-o') 86 | res[1].parts[9].should.equal(Output('1')) 87 | res[1].parts[10].should.equal('-fgh') 88 | res[1].parts[11].should.equal('somefile.txt') 89 | res[1]._dependencies.should.have.length_of(1) 90 | res[1]._dependencies[0].alias.should.equal('1') 91 | 92 | 93 | def test_consume_commands_3(): 94 | parser = Parser(overall) 95 | res = parser.consume() 96 | 97 | res[2].alias.should.equal('3') 98 | res[2].parts[4].should.equal(parser.paths[2]) 99 | res[2].parts[5].should.equal('somescript.rb') 100 | res[2].parts[6].should.equal('-i') 101 | res[2].parts[7][0][0].should.equal(Input('2.1')) 102 | res[2].parts[7][1][0].should.equal(Input('2.2')) 103 | res[2].parts[7][2][0].should.equal(Input('1.1')) 104 | res[2].parts[7][2][1].should.equal(Input('1.2')) 105 | res[2].parts[8].should.equal('>>') 106 | 
res[2].parts[9].should.equal('somefile') 107 | res[2]._dependencies.should.have.length_of(2) 108 | 109 | aliases = [dep.alias for dep in res[2]._dependencies] 110 | aliases.should.contain('2') 111 | aliases.should.contain('1') 112 | 113 | 114 | def test_consume_commands_4(): 115 | parser = Parser(overall) 116 | res = parser.consume() 117 | 118 | res[3].alias.should.equal('4') 119 | res[3].parts[4].should.equal('cut') 120 | res[3].parts[5].should.equal('-f') 121 | res[3].parts[6].should.equal('*.counts') 122 | res[3].parts[7][0].should.equal('>') 123 | res[3].parts[8].should.equal('something.file') 124 | res[3]._dependencies.should.have.length_of(0) 125 | 126 | 127 | def test_consume_commands_5(): 128 | parser = Parser(overall) 129 | res = parser.consume() 130 | 131 | res[4].alias.should.equal('5') 132 | res[4].parts[4].should.equal('paste') 133 | res[4].parts[5].should.equal('*.counts') 134 | res[4].parts[6].should.equal('>') 135 | res[4].parts[9].should.equal(Output('', magic='some.file')) 136 | res[4]._dependencies.should.have.length_of(0) 137 | 138 | 139 | def test_consume_commands_6(): 140 | parser = Parser(overall) 141 | res = parser.consume() 142 | print(res[5].parts) 143 | res[5].alias.should.equal('6') 144 | res[5].parts[4].should.equal('./somescript') 145 | res[5].parts[5][0][0].should.equal(Input('1', 'somefile.1')) 146 | res[5].parts[5][0][1].should.equal(Input('2', 'somefile.2')) 147 | res[5].parts[5][0][2].should.equal(Input('3', 'somefile.3')) 148 | res[5].parts[5][1][0].should.equal(Input('4', '*.counts')) 149 | res[5]._dependencies.should.have.length_of(0) 150 | 151 | 152 | def test_consume_commands_7(): 153 | parser = Parser(overall) 154 | res = parser.consume() 155 | 156 | res[6].alias.should.equal('7') 157 | res[6].parts[4].should.equal(parser.paths[2]) 158 | res[6].parts[5].should.equal('somescript.rb') 159 | res[6].parts[6].should.equal('-i') 160 | res[6].parts[7][0][0].should.equal(Input('*.counts', 161 | '*.counts')) 162 | res[6].parts.should.have.length_of(8) 163 | res[6]._dependencies.should.have.length_of(0) 164 | 165 | 166 | def test_consume_commands_8(): 167 | parser = Parser(overall) 168 | res = parser.consume() 169 | 170 | res[7].alias.should.equal('8') 171 | res[7].parts[4].should.equal(parser.paths[0]) 172 | res[7].parts[5].should.equal('somescript.py') 173 | res[7].parts[6].should.equal('-i') 174 | res[7].parts[7][0][0].should.equal(Input('*.counts', 175 | filename='*.counts')) 176 | res[7].parts[9].should.equal(Output('', magic='*.bam')) 177 | res[7]._dependencies.should.have.length_of(0) 178 | 179 | 180 | def test_consume_commands_9(): 181 | parser = Parser(overall) 182 | res = parser.consume() 183 | 184 | res[8].alias.should.equal('9') 185 | res[8].parts[4].should.equal('cat') 186 | res[8].parts[5][0][0].should.equal(Input('*.bam', 187 | filename='*.bam')) 188 | res[8]._dependencies.should.have.length_of(1) 189 | 190 | 191 | def test_consume_full_sample_pipeline(): 192 | parser = Parser(full_sample_pipeline) 193 | res = parser.consume() 194 | 195 | res[0].alias.should.equal('1') 196 | res[0].parts[0].should.equal(CommentToken(['#', ' Trimmomatic'])) 197 | res[0].parts[1].should.equal('java') 198 | 199 | 200 | def test_consume_multiple_inputs(): 201 | parser = Parser(multiple_inputs) 202 | res = parser.consume() 203 | 204 | res[0].alias.should.equal('1') 205 | res[0].parts[0].should.equal('bash') 206 | res[0].parts[2][0][0].should.equal(Input('1', 207 | filename='somefile.1')) 208 | res[0].parts[2][1][0].should.equal(Input('2', 209 | 
filename='somefile.2')) 210 | res[0].parts[2][2][0].should.equal(Input('3', 211 | filename='somefile.3')) 212 | res[0].parts[4][0][0].should.equal(Input('4', 213 | filename='somefile.4')) 214 | res[0].parts[4][1][0].should.equal(Input('5', 215 | filename='somefile.5')) 216 | res[0].parts[4][2][0].should.equal(Input('6', 217 | filename='somefile.6')) 218 | res[0]._dependencies.should.have.length_of(0) 219 | 220 | 221 | def test_consume_global_opts(): 222 | parser = Parser(overall) 223 | res = parser.consume() 224 | print(parser.global_options) 225 | parser.global_options.should.have.length_of(2) 226 | -------------------------------------------------------------------------------- /test/test_pbs_job.py: -------------------------------------------------------------------------------- 1 | """ Tests for the Torque/PBS Job """ 2 | 3 | import sure 4 | from mock import Mock 5 | 6 | from .fixtures import * 7 | 8 | from metapipe.models import pbs_job 9 | 10 | 11 | def test_qstat_queued(): 12 | j = pbs_job.PBSJob('', None) 13 | pbs_job.call = Mock(return_value=pbs_job_qstat_queued) 14 | 15 | j.is_queued().should.equal(True) 16 | 17 | 18 | def test_qstat_running(): 19 | j = pbs_job.PBSJob('', None) 20 | pbs_job.call = Mock(return_value=pbs_job_qstat_running) 21 | 22 | j.is_running().should.equal(True) 23 | 24 | 25 | def test_qstat_exception(): 26 | j = pbs_job.PBSJob('', None) 27 | pbs_job.call = Mock(return_value=('', None)) 28 | 29 | j.is_running().should.equal(False) 30 | 31 | 32 | def test_submit(): 33 | j = pbs_job.PBSJob('', None) 34 | pbs_job.call = Mock(return_value=pbs_job_qsub) 35 | j.make = Mock() 36 | 37 | j.submit() 38 | j.id.should.equal('9974279') 39 | -------------------------------------------------------------------------------- /test/test_queue.py: -------------------------------------------------------------------------------- 1 | """ Tests for the job queue using a mock job. """ 2 | 3 | from __future__ import print_function 4 | import collections 5 | 6 | import sure 7 | 8 | from metapipe.parser import Parser 9 | from metapipe.runtime import Runtime 10 | from metapipe.models import * 11 | 12 | from .mocks import MockJob 13 | from .fixtures import * 14 | 15 | 16 | def test_repr(): 17 | q = BaseQueue() 18 | str(q).should.equal('') 19 | 20 | def test_on_end(): 21 | """ Ticks the queue when it's empty. """ 22 | q = BaseQueue() 23 | next(q.tick(), None)  # ticking an empty queue should finish without raising 24 | 25 | def test_progress_1(): 26 | q = ReportingJobQueue() 27 | q.push(MockJob('', None)) 28 | q.progress.should.equal(0) 29 | 30 | def test_progress_2(): 31 | q = ReportingJobQueue() 32 | q.push(MockJob('1.1', None)) 33 | tick = q.tick() 34 | for _ in range(10): 35 | next(tick) 36 | 37 | q.push(MockJob('2.2', None)) 38 | for _ in range(6): 39 | next(tick) 40 | 41 | q.push(MockJob('3.3', None)) 42 | q.push(MockJob('4.4', None)) 43 | for _ in range(4): 44 | next(tick) 45 | q.progress.should.equal(50)  # progress is a percentage: 2 of the 4 mock jobs are done 46 | -------------------------------------------------------------------------------- /test/test_runtime.py: -------------------------------------------------------------------------------- 1 | """ Tests for the runtime using a mock job. 
""" 2 | 3 | from __future__ import print_function 4 | 5 | import sure 6 | 7 | from metapipe.parser import Parser 8 | from metapipe.runtime import Runtime 9 | from metapipe.models import * 10 | 11 | from .mocks import MockJob 12 | from .fixtures import * 13 | 14 | 15 | JOB_TYPES = { 16 | 'mock': MockJob, 17 | 'local': LocalJob, 18 | } 19 | 20 | 21 | # New Command Tests 22 | 23 | 24 | def test_get_new_commands_1(): 25 | parser = Parser(overall) 26 | cmds = parser.consume()[:1] 27 | 28 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock') 29 | new = pipeline.queue.queue 30 | new.should.have.length_of(1) 31 | 32 | 33 | def test_get_new_commands_2(): 34 | parser = Parser(overall) 35 | cmds = parser.consume()[:2] 36 | 37 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock') 38 | new = pipeline.queue.queue 39 | new.should.have.length_of(2) 40 | 41 | 42 | def test_get_new_commands_3(): 43 | parser = Parser(overall) 44 | cmds = parser.consume()[:3] 45 | 46 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock') 47 | new = pipeline.queue.queue 48 | new.should.have.length_of(3) 49 | 50 | 51 | def test_get_new_commands_4(): 52 | parser = Parser(overall) 53 | cmds = parser.consume()[:4] 54 | 55 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock') 56 | new = pipeline.queue.queue 57 | new.should.have.length_of(4) 58 | 59 | 60 | def test_get_new_commands_5(): 61 | parser = Parser(overall) 62 | cmds = parser.consume()[:5] 63 | 64 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock') 65 | new = pipeline.queue.queue 66 | new.should.have.length_of(5) 67 | 68 | 69 | def test_get_new_commands_6(): 70 | parser = Parser(overall) 71 | cmds = parser.consume()[:6] 72 | 73 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock') 74 | new = pipeline.queue.queue 75 | new.should.have.length_of(6) 76 | 77 | 78 | def test_get_new_commands_7(): 79 | parser = Parser(overall) 80 | cmds = parser.consume()[:7] 81 | 82 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock') 83 | new = pipeline.queue.queue 84 | new.should.have.length_of(7) 85 | 86 | 87 | def test_get_new_commands_8(): 88 | parser = Parser(overall) 89 | cmds = parser.consume()[:8] 90 | 91 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock') 92 | new = pipeline.queue.queue 93 | new.should.have.length_of(8) 94 | 95 | 96 | def test_get_new_commands_9(): 97 | parser = Parser(overall) 98 | cmds = parser.consume()[:9] 99 | 100 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock') 101 | new = pipeline.queue.queue 102 | new.should.have.length_of(9) 103 | 104 | 105 | # Run Tests 106 | 107 | 108 | def test_run_1(): 109 | parser = Parser(overall) 110 | cmds = parser.consume()[:1] 111 | 112 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock', sleep_time=0.01) 113 | iters = pipeline.run() 114 | iters.should.equal(8) 115 | 116 | 117 | def test_run_2(): 118 | parser = Parser(overall) 119 | cmds = parser.consume()[:2] 120 | 121 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock', sleep_time=0.01) 122 | iters = pipeline.run() 123 | iters.should.equal(15) 124 | 125 | 126 | def test_run_3(): 127 | parser = Parser(overall) 128 | cmds = parser.consume()[:3] 129 | 130 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock', sleep_time=0.01) 131 | iters = pipeline.run() 132 | iters.should.equal(23) 133 | 134 | 135 | def test_run_4(): 136 | parser = Parser(overall) 137 | cmds = parser.consume()[:4] 138 | 139 | pipeline = Runtime(cmds, ReportingJobQueue, 
JOB_TYPES, 'mock', sleep_time=0.01) 140 | iters = pipeline.run() 141 | iters.should.equal(23) 142 | 143 | 144 | def test_run_5(): 145 | parser = Parser(overall) 146 | cmds = parser.consume()[:5] 147 | 148 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock', sleep_time=0.01) 149 | iters = pipeline.run() 150 | iters.should.equal(23) 151 | 152 | 153 | def test_run_6(): 154 | parser = Parser(overall) 155 | cmds = parser.consume()[:6] 156 | 157 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock', sleep_time=0.01) 158 | iters = pipeline.run() 159 | iters.should.equal(23) 160 | 161 | 162 | def test_run_7(): 163 | parser = Parser(overall) 164 | cmds = parser.consume()[:7] 165 | 166 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock', sleep_time=0.01) 167 | iters = pipeline.run() 168 | iters.should.equal(24) 169 | 170 | 171 | def test_run_8(): 172 | parser = Parser(overall) 173 | cmds = parser.consume()[:8] 174 | 175 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock', sleep_time=0.01) 176 | iters = pipeline.run() 177 | iters.should.equal(24) 178 | 179 | 180 | def test_run_9(): 181 | parser = Parser(overall) 182 | cmds = parser.consume()[:9] 183 | 184 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock', sleep_time=0.01) 185 | iters = pipeline.run() 186 | iters.should.equal(25) 187 | 188 | 189 | def test_run_10(): 190 | parser = Parser(overall) 191 | cmds = parser.consume()[:10] 192 | 193 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock', sleep_time=0.01) 194 | iters = pipeline.run() 195 | iters.should.equal(25) 196 | 197 | 198 | def test_run_11(): 199 | parser = Parser(overall) 200 | cmds = parser.consume()[:11] 201 | 202 | pipeline = Runtime(cmds, ReportingJobQueue, JOB_TYPES, 'mock', sleep_time=0.01) 203 | iters = pipeline.run() 204 | iters.should.be.greater_than(15) 205 | 206 | 207 | def test_max_concurrent_jobs(): 208 | parser = Parser(concurrent) 209 | cmds = parser.consume() 210 | 211 | pipeline = Runtime(cmds, ReportingJobQueue, { 'local': MockJob }, 'local', sleep_time=0.01) 212 | iters = pipeline.run() 213 | iters.should.be.greater_than(30) 214 | -------------------------------------------------------------------------------- /test/test_sge_job.py: -------------------------------------------------------------------------------- 1 | """ Tests for the StarCluster Job """ 2 | 3 | import sure 4 | from mock import Mock 5 | 6 | from .fixtures import * 7 | 8 | from metapipe.models import sge_job 9 | 10 | 11 | def test_qstat_queued(): 12 | j = sge_job.SGEJob('', None) 13 | sge_job.call = Mock(return_value=sge_job_qstat_queued) 14 | 15 | j.is_queued().should.equal(True) 16 | 17 | 18 | def test_qstat_running(): 19 | j = sge_job.SGEJob('', None) 20 | sge_job.call = Mock(return_value=sge_job_qstat_running) 21 | 22 | j.is_running().should.equal(True) 23 | 24 | 25 | def test_submit(): 26 | j = sge_job.SGEJob('', None) 27 | sge_job.call = Mock(return_value=sge_job_qsub) 28 | j.make = Mock() 29 | 30 | j.submit() 31 | j.id.should.equal('1') 32 | -------------------------------------------------------------------------------- /test/test_template.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sure 4 | 5 | from metapipe.templates import env 6 | 7 | from .fixtures import * 8 | 9 | template = env.get_template('output_script.tmpl.sh') 10 | 11 | def test_make_script(): 12 | 13 | script = template.render(shell='/usr/bin/sh', temp='metapipe.script') 
14 | script.should.equal("""#! /usr/bin/sh 15 | set -e; 16 | 17 | 18 | 19 | python - <