├── .coveragerc
├── .coveralls
├── .editorconfig
├── .gitignore
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs
├── Makefile
├── conf.py
├── extending.md
├── getting_started.md
├── index.rst
├── make.bat
├── pipeline.gif
├── reporting.md
├── scripting.md
├── syntax.md
└── what_is_metapipe.md
├── metapipe
├── __init__.py
├── __main__.py
├── app.py
├── models
│ ├── __init__.py
│ ├── command.py
│ ├── command_template.py
│ ├── command_template_factory.py
│ ├── grammar.py
│ ├── job.py
│ ├── job_template.py
│ ├── local_job.py
│ ├── pbs_job.py
│ ├── queue.py
│ ├── reporting.py
│ ├── sge_job.py
│ └── tokens.py
├── parser.py
├── runtime.py
└── templates
│ ├── __init__.py
│ ├── output_script.tmpl.sh
│ └── progress-report.tmpl.html
├── requirements.txt
├── setup.py
└── test
├── __init__.py
├── files
├── mp.1.1-1.output
├── mp.1.1.job
├── mp.1.1.job_stderr
├── mp.1.1.job_stdout
├── mp.1.1.output
├── mp.1.1.output.gz
├── mp.1.1.output.testing_file
├── mp.1.2.job
├── mp.1.2.job_stderr
├── mp.1.2.job_stdout
├── mp.1.2.output
├── mp.1.2.output.testing_file
├── mp.2.1.job
├── mp.2.1.job_stderr
├── mp.2.1.job_stdout
├── mp.2.1.output
├── mp.2.2.output
├── mp.3.1.output
├── mp.3.2.output
├── mp.3.3.output
├── somefile.1
├── somefile.1.bam
├── somefile.1.counts
├── somefile.2
├── somefile.2.bam
├── somefile.2.counts
├── somefile.3
├── somefile.3.counts
├── somefile.4
├── somefile.4.counts
├── somefile.5
├── somefile.6
├── somefile.bam
└── star.my_output
├── fixtures.py
├── mocks.py
├── test_app.py
├── test_command.py
├── test_command_template.py
├── test_command_template_factory.py
├── test_grammar.py
├── test_job.py
├── test_local_job.py
├── test_parser.py
├── test_pbs_job.py
├── test_queue.py
├── test_runtime.py
├── test_sge_job.py
├── test_template.py
└── test_tokens.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit = test/*
3 |
--------------------------------------------------------------------------------
/.coveralls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/.coveralls
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | # EditorConfig is awesome: http://EditorConfig.org
2 |
3 | # top-most EditorConfig file
4 | root = true
5 |
6 | # Unix-style newlines with a newline ending every file
7 | [*]
8 | end_of_line = lf
9 | insert_final_newline = true
10 |
11 | [*.{js,py}]
12 | charset = utf-8
13 | indent_style = space
14 | indent_size = 4
15 |
16 | [{package.json,.travis.yml}]
17 | indent_style = space
18 | indent_size = 2
19 |
20 | [*.html]
21 | indent_size = 2
22 | indent_style = space
23 |
24 | [*.css]
25 | indent_size = 2
26 | indent_style = space
27 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | etc/
2 | docs/_build
3 | cover/
4 | .coverage
5 | *cache*
6 | *egg*
7 | build/
8 | dist/
9 | MANIFEST
10 | pipeline
11 | .metapipe
12 | **.mp
13 | **sample**
14 | *.sh
15 | notes/
16 | *.pyc
17 | *.swp
18 | htmlcov/
19 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - '2.7'
4 | - '3.4'
5 | - '3.5'
6 | before_script:
7 | - pip install -r requirements.txt
8 | script: nosetests --with-cov --cov-report html --cov-config .coveragerc --cov metapipe
9 | --cov test
10 | after_success:
11 | - coveralls
12 | deploy:
13 | provider: pypi
14 | user: sonicrocketman
15 | password:
16 | secure: cfDJKNv1BMDsJ3NyOpjwVQwBwO3ZcDMVnEDmcNoS4bwONo/pF7+UFbNm/4+AG2Oo9W5u63YNoR/b1MajbaLd9gBCf7uymrOnLxVtFwq1JDb5BWOsegJwPtlxrKxjKjsBAp5BY7cqOivAWEJZuCi6XjNCyG+QSt0vXKqw4U2xqjfx3KtfSea1Hu0aN1YvFn1otod9faXPK80T/4AZ1Ytmauq12vzla1bLJz7djYS2ApBM+pEJodOhw9V53CknrBpm9SfgFRs5xOkKB7FY8Tq208AxDvcufxkwUsqzoipOzfGcBFhVQdREOOLbWUKmExufCHhyXWVmp7yrkLwGX4REWI+unq6SFU61mknVizLfphJ0DSLGWzcoPxwP3vk39q3PHP1XKojEkCicIb5C6r3YJqYpFtF83YjuEmJXew+9GdP1KWyWS5G1xslhxZvklAdkSPsn65GmABFsSNrMLyVdCTllGpgnjrpcbf1jEMP8MTp6+qc8YVjdEDtzgeJ8aoSyC6K9dRg95qixb1COqzTrF0N4LDKRGKKJrHFg+JXUZDSPYdpju5oz1ohm3/96SmdYGqL+ilO1RT3gxhlFV1X30AymAGUcVKCCLpj9dauQALeA16sKvtcFYVxjunjzJwz+OzM7AYlvFd+ak618x4btnmybsT0Nc93enT9seI+LSlE=
17 | on:
18 | tags: true
19 | distributions: sdist bdist_wheel
20 | branch: master
21 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Brian Schrader
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include metapipe/templates/*
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Metapipe
2 |
3 | *A pipeline generator and runtime system*
4 |
5 | [](https://travis-ci.org/TorkamaniLab/metapipe)
6 | [](https://coveralls.io/github/TorkamaniLab/metapipe?branch=master)
7 | [](https://img.shields.io/badge/Python-2.7-blue.svg)
8 | [](https://img.shields.io/badge/Python-3.4-blue.svg)
9 | [](https://img.shields.io/badge/Python-3.5-blue.svg)
10 | [](https://github.com/TorkamaniLab/metapipe/blob/master/LICENSE)
11 |
12 | Metapipe is a simple command line tool for building and running complex analysis pipelines. If you use a PBS/Torque queue for cluster computing, or if you have complex batch processing that you want simplified, metapipe is the tool for you.
13 |
14 |
15 |
16 | Metapipe's goal is to improve **readability**, and **maintainability** when building complex pipelines.
17 |
18 | In addition to helping you generate and maintain complex pipelines, **metapipe also helps you debug them**! How? Well metapipe watches your jobs execute and keeps tabs on them. This means, unlike conventional batch queue systems like PBS/Torque alone, metapipe can give you accurate error information, and even resubmit failing jobs! Metapipe enhances the power of any PBS/Torque queue!
19 |
20 | - What if I [don't use PBS/Torque](#other-queue-systems), or [a queue system at all?](#no-queue-no-problem)
21 |
22 |
23 | ## How do I get it?
24 |
25 | It's super simple!
26 |
27 | `pip install metapipe`
28 |
29 | To make it easy, metapipe runs on Python 2.7, 3.4, and 3.5!
30 |
31 |
32 | ## What does it do?
33 |
34 | In the bad old days (before metapipe), if you wanted to make an analysis pipeline, you needed to know how to code. **Not anymore!** Metapipe makes it easy to build and run your analysis pipelines! **No more code, just commands!** This makes your pipelines easy to understand and change!
35 |
36 |
37 | ## Documentation & Help
38 |
39 | [Check out the full documentation at ReadTheDocs →](http://metapipe.readthedocs.org/en/latest/index.html)
40 |
41 | If you need help with Metapipe, or you'd like to chat about new features, get in touch by filing an issue, or at `#metapipe` on freenode!
42 |
43 |
44 | ### Here's a sample!
45 |
46 | Let's say you have a few command-line tools that you want to string together into a pipeline. You used to have to know Python, Perl, Bash, or some other scripting language; now you can use Metapipe!
47 |
48 | ```bash
49 | [COMMANDS]
50 | # Let's get the first and third columns from each of
51 | # our files, and put the output in separate files.
52 | cut -f 1,3 {1||2||3} > {o}
53 |
54 | # Once that's done, we'll need to take the output and
55 | # run each through our custom processing script individually.
56 | # Here we can give a custom extension to the default output file.
57 | python3 my_script.py --output {o.processed.csv} -i {1.*||}
58 |
59 | # Finally, we want to collect each sample and analyze
60 | # them all together. We also need to use a custom version
61 | # of Python for this.
62 | custom_python analysis.py -o {o.results.txt} {2.*}
63 |
64 | [FILES]
65 | 1. controls.1.csv
66 | 2. controls.2.csv
67 | 3. controls.3.csv
68 |
69 | [PATHS]
70 | custom_python ~/path/to/my/custom/python/version
71 | ```
72 |
73 | Excluding the comments, this entire analysis pipeline is 13 lines long, and extremely readable! What's even better? If you want to change any steps, it's super easy! That's the power of Metapipe!
74 |
75 |
76 | ## No Queue? No Problem!
77 |
78 | Lots of people don't use a PBS/Torque queue system, or a queue system at all, and metapipe can help them as well! Metapipe runs locally and will give you all the same benefits of a batch queue system! It runs jobs in parallel, provides detailed feedback when jobs go wrong, and automatically re-runs jobs if they fail.
79 |
80 | To run metapipe locally, see the app's help menu!
81 |
82 | `metapipe --help`
83 |
84 |
85 | ## Other Queue Systems
86 |
87 | Metapipe is a very modular tool, and is designed to support any execution backend. Right now we only support PBS, but if you know just a little bit of Python, you can add support for any queue easily! *More information coming soon!*
88 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 |
22 | .PHONY: help
23 | help:
24 | @echo "Please use \`make <target>' where <target> is one of"
25 | @echo " html to make standalone HTML files"
26 | @echo " dirhtml to make HTML files named index.html in directories"
27 | @echo " singlehtml to make a single large HTML file"
28 | @echo " pickle to make pickle files"
29 | @echo " json to make JSON files"
30 | @echo " htmlhelp to make HTML files and a HTML help project"
31 | @echo " qthelp to make HTML files and a qthelp project"
32 | @echo " applehelp to make an Apple Help Book"
33 | @echo " devhelp to make HTML files and a Devhelp project"
34 | @echo " epub to make an epub"
35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
36 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
38 | @echo " text to make text files"
39 | @echo " man to make manual pages"
40 | @echo " texinfo to make Texinfo files"
41 | @echo " info to make Texinfo files and run them through makeinfo"
42 | @echo " gettext to make PO message catalogs"
43 | @echo " changes to make an overview of all changed/added/deprecated items"
44 | @echo " xml to make Docutils-native XML files"
45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
46 | @echo " linkcheck to check all external links for integrity"
47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
48 | @echo " coverage to run coverage check of the documentation (if enabled)"
49 |
50 | .PHONY: clean
51 | clean:
52 | rm -rf $(BUILDDIR)/*
53 |
54 | .PHONY: html
55 | html:
56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
57 | @echo
58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
59 |
60 | .PHONY: dirhtml
61 | dirhtml:
62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
63 | @echo
64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
65 |
66 | .PHONY: singlehtml
67 | singlehtml:
68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
69 | @echo
70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
71 |
72 | .PHONY: pickle
73 | pickle:
74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
75 | @echo
76 | @echo "Build finished; now you can process the pickle files."
77 |
78 | .PHONY: json
79 | json:
80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
81 | @echo
82 | @echo "Build finished; now you can process the JSON files."
83 |
84 | .PHONY: htmlhelp
85 | htmlhelp:
86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
87 | @echo
88 | @echo "Build finished; now you can run HTML Help Workshop with the" \
89 | ".hhp project file in $(BUILDDIR)/htmlhelp."
90 |
91 | .PHONY: qthelp
92 | qthelp:
93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
94 | @echo
95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/metapipe.qhcp"
98 | @echo "To view the help file:"
99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/metapipe.qhc"
100 |
101 | .PHONY: applehelp
102 | applehelp:
103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
104 | @echo
105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
106 | @echo "N.B. You won't be able to view it unless you put it in" \
107 | "~/Library/Documentation/Help or install it in your application" \
108 | "bundle."
109 |
110 | .PHONY: devhelp
111 | devhelp:
112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
113 | @echo
114 | @echo "Build finished."
115 | @echo "To view the help file:"
116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/metapipe"
117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/metapipe"
118 | @echo "# devhelp"
119 |
120 | .PHONY: epub
121 | epub:
122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
123 | @echo
124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
125 |
126 | .PHONY: latex
127 | latex:
128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
129 | @echo
130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
132 | "(use \`make latexpdf' here to do that automatically)."
133 |
134 | .PHONY: latexpdf
135 | latexpdf:
136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
137 | @echo "Running LaTeX files through pdflatex..."
138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
140 |
141 | .PHONY: latexpdfja
142 | latexpdfja:
143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
144 | @echo "Running LaTeX files through platex and dvipdfmx..."
145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
147 |
148 | .PHONY: text
149 | text:
150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
151 | @echo
152 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
153 |
154 | .PHONY: man
155 | man:
156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
157 | @echo
158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
159 |
160 | .PHONY: texinfo
161 | texinfo:
162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
163 | @echo
164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
165 | @echo "Run \`make' in that directory to run these through makeinfo" \
166 | "(use \`make info' here to do that automatically)."
167 |
168 | .PHONY: info
169 | info:
170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
171 | @echo "Running Texinfo files through makeinfo..."
172 | make -C $(BUILDDIR)/texinfo info
173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
174 |
175 | .PHONY: gettext
176 | gettext:
177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
178 | @echo
179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
180 |
181 | .PHONY: changes
182 | changes:
183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
184 | @echo
185 | @echo "The overview file is in $(BUILDDIR)/changes."
186 |
187 | .PHONY: linkcheck
188 | linkcheck:
189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
190 | @echo
191 | @echo "Link check complete; look for any errors in the above output " \
192 | "or in $(BUILDDIR)/linkcheck/output.txt."
193 |
194 | .PHONY: doctest
195 | doctest:
196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
197 | @echo "Testing of doctests in the sources finished, look at the " \
198 | "results in $(BUILDDIR)/doctest/output.txt."
199 |
200 | .PHONY: coverage
201 | coverage:
202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
203 | @echo "Testing of coverage in the sources finished, look at the " \
204 | "results in $(BUILDDIR)/coverage/python.txt."
205 |
206 | .PHONY: xml
207 | xml:
208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
209 | @echo
210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
211 |
212 | .PHONY: pseudoxml
213 | pseudoxml:
214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
215 | @echo
216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
217 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # metapipe documentation build configuration file, created by
5 | # sphinx-quickstart on Mon Jan 25 16:10:38 2016.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | import sys
17 | import os
18 |
19 | from recommonmark.parser import CommonMarkParser
20 |
21 | source_parsers = {
22 | '.md': CommonMarkParser,
23 | }
24 |
25 | # Read the Docs theme
26 |
27 | # on_rtd is whether we are on readthedocs.org, this line of code grabbed from docs.readthedocs.org
28 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
29 |
30 | if not on_rtd: # only import and set the theme if we're building docs locally
31 | import sphinx_rtd_theme
32 | html_theme = 'sphinx_rtd_theme'
33 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
34 | # otherwise, readthedocs.org uses their theme by default, so no need to specify it
35 |
36 | # If extensions (or modules to document with autodoc) are in another directory,
37 | # add these directories to sys.path here. If the directory is relative to the
38 | # documentation root, use os.path.abspath to make it absolute, like shown here.
39 | #sys.path.insert(0, os.path.abspath('.'))
40 |
41 | # -- General configuration ------------------------------------------------
42 |
43 | # If your documentation needs a minimal Sphinx version, state it here.
44 | #needs_sphinx = '1.0'
45 |
46 | # Add any Sphinx extension module names here, as strings. They can be
47 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
48 | # ones.
49 | extensions = []
50 |
51 | # Add any paths that contain templates here, relative to this directory.
52 | #templates_path = ['_templates']
53 |
54 | # The suffix(es) of source filenames.
55 | # You can specify multiple suffix as a list of string:
56 | # source_suffix = ['.rst', '.md']
57 | source_suffix = ['.rst', '.md']
58 |
59 | # The encoding of source files.
60 | #source_encoding = 'utf-8-sig'
61 |
62 | # The master toctree document.
63 | master_doc = 'index'
64 |
65 | # General information about the project.
66 | project = 'metapipe'
67 | copyright = '2016, Brian Schrader'
68 | author = 'Brian Schrader'
69 |
70 | # The version info for the project you're documenting, acts as replacement for
71 | # |version| and |release|, also used in various other places throughout the
72 | # built documents.
73 | #
74 | # The short X.Y version.
75 | version = '0.1'
76 | # The full version, including alpha/beta/rc tags.
77 | release = '0.1'
78 |
79 | # The language for content autogenerated by Sphinx. Refer to documentation
80 | # for a list of supported languages.
81 | #
82 | # This is also used if you do content translation via gettext catalogs.
83 | # Usually you set "language" from the command line for these cases.
84 | language = None
85 |
86 | # There are two options for replacing |today|: either, you set today to some
87 | # non-false value, then it is used:
88 | #today = ''
89 | # Else, today_fmt is used as the format for a strftime call.
90 | #today_fmt = '%B %d, %Y'
91 |
92 | # List of patterns, relative to source directory, that match files and
93 | # directories to ignore when looking for source files.
94 | exclude_patterns = ['_build']
95 |
96 | # The reST default role (used for this markup: `text`) to use for all
97 | # documents.
98 | #default_role = None
99 |
100 | # If true, '()' will be appended to :func: etc. cross-reference text.
101 | #add_function_parentheses = True
102 |
103 | # If true, the current module name will be prepended to all description
104 | # unit titles (such as .. function::).
105 | #add_module_names = True
106 |
107 | # If true, sectionauthor and moduleauthor directives will be shown in the
108 | # output. They are ignored by default.
109 | #show_authors = False
110 |
111 | # The name of the Pygments (syntax highlighting) style to use.
112 | pygments_style = 'sphinx'
113 |
114 | # A list of ignored prefixes for module index sorting.
115 | #modindex_common_prefix = []
116 |
117 | # If true, keep warnings as "system message" paragraphs in the built documents.
118 | #keep_warnings = False
119 |
120 | # If true, `todo` and `todoList` produce output, else they produce nothing.
121 | todo_include_todos = False
122 |
123 |
124 | # -- Options for HTML output ----------------------------------------------
125 |
126 | # The theme to use for HTML and HTML Help pages. See the documentation for
127 | # a list of builtin themes.
128 | #html_theme = 'alabaster'
129 |
130 | # Theme options are theme-specific and customize the look and feel of a theme
131 | # further. For a list of options available for each theme, see the
132 | # documentation.
133 | #html_theme_options = {}
134 |
135 | # Add any paths that contain custom themes here, relative to this directory.
136 | #html_theme_path = []
137 |
138 | # The name for this set of Sphinx documents. If None, it defaults to
139 | # "<project> v<release> documentation".
140 | #html_title = None
141 |
142 | # A shorter title for the navigation bar. Default is the same as html_title.
143 | #html_short_title = None
144 |
145 | # The name of an image file (relative to this directory) to place at the top
146 | # of the sidebar.
147 | #html_logo = None
148 |
149 | # The name of an image file (within the static path) to use as favicon of the
150 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
151 | # pixels large.
152 | #html_favicon = None
153 |
154 | # Add any paths that contain custom static files (such as style sheets) here,
155 | # relative to this directory. They are copied after the builtin static files,
156 | # so a file named "default.css" will overwrite the builtin "default.css".
157 | html_static_path = ['_static']
158 |
159 | # Add any extra paths that contain custom files (such as robots.txt or
160 | # .htaccess) here, relative to this directory. These files are copied
161 | # directly to the root of the documentation.
162 | #html_extra_path = []
163 |
164 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
165 | # using the given strftime format.
166 | #html_last_updated_fmt = '%b %d, %Y'
167 |
168 | # If true, SmartyPants will be used to convert quotes and dashes to
169 | # typographically correct entities.
170 | #html_use_smartypants = True
171 |
172 | # Custom sidebar templates, maps document names to template names.
173 | #html_sidebars = {}
174 |
175 | # Additional templates that should be rendered to pages, maps page names to
176 | # template names.
177 | #html_additional_pages = {}
178 |
179 | # If false, no module index is generated.
180 | #html_domain_indices = True
181 |
182 | # If false, no index is generated.
183 | #html_use_index = True
184 |
185 | # If true, the index is split into individual pages for each letter.
186 | #html_split_index = False
187 |
188 | # If true, links to the reST sources are added to the pages.
189 | #html_show_sourcelink = True
190 |
191 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
192 | #html_show_sphinx = True
193 |
194 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
195 | #html_show_copyright = True
196 |
197 | # If true, an OpenSearch description file will be output, and all pages will
198 | # contain a <link> tag referring to it. The value of this option must be the
199 | # base URL from which the finished HTML is served.
200 | #html_use_opensearch = ''
201 |
202 | # This is the file name suffix for HTML files (e.g. ".xhtml").
203 | #html_file_suffix = None
204 |
205 | # Language to be used for generating the HTML full-text search index.
206 | # Sphinx supports the following languages:
207 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
208 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr'
209 | #html_search_language = 'en'
210 |
211 | # A dictionary with options for the search language support, empty by default.
212 | # Now only 'ja' uses this config value
213 | #html_search_options = {'type': 'default'}
214 |
215 | # The name of a javascript file (relative to the configuration directory) that
216 | # implements a search results scorer. If empty, the default will be used.
217 | #html_search_scorer = 'scorer.js'
218 |
219 | # Output file base name for HTML help builder.
220 | htmlhelp_basename = 'metapipedoc'
221 |
222 | # -- Options for LaTeX output ---------------------------------------------
223 |
224 | latex_elements = {
225 | # The paper size ('letterpaper' or 'a4paper').
226 | #'papersize': 'letterpaper',
227 |
228 | # The font size ('10pt', '11pt' or '12pt').
229 | #'pointsize': '10pt',
230 |
231 | # Additional stuff for the LaTeX preamble.
232 | #'preamble': '',
233 |
234 | # Latex figure (float) alignment
235 | #'figure_align': 'htbp',
236 | }
237 |
238 | # Grouping the document tree into LaTeX files. List of tuples
239 | # (source start file, target name, title,
240 | # author, documentclass [howto, manual, or own class]).
241 | latex_documents = [
242 | (master_doc, 'metapipe.tex', 'metapipe Documentation',
243 | 'Brian Schrader', 'manual'),
244 | ]
245 |
246 | # The name of an image file (relative to this directory) to place at the top of
247 | # the title page.
248 | #latex_logo = None
249 |
250 | # For "manual" documents, if this is true, then toplevel headings are parts,
251 | # not chapters.
252 | #latex_use_parts = False
253 |
254 | # If true, show page references after internal links.
255 | #latex_show_pagerefs = False
256 |
257 | # If true, show URL addresses after external links.
258 | #latex_show_urls = False
259 |
260 | # Documents to append as an appendix to all manuals.
261 | #latex_appendices = []
262 |
263 | # If false, no module index is generated.
264 | #latex_domain_indices = True
265 |
266 |
267 | # -- Options for manual page output ---------------------------------------
268 |
269 | # One entry per manual page. List of tuples
270 | # (source start file, name, description, authors, manual section).
271 | man_pages = [
272 | (master_doc, 'metapipe', 'metapipe Documentation',
273 | [author], 1)
274 | ]
275 |
276 | # If true, show URL addresses after external links.
277 | #man_show_urls = False
278 |
279 |
280 | # -- Options for Texinfo output -------------------------------------------
281 |
282 | # Grouping the document tree into Texinfo files. List of tuples
283 | # (source start file, target name, title, author,
284 | # dir menu entry, description, category)
285 | texinfo_documents = [
286 | (master_doc, 'metapipe', 'metapipe Documentation',
287 | author, 'metapipe', 'One line description of project.',
288 | 'Miscellaneous'),
289 | ]
290 |
291 | # Documents to append as an appendix to all manuals.
292 | #texinfo_appendices = []
293 |
294 | # If false, no module index is generated.
295 | #texinfo_domain_indices = True
296 |
297 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
298 | #texinfo_show_urls = 'footnote'
299 |
300 | # If true, do not generate a @detailmenu in the "Top" node's menu.
301 | #texinfo_no_detailmenu = False
302 |
--------------------------------------------------------------------------------
/docs/extending.md:
--------------------------------------------------------------------------------
1 | # Extending Metapipe
2 |
3 | Metapipe provides 2 extension points for developers to extend its functionality: custom Queues and custom Job Types. In most cases, custom queues are an advanced feature that most users and developers will not need to worry about, but if you must, it is there.
4 |
5 | To add support for a queue system not included with metapipe, all you need to do is add a job type.
6 |
7 |
8 | ## Custom Job types
9 |
10 | All job types are subclasses of the `metapipe.models.Job` class. The base job class implements a lot of the functionality that is common between all job types, and has method stubs for the required functionality that needs to be implemented by any subclass. This section will cover what duty job subclasses have, how to subclass the main `Job` and what to fill in.
11 |
12 |
13 | ### The Root Job Class
14 |
15 | The code for the main job class can be found [here][job]. To create your own job type, simply subclass this as follows:
16 |
17 | ```python
18 | from metapipe.models import Job
19 |
20 | class MyCustomJob(Job):
21 |
22 | def __repr__(self):
23 | return '<MyCustomJob: {}>'.format(self.cmd)
24 | ```
25 |
26 | There are 6 methods you need to fill in to have a complete job class. Your full job subclass should have the following form:
27 |
28 | ```python
29 | class MyCustomJob(Job):
30 |
31 | def __repr__(self):
32 | return ''.format(self.cmd)
33 |
34 | # Override these...
35 |
36 | @property
37 | def cmd(self):
38 | """ Returns the command needed to submit the calculations.
39 | Normally, this would be just running the command, however if
40 | using a queue system, then this should return the command to
41 | submit the command to the queue.
42 | """
43 | pass
44 |
45 | def submit(self):
46 | """ Submits the job to be run. If an external queue system is used,
47 | this method submits itself to that queue. Else it runs the job itself.
48 | :see: call
49 | """
50 | pass
51 |
52 | def is_running(self):
53 | """ Returns whether the job is running or not. """
54 | pass
55 |
56 | def is_queued(self):
57 | """ Returns whether the job is queued or not.
58 | This function is only used if jobs are submitted to an external queue.
59 | """
60 | pass
61 |
62 | def is_complete(self):
63 | """ Returns whether the job is complete or not. """
64 | pass
65 |
66 | def is_error(self):
67 | """ Checks to see if the job errored out. """
68 | pass
69 | ```
70 |
71 | The duty of the job types is to submit the jobs when asked by the queue, and to inform the queue about the status of jobs. The queue needs to know when a job is running, queued, complete, or when an error has occurred.
72 |
73 | Each of the `is_*` callbacks should return a boolean value, and the cmd property should return the bash command (as an array of strings) that can be called to run the job. The job class has an attribute `filename` that contains the path of the bash script containing the job command (i.e. `['bash', self.filename]`).
74 |
75 | **IMPORTANT:** All of the above handlers are required for custom job types to function properly.
76 |
77 | Here is the code for the `cmd` property of the `PBSJob` class:
78 |
79 | ```python
80 | class PBSJob(Job):
81 | #...
82 | @property
83 | def cmd(self):
84 | return ['qsub', self.filename]
85 | #...
86 | ```
87 |
88 | The `submit` call should do any logic pertaining to submitting the job or tracking the number of total submissions. For example, here is the code for submitting a job to the PBS queue:
89 |
90 | ```python
91 | class PBSJob(Job):
92 | #...
93 | def submit(self, job):
94 | if self.attempts == 0:
95 | job.make()
96 | self.attempts += 1
97 | out = call(job.cmd)
98 | self.waiting = False
99 | self.id = out[:out.index('.')]
100 | #...
101 | ```
102 |
103 | As you can see, it keeps track of the number of times the job was submitted, and then calls the `call` function, provided in the root job module, to execute the job. Since PBS assigns job ids to each job at submission-time, it also captures that information and saves it for later use.
104 |
105 | [job]: https://github.com/TorkamaniLab/metapipe/blob/master/metapipe/models/job.py#L20
106 |
107 |
108 | ## Custom Queues
109 |
110 | In the event that your analysis requires more control over the submission process for jobs, the metapipe module also allows for the customization of queue logic by subclassing `metapipe.models.Queue`. This section will cover how to subclass the root queue, but it is left to the reader to determine why you might want to do this. From personal experience, customizing the queue should be a very rare requirement.
111 |
112 |
113 | ### The Root Queue class
114 |
115 | As is the case for custom job types, all queues inherit from the root Queue in `metapipe.models.Queue`, including the main `JobQueue` that is used by the metapipe command line tool.
116 |
117 | To customize the response of the queue to various types of events, subclass it and fill in the following methods. All of the methods are optional, so just omit any handlers that you don't need.
118 |
119 | ```python
120 | class MyCustomQueue(object):
121 |
122 | def __repr__(self):
123 | return '' % len(self.queue)
124 |
125 | # Callbacks...
126 |
127 | def on_start(self):
128 | """ Called when the queue is starting up. """
129 | pass
130 |
131 | def on_end(self):
132 | """ Called when the queue is shutting down. """
133 | pass
134 |
135 | def on_locked(self):
136 | """ Called when the queue is locked and no jobs can proceed.
137 | If this callback returns True, then the queue will be restarted,
138 | else it will be terminated.
139 | """
140 | return True
141 |
142 | def on_tick(self):
143 | """ Called when a tick of the queue is complete. """
144 | pass
145 |
146 | def on_ready(self, job):
147 | """ Called when a job is ready to be submitted.
148 | :param job: The given job that is ready.
149 | """
150 | pass
151 |
152 | def on_submit(self, job):
153 | """ Called when a job has been submitted.
154 | :param job: The given job that has been submitted.
155 | """
156 | pass
157 |
158 | def on_complete(self, job):
159 | """ Called when a job has completed.
160 | :param job: The given job that has completed.
161 | """
162 | pass
163 |
164 | def on_error(self, job):
165 | """ Called when a job has errored.
166 | :param job: The given job that has errored.
167 | """
168 | pass
169 | ```
170 |
171 |
172 | ## Using Your Custom Code
173 |
174 | Once you have subclassed and filled in the required code for your custom job type or queue, it is time to use your code. If your code adapts metapipe to work on a common computing platform, or system then please consider contributing to the metapipe project. This helps the rest of the community use a broader range of hardware to solve our problems!
175 |
176 |
177 | ### Building your custom pipeline
178 |
179 | Use the following code to build your pipeline. This code is taken directly from [metapipe's app.py][app] tool which is the command line tool that metapipe uses to build pipelines.
180 |
181 | ```python
182 | import MyCustomJob
183 |
184 | JOB_TYPES = {
185 | 'my_custom_job_type': MyCustomJob
186 | }
187 |
188 | parser = Parser(config)
189 | try:
190 | command_templates = parser.consume()
191 | except ValueError as e:
192 | raise SyntaxError('Invalid config file. \n%s' % e)
193 |
194 | pipeline = Runtime(command_templates, JOB_TYPES, 'my_custom_job_type')
195 | ```
196 |
197 | **IMPORTANT:** Adding custom queues is coming soon!
198 |
199 | For more information on how to script metapipe once you have custom jobs, see [Scripting Metapipe](scripting.html).
200 |
201 |
202 |
--------------------------------------------------------------------------------
/docs/getting_started.md:
--------------------------------------------------------------------------------
1 | # Getting Started
2 |
3 | This section contains a quick guide for installing, and using metapipe. For the detailed syntax guide, see the [Metapipe Syntax][syntax]
4 |
5 | [syntax]: syntax.html
6 |
7 |
8 | ## Installation
9 |
10 | Metapipe is available on PyPi so installing is easy.
11 |
12 | ```bash
13 | $ pip install metapipe
14 | ```
15 |
16 |
17 | To make it easy, metapipe runs on Python 2.7, 3.4, and 3.5!
18 |
19 |
20 | ## Using metapipe
21 |
22 | By default, metapipe is both a command line tool and a Python module that can be used to build and run pipelines in code. This means that whether you're a user, or a developer Metapipe can be adapted to fit your needs.
23 |
24 | To see metapipe's help menu, type the following, just as you'd expect.
25 |
26 | ```bash
27 | $ metapipe --help
28 | ```
29 |
30 | ## Sample Pipeline
31 |
32 | Here's a simple pipeline you can use for testing metapipe. Typically, complex pipelines are used for things like bioinformatics or batch processing.
33 |
34 | But first, we need some sample files to work with. Run these commands to generate them.
35 |
36 | ```bash
37 | $ echo "SAMPLE DATA 1" > test_file.1.txt
38 | $ echo "SAMPLE DATA 2" > test_file.2.txt
39 | $ echo "SAMPLE DATA 3" > test_file.3.txt
40 | ```
41 |
42 | Now that we have our data, let's analyze it! Here's our sample pipeline:
43 |
44 | ```bash
45 | [COMMANDS]
46 | # Remove the ending number from each of our data files.
47 | cut -f 1-2 -d ' ' {1||2||3} > {o}
48 |
49 | # Paste each of the files together and save it to a final output.
50 | # Since this is our last step, and only 1 output there's no need to have
51 | # metapipe name the output file. We'll call it something ourselves.
52 | paste {1.1,1.2,1.3} > final_output.txt
53 |
54 | [FILES]
55 | 1. test_file.1.txt
56 | 2. test_file.2.txt
57 | 3. test_file.3.txt
58 | ```
59 |
60 | Save that as `sample_pipeline.mp`, open a terminal, and `cd` to that directory.
61 |
62 |
63 | ### Run the sample pipeline locally
64 |
65 | Local execution is the default for metapipe so you just need to specify your metapipe file and an output destination.
66 |
67 | ```bash
68 | $ metapipe -o pipeline.sh sample_pipeline.mp
69 | ```
70 |
71 | This will generate an output script named `pipeline.sh` which will run the pipeline. Simply run it to start your pipeline!
72 |
73 | ```bash
74 | $ sh pipeline.sh
75 | ```
76 |
77 | That's it! Metapipe will run in the foreground watching your jobs complete until everything finishes.
78 |
79 |
80 | ### Run the sample pipeline on PBS
81 |
82 | Simply change the metapipe command to the following:
83 |
84 | ```bash
85 | $ metapipe -o pipeline.sh -j pbs sample_pipeline.mp
86 | ```
87 |
88 | Then simply submit metapipe as a job:
89 |
90 | ```bash
91 | $ qsub pipeline.sh
92 | ```
93 |
94 | Metapipe will run as a job on the PBS/Torque queue and submit other jobs to the same queue! It will keep tabs on the running jobs and submit them when they're ready, then exit when all jobs finish.
95 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. metapipe documentation master file, created by
2 | sphinx-quickstart on Mon Jan 25 16:10:38 2016.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to metapipe's documentation!
7 | ====================================
8 |
9 | Contents:
10 |
11 | .. toctree::
12 | :maxdepth: 2
13 |
14 | what_is_metapipe.md
15 | getting_started.md
16 | syntax.md
17 | scripting.md
18 | extending.md
19 | reporting.md
20 |
21 |
22 | Indices and tables
23 | ==================
24 |
25 | * :ref:`genindex`
26 | * :ref:`modindex`
27 | * :ref:`search`
28 |
29 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | REM Command file for Sphinx documentation
4 |
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=_build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 |
16 | if "%1" == "" goto help
17 |
18 | if "%1" == "help" (
19 | :help
20 | echo.Please use `make ^` where ^ is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and a HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
31 | echo. text to make text files
32 | echo. man to make manual pages
33 | echo. texinfo to make Texinfo files
34 | echo. gettext to make PO message catalogs
35 | echo. changes to make an overview over all changed/added/deprecated items
36 | echo. xml to make Docutils-native XML files
37 | echo. pseudoxml to make pseudoxml-XML files for display purposes
38 | echo. linkcheck to check all external links for integrity
39 | echo. doctest to run all doctests embedded in the documentation if enabled
40 | echo. coverage to run coverage check of the documentation if enabled
41 | goto end
42 | )
43 |
44 | if "%1" == "clean" (
45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
46 | del /q /s %BUILDDIR%\*
47 | goto end
48 | )
49 |
50 |
51 | REM Check if sphinx-build is available and fallback to Python version if any
52 | %SPHINXBUILD% 1>NUL 2>NUL
53 | if errorlevel 9009 goto sphinx_python
54 | goto sphinx_ok
55 |
56 | :sphinx_python
57 |
58 | set SPHINXBUILD=python -m sphinx.__init__
59 | %SPHINXBUILD% 2> nul
60 | if errorlevel 9009 (
61 | echo.
62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
63 | echo.installed, then set the SPHINXBUILD environment variable to point
64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
65 | echo.may add the Sphinx directory to PATH.
66 | echo.
67 | echo.If you don't have Sphinx installed, grab it from
68 | echo.http://sphinx-doc.org/
69 | exit /b 1
70 | )
71 |
72 | :sphinx_ok
73 |
74 |
75 | if "%1" == "html" (
76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
77 | if errorlevel 1 exit /b 1
78 | echo.
79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html.
80 | goto end
81 | )
82 |
83 | if "%1" == "dirhtml" (
84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
85 | if errorlevel 1 exit /b 1
86 | echo.
87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
88 | goto end
89 | )
90 |
91 | if "%1" == "singlehtml" (
92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
93 | if errorlevel 1 exit /b 1
94 | echo.
95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
96 | goto end
97 | )
98 |
99 | if "%1" == "pickle" (
100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
101 | if errorlevel 1 exit /b 1
102 | echo.
103 | echo.Build finished; now you can process the pickle files.
104 | goto end
105 | )
106 |
107 | if "%1" == "json" (
108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
109 | if errorlevel 1 exit /b 1
110 | echo.
111 | echo.Build finished; now you can process the JSON files.
112 | goto end
113 | )
114 |
115 | if "%1" == "htmlhelp" (
116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
117 | if errorlevel 1 exit /b 1
118 | echo.
119 | echo.Build finished; now you can run HTML Help Workshop with the ^
120 | .hhp project file in %BUILDDIR%/htmlhelp.
121 | goto end
122 | )
123 |
124 | if "%1" == "qthelp" (
125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
126 | if errorlevel 1 exit /b 1
127 | echo.
128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
129 | .qhcp project file in %BUILDDIR%/qthelp, like this:
130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\metapipe.qhcp
131 | echo.To view the help file:
132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\metapipe.ghc
133 | goto end
134 | )
135 |
136 | if "%1" == "devhelp" (
137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
138 | if errorlevel 1 exit /b 1
139 | echo.
140 | echo.Build finished.
141 | goto end
142 | )
143 |
144 | if "%1" == "epub" (
145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
146 | if errorlevel 1 exit /b 1
147 | echo.
148 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
149 | goto end
150 | )
151 |
152 | if "%1" == "latex" (
153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
154 | if errorlevel 1 exit /b 1
155 | echo.
156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
157 | goto end
158 | )
159 |
160 | if "%1" == "latexpdf" (
161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
162 | cd %BUILDDIR%/latex
163 | make all-pdf
164 | cd %~dp0
165 | echo.
166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
167 | goto end
168 | )
169 |
170 | if "%1" == "latexpdfja" (
171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
172 | cd %BUILDDIR%/latex
173 | make all-pdf-ja
174 | cd %~dp0
175 | echo.
176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
177 | goto end
178 | )
179 |
180 | if "%1" == "text" (
181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
182 | if errorlevel 1 exit /b 1
183 | echo.
184 | echo.Build finished. The text files are in %BUILDDIR%/text.
185 | goto end
186 | )
187 |
188 | if "%1" == "man" (
189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
190 | if errorlevel 1 exit /b 1
191 | echo.
192 | echo.Build finished. The manual pages are in %BUILDDIR%/man.
193 | goto end
194 | )
195 |
196 | if "%1" == "texinfo" (
197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
198 | if errorlevel 1 exit /b 1
199 | echo.
200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
201 | goto end
202 | )
203 |
204 | if "%1" == "gettext" (
205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
206 | if errorlevel 1 exit /b 1
207 | echo.
208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
209 | goto end
210 | )
211 |
212 | if "%1" == "changes" (
213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
214 | if errorlevel 1 exit /b 1
215 | echo.
216 | echo.The overview file is in %BUILDDIR%/changes.
217 | goto end
218 | )
219 |
220 | if "%1" == "linkcheck" (
221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
222 | if errorlevel 1 exit /b 1
223 | echo.
224 | echo.Link check complete; look for any errors in the above output ^
225 | or in %BUILDDIR%/linkcheck/output.txt.
226 | goto end
227 | )
228 |
229 | if "%1" == "doctest" (
230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
231 | if errorlevel 1 exit /b 1
232 | echo.
233 | echo.Testing of doctests in the sources finished, look at the ^
234 | results in %BUILDDIR%/doctest/output.txt.
235 | goto end
236 | )
237 |
238 | if "%1" == "coverage" (
239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
240 | if errorlevel 1 exit /b 1
241 | echo.
242 | echo.Testing of coverage in the sources finished, look at the ^
243 | results in %BUILDDIR%/coverage/python.txt.
244 | goto end
245 | )
246 |
247 | if "%1" == "xml" (
248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
249 | if errorlevel 1 exit /b 1
250 | echo.
251 | echo.Build finished. The XML files are in %BUILDDIR%/xml.
252 | goto end
253 | )
254 |
255 | if "%1" == "pseudoxml" (
256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
257 | if errorlevel 1 exit /b 1
258 | echo.
259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
260 | goto end
261 | )
262 |
263 | :end
264 |
--------------------------------------------------------------------------------
/docs/pipeline.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/docs/pipeline.gif
--------------------------------------------------------------------------------
/docs/reporting.md:
--------------------------------------------------------------------------------
1 | # Measuring Pipeline Progress
2 |
3 | While Metapipe runs your pipeline, it writes updates to `stdout`. These updates can be useful, but often it is more convenient to get additional information in a richer format.
4 |
5 | Metapipe provides a few different methods of visualizing the progress of your pipeline. These options are specified by the `--report-type` option.
6 |
7 |
8 | ## Text based reporting
9 |
10 | ```
11 | --report-type text
12 | ```
13 |
14 | This option is the default. Metapipe will write to `stdout` and this can be redirected to a file.
15 |
16 |
17 | ## HTML based reporting
18 |
19 | ```
20 | --report-type html
21 | ```
22 |
23 | Using this option, Metapipe will generate an HTML report of the pipeline as it runs. This static report represents the current state of the pipeline and what steps have already been completed. The report also includes a progress bar that reports a visualization of the rough progress of the pipeline.
24 |
25 | **Important:** This progress indicator is based on the number of overall steps to be completed and represents the number of steps remaining. This has no correlation with the amount of time remaining, as that depends on the length of time each step takes.
26 |
--------------------------------------------------------------------------------
/docs/scripting.md:
--------------------------------------------------------------------------------
1 | # Scripting Metapipe
2 |
3 | In addition to being a command line tool, metapipe is also a Python module. You can use this module to extend, or script metapipe to fit your specific uses. This section will discuss scripting metapipe, and building/running jobs using Python. For information on how to extend metapipe's builtin job types or queue system, see [Extending Metapipe](extending.html).
4 |
5 |
6 | ## The Run Interface
7 |
8 | The first, and easiest way to script Metapipe is by invoking it via the Python interface.
9 |
10 |
11 | ```python
12 | from metapipe import run
13 |
14 | config_text = get_config_text()
15 | run(config_text)
16 | ```
17 |
18 | For detailed information, see the [run method's docstring](https://github.com/TorkamaniLab/metapipe/blob/master/metapipe/app.py#L90).
19 |
20 |
--------------------------------------------------------------------------------
/docs/syntax.md:
--------------------------------------------------------------------------------
1 | # Metapipe Syntax
2 |
3 | The syntax for Pipeline Config files is as follows.
4 |
5 |
6 | ## Section Definitions
7 |
8 | In each Metapipe file, there are a number of different sections you can specify. Each has their own purpose and function. Each section is denoted with a header in brackets at the top of the section.
9 |
10 | All sections support comments, and in most sections, they are not parsed as input.
11 |
12 |
13 | ### Commands
14 |
15 | The commands section is the only required Metapipe config section. Specified by the `[COMMANDS]` header, this is where the various steps of the pipeline are specified. Commands are very similar to normal shell commands, and most shell commands are valid. The only difference is in the input/output of each command. For these sections, use Metapipe's command syntax to indicate the location and desired input and output.
16 |
17 | **Example:**
18 |
19 | ```bash
20 | [COMMANDS]
21 | # Here we cat a hardcoded input file into sed
22 | # and redirect the output to a metapipe output token.
23 | cat somefile.txt | sed 's/replace me/with me' > {o}
24 | ```
25 |
26 | Metapipe automatically creates a filename for the given output token and assigns that file an alias. The alias structure is `command_number.command_iteration-output_number`, where the output number is optional.
27 |
28 | **Important:** Commands are *NOT* run sequentially. As commands are parsed, they are evaluated based on what inputs they take in and what outputs they generate. For more information: see [Command Structure](#command-structure). Commands are run as soon as they are deemed ready and any command that does not specify inputs via Metapipe's input patterns will be run immediately.
29 |
30 |
31 | ### Paths
32 |
33 | The paths section allows users to simplify their commands by creating aliases or short names to binaries. Paths are structured as a single word alias followed by a space and the rest of the line is considered the path. The paths section is denoted by the `[PATHS]` header.
34 |
35 | ```bash
36 | [COMMANDS]
37 | # Here we've aliased Python. When the script is generated,
38 | # the hardcoded path will be substituted in.
39 | python2 my_script.py
40 |
41 | # Here we're using the builtin python and using paths
42 | # to simplify the arguments.
43 | python my_script.py somefile
44 |
45 | [PATHS]
46 | python2 /usr/local/bin/python2.7.4
47 | somefile /a/long/file/path
48 | ```
49 |
50 | Paths can also be used to create pseudo-variables for long configuration options. When doing this, it's recommended to use a bash-variable-like syntax because it reminds the reader that the variable is not a literal in the command.
51 |
52 | **Reminder**: Paths are substituted in after the inputs have been processed. This means that `{}` characters are treated as literals and not as input markers.
53 |
54 | ```bash
55 | [COMMANDS]
56 | # Here, the braces represent an output token,
57 | # but the $OPTIONS variable will be evaluated
58 | # as a literal {}
59 | python my_script.py -o {o} $OPTIONS
60 |
61 | [PATHS]
62 | $OPTIONS -rfg --do-something --no-save --get --no-get -I {}
63 | ```
64 |
65 |
66 | ### Files
67 |
68 | For a given pipeline, there is usually a set of input or auxiliary files. These files go through the analysis and other steps require the output of one command as the input for another. This is where most of the power of Metapipe's syntax comes into play. The files section is denoted as `[FILES]`.
69 |
70 | Files are specified using a number followed by a period, and then the path to the given file. The number is the file's alias, and once that alias is assigned, it can be used in commands.
71 |
72 | ```bash
73 | [COMMANDS]
74 | cat {1} | sed 's/replace me/with me' > {o}
75 | cat {2} | cut -f 1 | sort | uniq > {o}
76 |
77 | [FILES]
78 | 1. somefile.1
79 | 2. /path/to/somefile.2
80 |
81 | ```
82 |
83 | In this example, we use the aliases of files 1 and 2 to perform different analysis on each file. Then, when the input files need to change, they can be changed in the `[FILES]` section and the pipeline remains the same.
84 |
85 |
86 | ### Job Options
87 |
88 | The job options section, denoted by `[JOB_OPTIONS]`, is a section that allows the user to specify a global set of options for all jobs. This helps reduce pipeline redundancy.
89 |
90 | ```bash
91 | # Each of the commands in this pipeline need to
92 | # be working in a scratch directory.
93 | [COMMANDS]
94 | cat somefile.1.txt | sed 's/replace me/with me' > {o}
95 | cat somefile.2.txt | sed 's/replace me/with you' > {o}
96 | cat somefile.3.txt | sed 's/replace you/with me' > {o}
97 |
98 | [JOB_OPTIONS]
99 | set -e
100 | cd /var/my_project/
101 |
102 | # This config will result in the following:
103 | # ------- Job 1 ---------
104 | set -e
105 | cd /var/my_project/
106 | cat somefile.1.txt | sed 's/replace me/with me' > {o}
107 | ```
108 |
109 | The set of commands in Job Options will be carried over to every job in the pipeline. This can be extremely useful when setting configuration comments for a queue system.
110 |
111 | ```bash
112 | # Each of the commands needs 4GB of RAM
113 | [COMMANDS]
114 | cat somefile.1.txt | sed 's/replace me/with me' > {o}
115 | cat somefile.2.txt | sed 's/replace me/with you' > {o}
116 | cat somefile.3.txt | sed 's/replace you/with me' > {o}
117 |
118 | [JOB_OPTIONS]
119 | #PBS -l mem=4096mb
120 | ```
121 |
122 | Job Options allow users to make their pipelines more clear and less redundant by allowing them to follow the [DRY][dry] principle.
123 |
124 | [dry]: https://en.wikipedia.org/wiki/Don%27t_repeat_yourself
125 |
126 |
127 | ## Command Structure
128 |
129 | Now that all of the concepts and supported sections have been explained, it's time to take a look at the command structure and how to take advantage of Metapipe's advanced features.
130 |
131 |
132 | ### Input Patterns
133 |
134 | Consider the following command:
135 |
136 | ```bash
137 | [COMMANDS]
138 | python somescript {1||2||3}
139 |
140 | [FILES]
141 | 1. some_file1.txt
142 | 2. some_file2.txt
143 | 3. some_file3.txt
144 | ```
145 |
146 | This command will run the python script 3 times in parallel, once with each
147 | file specified. The output will look something like this:
148 |
149 | ```bash
150 | # Output
151 | # ------
152 |
153 | python somescript some_file1.txt
154 | python somescript some_file2.txt
155 | python somescript some_file3.txt
156 | ```
157 |
158 | #### Running a script with multiple inputs
159 |
160 | Let's say that you have a script that takes multiple files as input. In this
161 | case the syntax becomes:
162 |
163 | ```bash
164 | [COMMANDS]
165 | python somescript {1,2,3}
166 |
167 | [FILES]
168 | 1. some_file1.txt
169 | 2. some_file2.txt
170 | 3. some_file3.txt
171 |
172 | # Output
173 | # ------
174 |
175 | python somescript some_file1.txt some_file2.txt some_file3.txt
176 | ```
177 |
178 |
179 | ### Output Patterns
180 |
181 | Whenever a script would take an explicit output filename you can use the output
182 | pattern syntax to tell metapipe where/what it should use.
183 |
184 | ```bash
185 | [COMMANDS]
186 | python somescript -o {o} {1||2||3}
187 |
188 | [FILES]
189 | 1. some_file1.txt
190 | 2. some_file2.txt
191 | 3. some_file3.txt
192 |
193 | # Output
194 | # ------
195 |
196 | python somescript -o mp.1.1.output some_file1.txt
197 | python somescript -o mp.1.2.output some_file2.txt
198 | python somescript -o mp.1.3.output some_file3.txt
199 | ```
200 |
201 | Metapipe will generate the filename with the command's alias inside. An upcoming feature will provide more useful output names.
202 |
203 |
204 | #### Implicit or Hardcoded output
205 |
206 | In a case where the script or command you want to use generates an output that
207 | is not passed through the command, but you need to use for another step in the
208 | pipeline, you can use output patterns to tell metapipe what to look for.
209 |
210 | Consider this:
211 |
212 | ```bash
213 | [COMMANDS]
214 | # This command doesn't provide an output filename
215 | # so metapipe can't automatically track it.
216 | ./do_count {1||2}
217 | ./analyze.sh {1.*}
218 |
219 | [FILES]
220 | 1. foo.txt
221 | 2. bar.txt
222 | ```
223 |
224 | This set of commands is invalid because the second command (`./analyze.sh`)
225 | doesn't know what the output of command 1 is because it isn't specified.
226 | The `./do_count` command generates output based on the input filenames it is given.
227 |
228 | Since we wrote the `./do_count` script, we know that it generates files with a
229 | `.counts` extension. But since we don't explicitly specify the files, in
230 | this case Metapipe cannot assume the file names generated by step 1 and this
231 | config file is invalid.
232 |
233 | We can tell metapipe what the output should look like by using an output pattern.
234 |
235 | ```bash
236 | [COMMANDS]
237 | # We've now told Metapipe what the output file name
238 | # will look like. It can now track the file as normal.
239 | ./do_count {1||2} #{o:*.counts}
240 | ./analyze.sh {1.*}
241 |
242 | [FILES]
243 | 1. foo.txt
244 | 2. bar.txt
245 | ```
246 |
247 | The above example tells metapipe that the output of command 1, which is
248 | hardcoded in the script will have an output that ends in `.counts`. Now that
249 | the output of command 1 is known, command 2 will wait until command 1 finishes.
250 |
251 | When the output marker has the form `{o}`, then metapipe will insert a
252 | pregenerated filename into the command. The output marker `{o:pattern}` means
253 | that the output of the script is *not* determined by the input of the script,
254 | but it *will* match the given pattern. This means that later commands will be able
255 | to reference the files by name.
256 |
257 |
258 | ### Multiple Inputs and Outputs
259 |
260 | Often times a given shell command will either take multiple dynamic files as input, or generate multiple files as output. In either case, metapipe provides a way to manage and track these files.
261 |
262 | For multiple inputs, metapipe expects the number of inputs per command to be the same, and will iterate over them in order.
263 |
264 | **Example:**
265 |
266 | ```bash
267 | # Given the following:
268 | [COMMANDS]
269 | bash somescript {1||2||3} --conf {4||5||6} > {o}
270 |
271 | [FILES]
272 | 1. somefile.1
273 | 2. somefile.2
274 | 3. somefile.3
275 | 4. somefile.4
276 | 5. somefile.5
277 | 6. somefile.6
275 |
276 | # Metapipe will return this:
277 | bash somescript somefile.1 --conf somefile.4 > mp.1.1.output
278 | bash somescript somefile.2 --conf somefile.5 > mp.1.2.output
279 | bash somescript somefile.3 --conf somefile.6 > mp.1.3.output
280 | ```
281 |
282 | Metapipe will name the multiple output files as follows (in order from left to right):
283 |
284 | `mp.{command_number}.{sub_command_number}-{output_number}`
285 |
286 | **Example:**
287 |
288 | ```bash
289 | # Given an input like the one below:
290 | [COMMANDS]
291 | bash somescript {1||2||3} --log {o} -r {o}
292 |
293 | [FILES]
294 | 1. somefile.1
295 | 2. somefile.2
296 | 3. somefile.3
297 |
298 | # metapipe will generate the following:
299 | bash somescript somefile.1 --log mp.1.1-1.output -r mp.1.1-2.output
300 | bash somescript somefile.2 --log mp.1.2-1.output -r mp.1.2-2.output
301 | bash somescript somefile.3 --log mp.1.3-1.output -r mp.1.3-2.output
302 | ```
303 |
304 |
305 |
306 | ## Sample config.mp file
307 |
308 | ```bash
309 | [COMMANDS]
310 | # Here we run our analysis script on every gzipped file
311 | # in the current directory and output the results to a file.
312 | python my_custom_script.py -o {o} {*.gz||}
313 |
314 | # Take all the outputs of step 1 and feed them to cut.
315 | cut -f 1 {1.*||} > {o}
316 |
317 | # Oh no! You hardcoded the output name? No problem! Just tell metapipe
318 | # what the filename is.
319 | python my_other_custom_code.py {2.*} #{o:hardcoded_output.csv}
320 |
321 | # Now you want to compare your results to some controls? Ok!
322 | # Metapipe will compare your hardcoded_output to all 3
323 | # controls at the same time!
324 | python my_compare_script.py -o {o} $OPTIONS --compare {1||2||3} {3.1}
325 |
326 | # Finally, you want to make some pretty graphs? No problem!
327 | # But wait! You want R 2.0 for this code? Just create an alias for R!
328 | Rscript my_cool_graphing_code.r {4.*} > {o}
329 |
330 | [FILES]
331 | 1. controls.1.csv
332 | 2. controls.2.csv
333 | 3. controls.3.csv
334 |
335 | [PATHS]
336 | Rscript ~/path/to/my/custom/R/version
337 | $OPTIONS -rne --get --no-get -v --V --log-level 1
338 | ```
339 |
--------------------------------------------------------------------------------
/docs/what_is_metapipe.md:
--------------------------------------------------------------------------------
1 | # Metapipe
2 |
3 | *A pipeline for building analysis pipelines.*
4 |
5 | Metapipe is a simple command line tool for building and running complex analysis pipelines. If you use a PBS/Torque queue for cluster computing, or if you have complex batch processing that you want simplified, metapipe is the tool for you.
6 |
7 | Metapipe's goal is to improve **readability**, and **maintainability** when building complex pipelines.
8 |
9 | In addition to helping you generate and maintain complex pipelines, **metapipe also helps you debug them**! How? Well metapipe watches your jobs execute and keeps tabs on them. This means, unlike conventional batch queue systems like PBS/Torque alone, metapipe can give you accurate error information, and even resubmit failing jobs! Metapipe enhances the power of any PBS/Torque queue!
10 |
11 | - What if I [don't use PBS/Torque](#other-queue-systems), or [a queue system at all?](#no-queue-no-problem)
12 |
13 |
14 | ## What does it do?
15 |
16 | In the bad old days (before metapipe), if you wanted to make an analysis pipeline, you needed to know how to code. **Not anymore!** Metapipe makes it easy to build and run your analysis pipelines! **No more code, just commands!** This makes your pipelines easy to understand and change!
17 |
18 | A sample metapipe file can be found in [Metapipe Syntax](syntax.html)
19 |
20 |
21 | ## No Queue? No Problem!
22 |
23 | Lots of people don't use a PBS/Torque queue system, or a queue system at all, and metapipe can help them as well! Metapipe runs locally and will give you all the same benefits of a batch queue system! It runs jobs in parallel, provides detailed feedback when jobs go wrong, and automatically re-runs jobs if they fail.
24 |
25 | To run metapipe locally, see the app's help menu!
26 |
27 | `metapipe --help`
28 |
29 |
30 | ## Other Queue Systems
31 |
32 | Metapipe is a very modular tool, and is designed to support any execution backend. Right now we only support PBS, but if you know just a little bit of Python, you can add support for any queue easily! *More information coming soon!*
33 |
34 |
--------------------------------------------------------------------------------
/metapipe/__init__.py:
--------------------------------------------------------------------------------
1 | from .app import run
2 |
--------------------------------------------------------------------------------
/metapipe/__main__.py:
--------------------------------------------------------------------------------
""" Entry point for `python -m metapipe`. """

# Bug fix: the bare `from app import main` relied on Python 2's implicit
# relative imports and raises ImportError under Python 3.
from metapipe.app import main

main()
3 |
--------------------------------------------------------------------------------
/metapipe/app.py:
--------------------------------------------------------------------------------
1 | """ A pipeline that generates analysis pipelines.
2 |
3 | author: Brian Schrader
4 | since: 2015-12-22
5 | """
6 |
7 | from __future__ import print_function
8 |
9 | import argparse, pickle, os, sys
10 |
11 | from multiprocessing import cpu_count
12 |
13 | import pyparsing
14 |
15 | from .parser import Parser
16 | from .models import Command, LocalJob, PBSJob, SGEJob, \
17 | HtmlReportingJobQueue, TextReportingJobQueue
18 | from .runtime import Runtime
19 | from metapipe.templates import env
20 |
21 |
__version__ = '1.2-1'


# Alias used for the top-level pipeline job when it is itself submitted
# to a queue (see make_submit_job below).
PIPELINE_ALIAS = "metapipe.queue.job"

# Maps the --job-type CLI option to the Job subclass that runs commands.
JOB_TYPES = {
    'local': LocalJob,
    'pbs': PBSJob,
    'sge': SGEJob,
}

# Maps the --report-type CLI option to the progress-reporting queue class.
QUEUE_TYPES = {
    'text': TextReportingJobQueue,
    'html': HtmlReportingJobQueue,
}
37 |
38 |
def main():
    """ Parses the command-line args, and calls run.

    Exits with code 0 after printing the version when --version is given;
    returns -1 when no readable config file is provided.
    """
    parser = argparse.ArgumentParser(
        description='A pipeline that generates analysis pipelines.')
    parser.add_argument('input', nargs='?',
        help='A valid metapipe configuration file.')
    parser.add_argument('-o', '--output',
        help='An output destination. If none is provided, the '
        'results will be printed to stdout.',
        default=sys.stdout)
    parser.add_argument('-t', '--temp',
        help='A desired metapipe binary file. This is used to store '
        'temp data between generation and execution. '
        '(Default: "%(default)s")', default='.metapipe')
    parser.add_argument('-s', '--shell',
        help='The path to the shell to be used when executing the '
        'pipeline. (Default: "%(default)s)"',
        default='/bin/bash')
    parser.add_argument('-r', '--run',
        help='Run the pipeline as soon as it\'s ready.',
        action='store_true')
    # NOTE(review): --name is parsed but not currently used below.
    parser.add_argument('-n', '--name',
        help='A name for the pipeline.',
        default='')
    parser.add_argument('-j', '--job-type',
        help='The destination for calculations (i.e. local, a PBS '
        'queue on a cluster, etc).\nOptions: {}. '
        '(Default: "%(default)s)"'.format(JOB_TYPES.keys()),
        default='local')
    # Bug fix: without type=int the value arrived as a string, breaking
    # the numeric concurrent-job limit downstream.
    parser.add_argument('-p', '--max-jobs', type=int,
        help='The maximum number of concurrent jobs allowed. '
        'Defaults to maximum available cores.',
        default=None)
    parser.add_argument('--report-type',
        help='The output report type. By default metapipe will '
        'print updates to the console. \nOptions: {}. '
        '(Default: "%(default)s)"'.format(QUEUE_TYPES.keys()),
        default='text')
    parser.add_argument('-v','--version',
        help='Displays the current version of the application.',
        action='store_true')
    args = parser.parse_args()

    if args.version:
        print('Version: {}'.format(__version__))
        sys.exit(0)

    try:
        # `input` is optional; open(None) raises TypeError rather than
        # IOError, so catch both to report a missing config uniformly.
        with open(args.input) as f:
            config = f.read()
    except (IOError, TypeError):
        print('No valid config file found.')
        return -1

    run(config, args.max_jobs, args.output, args.job_type, args.report_type,
        args.shell, args.temp, args.run)
95 |
96 |
def run(config, max_jobs, output=sys.stdout, job_type='local',
        report_type='text', shell='/bin/bash', temp='.metapipe', run_now=False):
    """ Create the metapipe based on the provided input.

    :param config: the text of a metapipe config file.
    :param max_jobs: maximum concurrent jobs; None means cpu_count().
    :param output: a filename to write the generated script to, or an
        already-open file-like object (defaults to stdout).
    :param job_type: a key of JOB_TYPES selecting the execution backend.
    :param report_type: a key of QUEUE_TYPES selecting progress reporting.
    :param shell: shell used by the generated script.
    :param temp: path for the pickled pipeline state.
    :param run_now: when True, also submit the pipeline job immediately.
    :raises SyntaxError: if the config file cannot be parsed.
    """
    if max_jobs is None:
        max_jobs = cpu_count()

    parser = Parser(config)
    try:
        command_templates = parser.consume()
    except ValueError as e:
        raise SyntaxError('Invalid config file. \n%s' % e)
    options = '\n'.join(parser.global_options)

    queue_type = QUEUE_TYPES[report_type]
    pipeline = Runtime(command_templates, queue_type, JOB_TYPES, job_type,
        max_jobs)

    template = env.get_template('output_script.tmpl.sh')
    with open(temp, 'wb') as f:
        # Protocol 2 keeps the pickle loadable by the template's runner.
        pickle.dump(pipeline, f, 2)
    script = template.render(shell=shell,
        temp=os.path.abspath(temp), options=options)

    if run_now:
        output = output if output != sys.stdout else PIPELINE_ALIAS
        submit_job = make_submit_job(shell, output, job_type)
        submit_job.submit()

    # `output` is either a filename or a file-like object. Only close the
    # handle when we opened it here; the original unconditionally called
    # f.close() and raised UnboundLocalError whenever `output` was already
    # a stream (the default).
    opened_here = False
    try:
        output = open(output, 'w')
        opened_here = True
    except TypeError:
        pass

    output.write(script)
    if opened_here:
        output.close()
132 |
133 |
def make_submit_job(shell, output, job_type):
    """ Preps the metapipe main job to be submitted.

    :param shell: path to the shell used to run the generated script.
    :param output: filename of the generated pipeline script.
    :param job_type: a key of JOB_TYPES selecting the job backend.
    :returns: the prepared (but not yet submitted) job.
    """
    run_cmd = [shell, output]
    # Bug fix: Command's keyword is `parts`, not `cmds`, and there is no
    # `get_job` helper in this module -- look the job class up directly.
    submit_command = Command(alias=PIPELINE_ALIAS, parts=run_cmd)
    submit_job = JOB_TYPES[job_type](PIPELINE_ALIAS, submit_command)
    submit_job.make()
    return submit_job
141 |
142 |
# Allow this module to be executed directly as a script.
if __name__ == '__main__':
    main()
145 |
--------------------------------------------------------------------------------
/metapipe/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .command import Command
2 | from .command_template import CommandTemplate
3 | from .command_template_factory import *
4 | from .job import Job, call
5 | from .job_template import JobTemplate
6 | from .local_job import LocalJob
7 | from .grammar import Grammar
8 | from .pbs_job import PBSJob
9 | from .sge_job import SGEJob
10 | from .queue import HtmlReportingJobQueue, TextReportingJobQueue, \
11 | ReportingJobQueue, BaseQueue
12 | from .tokens import FileToken, Input, Output, PathToken, CommentToken
13 |
--------------------------------------------------------------------------------
/metapipe/models/command.py:
--------------------------------------------------------------------------------
1 | """ A command model that can be easily transformed into jobs.
2 |
3 | author: Brian Schrader
4 | since: 2015-12-21
5 | """
6 |
7 | from .tokens import Input, Output, FileToken, PathToken, alias_pattern
8 |
9 |
class Command(object):
    """ A shell command assembled from tokenized parts (inputs, outputs,
    paths, and literal strings) that evaluates to a script fragment.
    """

    def __init__(self, alias, parts=None):
        """ Create a command from an alias and a list of parts.

        Bug fix: `parts=[]` was a shared mutable default; a fresh list is
        used instead. Output tokens are given aliases derived from the
        command alias (numbered when there is more than one output).
        """
        self.alias = alias
        self.parts = parts if parts is not None else []
        if len(self.output_parts) > 1:
            # Multiple outputs get numbered aliases via alias_pattern.
            for i, output in enumerate(self.output_parts):
                output.alias = alias_pattern.format(command=self.alias,
                        output_number=i+1)
        else:
            for output in self.output_parts:
                output.alias = self.alias

    def __repr__(self):
        # Bug fix: the format string had been reduced to '' and every
        # command printed as an empty string.
        return '<Command: {}>'.format(self.alias)

    @property
    def depends_on(self):
        """ Returns a list of command template aliases that the given command
        template depends on.
        """
        return [part.command_alias for part in self.input_parts
                if part.command_alias is not None]

    @property
    def input_parts(self):
        """ Returns a list of the input tokens in the list of parts. """
        return [part for part in self.file_parts
                if isinstance(part, Input)]

    @property
    def output_parts(self):
        """ Returns a list of the output tokens in the list of parts. """
        return [part for part in self.file_parts
                if isinstance(part, Output)]

    @property
    def file_parts(self):
        """ Returns a list of the file tokens in the list of parts.

        Parts may be nested one level deep inside lists; non-iterable
        parts are checked directly.
        """
        file_parts = []
        for part in self.parts:
            try:
                for sub_part in part:
                    if isinstance(sub_part, FileToken):
                        file_parts.append(sub_part)
            except TypeError:
                if isinstance(part, FileToken):
                    file_parts.append(part)
        return file_parts

    @property
    def path_parts(self):
        """ Returns a list of the path tokens in the list of parts. """
        return [part for part in self.parts
                if isinstance(part, PathToken)]

    def update_dependent_files(self, prev_commands=()):
        """ Update the command's dependencies based on the evaluated input and
        output of previous commands.
        """
        for command in prev_commands:
            for my_input in self.input_parts:
                for their_output in command.output_parts:
                    if their_output == my_input:
                        my_input.filename = their_output.eval()

    def eval(self):
        """ Evaluate the given job and return a complete shell script to be
        run by the job manager.
        """
        eval = []
        for part in self.parts:
            try:
                result = part.eval()
            except AttributeError:
                # Plain strings have no eval(); use them verbatim.
                result = part
            if result[-1] != '\n':
                result += ' '
            eval.append(result)
        return ''.join(eval).strip()
90 |
91 |
92 |
--------------------------------------------------------------------------------
/metapipe/models/command_template.py:
--------------------------------------------------------------------------------
1 | """ A template for creating commands.
2 |
3 | author: Brian Schrader
4 | since: 2016-01-13
5 | """
6 |
7 | import copy, collections
8 |
9 | from .tokens import Input, Output, FileToken, PathToken, CommentToken
10 | from .command import Command
11 |
12 |
class Ticker(object):
    """ A counter that wraps back around once it reaches a fixed maximum. """

    def __init__(self, maxlen, value=0):
        self.maxlen = maxlen
        self.value = value

    def tick(self, n=1):
        """ Advance the counter by n, wrapping past maxlen. """
        advanced = self.value + n
        if advanced >= self.maxlen:
            advanced -= self.maxlen
        self.value = advanced
23 |
24 |
class CommandTemplate(Command):
    """ A command whose input groups have not yet been expanded; eval()
    produces the list of concrete Command objects it represents.
    """

    def __init__(self, alias, parts=None, dependencies=None):
        """ Bug fix: `parts=[]`/`dependencies=[]` were shared mutable
        defaults; fresh lists are used instead.
        """
        self.alias = alias
        self.parts = parts if parts is not None else []
        self._dependencies = dependencies if dependencies is not None else []

    def __repr__(self):
        # Bug fix: the format string had been reduced to '' and every
        # template printed as an empty string.
        return '<CommandTemplate: {} (parts: {}, deps: {})>'.format(
            self.alias, len(self.parts), len(self._dependencies))

    @property
    def depends_on(self):
        """ Returns a list of command template aliases that the given command
        template depends on.
        """
        return [dep.alias for dep in self._dependencies]

    @property
    def file_parts(self):
        """ Returns a list of the file tokens in the list of parts. """
        return _search_for_files(self.parts)

    def eval(self):
        """ Returns a list of Command objects that can be evaluated as their
        string values. Each command will track its preliminary dependencies,
        but these values should not be depended on for running commands.
        """
        max_size = _get_max_size(self.parts)
        parts_list = _grow([[]], max_size-1)

        counter = Ticker(max_size)
        parts = self.parts[:]
        while len(parts) > 0:
            parts_list, counter = _get_parts_list(parts,
                    parts_list, counter)

        commands = []
        for i, parts in enumerate(parts_list):
            alias = self._get_alias(i+1)
            # Deep-copy so each command owns independent token objects.
            new_parts = copy.deepcopy(parts)
            commands.append(Command(alias=alias, parts=new_parts))
        return commands

    def _get_alias(self, index):
        """ Given an index, return the string alias for that command. """
        return '{}.{}'.format(self.alias, index)
72 |
73 |
def _get_parts_list(to_go, so_far=[[]], ticker=None):
    """ Iterates over to_go, building the list of parts. To provide
    items for the beginning, use so_far.

    Destructively consumes one part of `to_go` per call (pop(0)) and
    distributes it over the per-command sub-lists in `so_far`; `ticker`
    selects which sub-list receives the next plain Input. Returns the
    (so_far, ticker) pair so callers loop until `to_go` is empty.

    NOTE(review): `so_far=[[]]` is a shared mutable default; callers in
    this module always pass it explicitly.
    """
    try:
        part = to_go.pop(0)
    except IndexError:
        # Nothing left to distribute.
        return so_far, ticker

    # Lists of input groups: recurse into each group, advancing the
    # ticker between groups.
    if isinstance(part, list) and any(isinstance(e, list) for e in part):
        while len(part) > 0:
            so_far, ticker = _get_parts_list(part, so_far, ticker)
            ticker.tick()
    # Input Group: recurse without advancing, keeping the group together.
    elif isinstance(part, list) and any(isinstance(e, Input) for e in part):
        while len(part) > 0:
            so_far, ticker = _get_parts_list(part, so_far, ticker)
    # Magic Inputs: expand the wildcard into concrete inputs first.
    elif isinstance(part, Input) and part.is_magic:
        inputs = part.eval()
        while len(inputs) > 0:
            so_far, ticker = _get_parts_list(inputs, so_far, ticker)
            ticker.tick()
    # Normal inputs go only to the sub-list the ticker points at.
    elif isinstance(part, Input) and not part.is_magic:
        so_far[ticker.value].append(part)
    # Everything else (strings, paths, outputs) goes to every command.
    else:
        so_far = _append(so_far, part)

    return so_far, ticker
106 |
107 |
def _get_max_size(parts, size=1):
    """ Given a list of parts, find the maximum number of commands
    contained in it.

    (`size` is unused; kept for signature compatibility.)
    """
    # The widest explicit input group sets the base command count...
    group_lengths = [len(part) for part in parts if isinstance(part, list)]
    widest = max(group_lengths) if group_lengths else 0
    # ...scaled by the expansion factor of any magic (wildcard) inputs.
    return widest * _get_magic_size(parts)
124 |
125 |
def _get_magic_size(parts, size=1):
    """ Return the expansion factor contributed by magic inputs.

    Scans `parts` (recursing into nested lists) for magic Input tokens
    and returns the evaluated size of the first one that exceeds `size`,
    or the running `size` when none does.

    NOTE(review): returns immediately on the first magic input larger
    than `size` rather than taking the maximum over all of them --
    presumably intentional, but worth confirming.
    """
    for part in parts:
        if isinstance(part, Input) and part.is_magic:
            magic_size = len(part.eval())
            if magic_size > size:
                return magic_size
        elif isinstance(part, list):
            size = _get_magic_size(part, size)
    return size
135 |
136 |
137 | def _append(so_far, item):
138 | """ Appends an item to all items in a list of lists. """
139 | for sub_list in so_far:
140 | sub_list.append(item)
141 | return so_far
142 |
143 |
144 | def _grow(list_of_lists, num_new):
145 | """ Given a list of lists, and a number of new lists to add, copy the
146 | content of the first list into the new ones, and add them to the list
147 | of lists.
148 | """
149 | first = list_of_lists[0]
150 | for i in range(num_new):
151 | list_of_lists.append(copy.deepcopy(first))
152 | return list_of_lists
153 |
154 |
def _search_for_files(parts):
    """ Given a list of parts, return all of the nested file parts,
    preserving depth-first, in-order traversal.
    """
    found = []
    pending = list(parts)
    while pending:
        item = pending.pop(0)
        if isinstance(item, list):
            # Prepend so nested items are visited before later siblings.
            pending = list(item) + pending
        elif isinstance(item, FileToken):
            found.append(item)
    return found
164 |
165 |
166 |
--------------------------------------------------------------------------------
/metapipe/models/command_template_factory.py:
--------------------------------------------------------------------------------
1 | """ A factory for building individual commands based on the full list
2 | of commands and inputs.
3 |
4 | author: Brian Schrader
5 | since: 2016-01-12
6 | """
7 |
8 |
9 | from .tokens import Input, Output, PathToken, CommentToken
10 | from .command import Command
11 | from .command_template import CommandTemplate
12 | from .grammar import OR_TOKEN, AND_TOKEN
13 |
14 |
def get_command_templates(command_tokens, file_tokens=None, path_tokens=None,
        job_options=None):
    """ Given a list of tokens from the grammar, return a
    list of commands.

    :param command_tokens: parsed command tokens (consumed destructively).
    :param file_tokens: parsed [FILES] tokens (consumed destructively).
    :param path_tokens: parsed [PATHS] tokens (consumed destructively).
    :param job_options: raw option strings, wrapped as comment tokens.
    """
    # Bug fix: the old `=[]` defaults were shared module-level lists that
    # the helpers below pop from; use fresh lists per call instead.
    file_tokens = [] if file_tokens is None else file_tokens
    path_tokens = [] if path_tokens is None else path_tokens
    job_options = [] if job_options is None else job_options

    files = get_files(file_tokens)
    paths = get_paths(path_tokens)
    job_options = get_options(job_options)

    templates = _get_command_templates(command_tokens, files, paths,
            job_options)

    # Attach preliminary dependency links between the templates.
    for command_template in templates:
        command_template._dependencies = _get_prelim_dependencies(
            command_template, templates)
    return templates
31 |
32 |
def get_files(file_tokens, cwd=None):
    """ Given a list of parser file tokens, return a list of input objects
    for them.

    Consumes `file_tokens` destructively (pop from the end). When `cwd`
    is given it is attached to every Input created.
    """
    if not file_tokens:
        return []

    token = file_tokens.pop()
    try:
        filename = token.filename
    except AttributeError:
        # Alias-only entries carry no filename.
        filename = ''

    if cwd:
        input = Input(token.alias, filename, cwd=cwd)
    else:
        input = Input(token.alias, filename)

    # Bug fix: the recursive call previously dropped `cwd`, so only the
    # first-popped token ever received it.
    return [input] + get_files(file_tokens, cwd)
52 |
53 |
def get_paths(path_tokens):
    """ Given a list of parser path tokens, return a list of path objects
    for them. Consumes `path_tokens` destructively (pop from the end).
    """
    if not path_tokens:
        return []

    tail = path_tokens.pop()
    converted = PathToken(tail.alias, tail.path)
    return [converted] + get_paths(path_tokens)
64 |
65 |
def get_options(options):
    """ Given a list of options, tokenize them.

    Each option string is wrapped in a CommentToken (see _get_comments).
    """
    return _get_comments(options)
69 |
70 |
71 | # Internal Implementation
72 |
73 |
def _get_command_templates(command_tokens, files=[], paths=[], job_options=[],
        count=1):
    """ Recursively create command templates.

    Pops one command token per call and converts its parts into
    file/path/string tokens; templates are aliased 1-based via `count`.

    NOTE(review): pop() takes tokens from the END of `command_tokens`, so
    the caller appears to rely on the parser's ordering for aliases to
    line up with config-file order -- confirm against Parser.consume().
    """
    if not command_tokens:
        return []

    comment_tokens, command_token = command_tokens.pop()
    parts = []

    # Global job options and per-command comments lead the parts list.
    parts += job_options + _get_comments(comment_tokens)
    for part in command_token[0]:
        # Check for file
        try:
            parts.append(_get_file_by_alias(part, files))
            continue
        except (AttributeError, ValueError):
            pass

        # Check for path/string
        for cut in part.split():
            try:
                parts.append(_get_path_by_name(cut, paths))
                continue
            except ValueError:
                pass

            parts.append(cut)

    command_template = CommandTemplate(alias=str(count), parts=parts)
    # Outputs inherit the template's alias so later commands can refer
    # to them by command number.
    [setattr(p, 'alias', command_template.alias)
        for p in command_template.output_parts]
    return [command_template] + _get_command_templates(command_tokens,
            files, paths, job_options, count+1)
107 |
108 |
109 | def _get_prelim_dependencies(command_template, all_templates):
110 | """ Given a command_template determine which other templates it
111 | depends on. This should not be used as the be-all end-all of
112 | dependencies and before calling each command, ensure that it's
113 | requirements are met.
114 | """
115 | deps = []
116 | for input in command_template.input_parts:
117 | if '.' not in input.alias:
118 | continue
119 | for template in all_templates:
120 | for output in template.output_parts:
121 | if input.fuzzy_match(output):
122 | deps.append(template)
123 | break
124 | return list(set(deps))
125 |
126 |
def _get_file_by_alias(part, files):
    """ Given a command part, find the file it represents. If not found,
    then returns a new token representing that file.

    Returns a single Output token, or a list of input groups (each group
    a list of Input tokens); OR separators in `part` start a new group.
    :throws ValueError: if the value is not a command file alias.
    """
    # Make Output
    if _is_output(part):
        return Output.from_string(part.pop())

    # Search/Make Input
    else:
        inputs = [[]]

        # `magic_or` is set by the grammar when the separator sits
        # directly before '}' (the "{...||}" form).
        if part.magic_or:
            and_or = 'or'
        else:
            and_or = 'and'

        for cut in part.asList():
            if cut == OR_TOKEN:
                # Start a new input group after an OR separator.
                inputs.append([])
                continue
            if cut == AND_TOKEN:
                continue

            input = Input(cut, filename=cut, and_or=and_or)
            for file in files:
                if file.alias == cut:
                    # Override the filename
                    input.filename = file.filename
                    inputs[-1].append(input)
                    break
            else:
                # No declared [FILES] entry matched; keep the
                # alias-named input as-is (for/else: loop didn't break).
                inputs[-1].append(input)


        # Drop any empty groups left by trailing separators.
        return [input for input in inputs if input]
164 |
165 |
166 | def _get_path_by_name(part, paths):
167 | """ Given a command part, find the path it represents.
168 | :throws ValueError: if no valid file is found.
169 | """
170 | for path in paths:
171 | if path.alias == part:
172 | return path
173 | raise ValueError
174 |
def _get_comments(parts):
    """ Given a list of parts representing a list of comments, return the
    list of comment tokens.
    """
    return list(map(CommentToken, parts))
180 |
181 |
182 | def _is_output(part):
183 | """ Returns whether the given part represents an output variable. """
184 | if part[0].lower() == 'o':
185 | return True
186 | elif part[0][:2].lower() == 'o:':
187 | return True
188 | elif part[0][:2].lower() == 'o.':
189 | return True
190 | else:
191 | return False
192 |
193 |
--------------------------------------------------------------------------------
/metapipe/models/grammar.py:
--------------------------------------------------------------------------------
1 | """ Grammars for various parts of the input file. """
2 |
3 | from pyparsing import *
4 |
5 |
# ASCII letters, digits, and punctuation permitted in filenames/paths --
# note '{' and '}' (the input/output marker delimiters) and whitespace
# are deliberately absent.
approved_printables = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`|~'

lbrack = Literal('[').suppress()
rbrack = Literal(']').suppress()
pound = Literal('#')

# Sentinel strings substituted for '||' and ',' separators inside markers.
# NOTE(review): both sentinels are the identical string '<>', which makes
# OR and AND indistinguishable downstream (the AND branch in
# command_template_factory can never fire) -- this looks mangled from two
# distinct tokens; confirm against upstream history before changing.
OR_TOKEN = '<>'
AND_TOKEN = '<>'
14 |
class classproperty(property):
    """ A read-only property accessible on the class itself (no instance
    needed), used by Grammar to expose lazily-built grammar fragments.
    """
    def __get__(self, cls, owner):
        # self.fget is the wrapped staticmethod; bind it to the owner
        # class, then call it to build the fragment on each access.
        return self.fget.__get__(None, owner)()
18 |
19 |
class Grammar(object):
    """ A container class for the various grammars in the input files. """

    # A "[SECTION]" header line; the brackets are suppressed.
    _section = lbrack + Word(alphas+'_') + rbrack
    # Any non-section line: first word plus the remainder of the line.
    line = ~lbrack + Word(printables) + restOfLine
    # A line that does not start with '#', grouped as (first word, rest).
    _non_comment_line = ~pound + Group(Word(printables) + restOfLine)

    # The "{...}" input/output marker inside a command. ',' separators
    # become AND_TOKEN and '||' become OR_TOKEN; a '||' sitting directly
    # before '}' is captured under 'magic_or' (the "{...||}" form).
    __command_input_output = (
        Suppress('{') +
        OneOrMore(
            Group(OneOrMore(
                Combine(
                    Word(alphanums+'.*:/_-') +
                    Optional('.' + Word(nums))
                ) +
                Optional((
                    Suppress(',' + FollowedBy('}')) ^
                    Suppress(',')
                ).addParseAction(replaceWith(AND_TOKEN)).setResultsName('_and')) +
                Optional(
                    ('||' + FollowedBy('}')).addParseAction(
                        replaceWith(OR_TOKEN)).setResultsName('magic_or') ^
                    Suppress('||').addParseAction(
                        replaceWith(OR_TOKEN)).setResultsName('_or')
                )
            ))) +
        Suppress('}')
    )

    @classproperty
    @staticmethod
    def overall():
        """ The overall grammar for pulling apart the main input files. """
        return ZeroOrMore(Grammar.comment) + Dict(ZeroOrMore(Group(
            Grammar._section + ZeroOrMore(Group(Grammar.line)))
        ))

    @classproperty
    @staticmethod
    def comment():
        """ A '#' comment running to the end of the line. """
        return ('#' + Optional(restOfLine))

    @classproperty
    @staticmethod
    def file():
        """ Grammar for files found in the overall input files:
        an optional "<alias>." prefix followed by the filename.
        """
        return (
            Optional(Word(alphanums).setResultsName('alias') +
                Suppress(Literal('.'))) + Suppress(White()) +
            Word(approved_printables).setResultsName('filename')
        )

    @classproperty
    @staticmethod
    def path():
        """ Grammar for paths found in the overall input files:
        an alias followed by the rest of the line as the path.
        """
        return (
            Word(approved_printables).setResultsName('alias') +
            Suppress(White()) +
            restOfLine.setResultsName('path')
        )

    @classproperty
    @staticmethod
    def command_lines():
        """ Grammar for command lines: each command is grouped with any
        comment lines that precede it.
        """
        return ZeroOrMore(Group(
            Group(ZeroOrMore(Group(Grammar.comment))) + Grammar._non_comment_line
        ))

    @classproperty
    @staticmethod
    def command():
        """ Grammar for a single command: literal text alternating with
        "{...}" input/output markers.
        """
        return (
            OneOrMore(
                Word(approved_printables+' ').setResultsName('command',
                    listAllMatches=True) ^
                Grammar.__command_input_output.setResultsName('_in',
                    listAllMatches=True)
            )
        )
102 |
--------------------------------------------------------------------------------
/metapipe/models/job.py:
--------------------------------------------------------------------------------
1 | """ A basic job model, and local job implementation.
2 |
3 | author: Brian Schrader
4 | since: 2016-01-04
5 | """
6 |
7 | import os
8 | from subprocess import Popen, PIPE
9 |
10 |
def call(args, stdout=PIPE, stderr=PIPE):
    """ Calls the given arguments in a separate process
    and returns the contents of standard out.

    :returns: an (out, err) tuple, decoded to text when the stdout
        encoding can be determined, otherwise as raw bytes.
    """
    # Bug fix: `sys` was referenced below without ever being imported in
    # this module, so the decode always fell into the bytes fallback.
    import sys

    p = Popen(args, stdout=stdout, stderr=stderr)
    out, err = p.communicate()

    try:
        # sys.stdout.encoding can be None (e.g. when piped); decode()
        # then raises and we return the raw bytes instead.
        return out.decode(sys.stdout.encoding), err.decode(sys.stdout.encoding)
    except Exception:
        return out, err
22 |
23 |
class Job(object):
    """ A template job class that just runs the given command script locally.
    To make your own custom jobs, subclass this Job and override the status
    methods, the submit method, and cmd property.

    Submitting a job cannot block execution. The submit call should return
    immediately so that other jobs can be executed, and tracked.
    """

    # Pattern for the on-disk script filename; formatted with the alias.
    JOB_FILE_PATTERN = 'metapipe.{}.job'
    # Maximum number of submission attempts before giving up.
    MAX_RETRY = 5

    def __init__(self, alias, command, depends_on=None):
        """ Create an new job with the given name, and command.

        :param alias: a unique name for the job; also used in the job
            script's filename.
        :param command: an object whose eval() yields the script text.
        :param depends_on: aliases of jobs that must complete first.
        """
        self.command = command
        # Bug fix: `depends_on=[]` was a shared mutable default.
        self.depends_on = depends_on if depends_on is not None else []
        self.alias = alias
        self.attempts = 0
        self.filename = self.JOB_FILE_PATTERN.format(self.alias)

    def __repr__(self):
        # Bug fix: the format string had been reduced to '' and every
        # job printed as an empty string.
        return '<Job: {}>'.format(self.alias)

    def __cmp__(self, other):
        # Python 2 only; Python 3 ignores __cmp__ (see __lt__ below).
        return cmp(self.alias, other.alias)

    def __lt__(self, other):
        # Python 3 replacement for __cmp__-based ordering: sort by alias.
        return self.alias < other.alias

    def make(self):
        """ Evaluate the command, and write it to a file. """
        eval = self.command.eval()
        with open(self.filename, 'w') as f:
            f.write(eval)

    @property
    def should_retry(self):
        # True while resubmission attempts remain.
        return self.attempts < self.MAX_RETRY

    # Override these...

    @property
    def cmd(self):
        """ Returns the command needed to submit the calculations.
        Normally, this would be just running the command, however if
        using a queue system, then this should return the command to
        submit the command to the queue.
        """
        pass

    def submit(self):
        """ Submits the job to be run. If an external queue system is used,
        this method submits itself to that queue. Else it runs the job itself.
        :see: call
        """
        pass

    def is_running(self):
        """ Returns whether the job is running or not. """
        pass

    def is_queued(self):
        """ Returns whether the job is queued or not.
        This function is only used if jobs are submitted to an external queue.
        """
        pass

    def is_complete(self):
        """ Returns whether the job is complete or not. """
        pass

    def is_error(self):
        """ Checks to see if the job errored out. """
        pass

    def is_failed(self):
        """ Checks to see if the job has failed. This is usually if the job
        should not be resubmitted.
        """
        pass
100 |
101 |
102 |
--------------------------------------------------------------------------------
/metapipe/models/job_template.py:
--------------------------------------------------------------------------------
1 | """ A template that evaluates to muliple jobs and places them back on the queue.
2 | author: Brian Schrader
3 | since: 2016-02-19
4 | """
5 |
6 | from .job import Job
7 |
8 |
class JobTemplate(Job):
    """ A pseudo-job that, when submitted, expands its command template
    into concrete jobs and pushes them onto the queue.
    """

    def __init__(self, alias, command_template, depends_on, queue, job_class):
        """ :param queue: the queue the expanded jobs are pushed onto.
        :param job_class: the Job subclass used for the expanded jobs.
        """
        super(JobTemplate, self).__init__(alias, command_template, depends_on)
        self.command_template = command_template
        self.queue = queue
        self.job_class = job_class
        self.jobs = []

    def __repr__(self):
        # Bug fix: the format string had been reduced to '' and every
        # template printed as an empty string.
        return '<JobTemplate: {}>'.format(self.alias)

    def submit(self):
        """ Expand the template into jobs and push each onto the queue. """
        jobs = self._get_jobs_from_template(self.command_template, self.job_class)
        [self.queue.push(job) for job in jobs]
        self.jobs = jobs

    def is_running(self):
        if len(self.jobs) > 0:
            return any(job.is_running() for job in self.jobs)
        return False

    def is_queued(self):
        return False

    def is_complete(self):
        # Complete only once every expanded job is complete.
        if len(self.jobs) > 0:
            return all(job.is_complete() for job in self.jobs)
        return False

    def is_error(self):
        if len(self.jobs) > 0:
            return all(job.is_error() for job in self.jobs)
        return False

    def is_fail(self):
        # Bug fix: the comparison's result was computed but never
        # returned, so this method always returned None.
        return self.attempts > self.MAX_RETRY

    def _get_jobs_from_template(self, template, job_class):
        """ Given a template, a job class, construct jobs from
        the given template.
        """
        jobs = []
        for command in template.eval():
            alias = command.alias
            # Keep only dependencies that correspond to real queued jobs.
            depends_on = [job.alias
                    for job in self.queue.all_jobs
                    for deps in command.depends_on
                    if deps == job.alias]
            command.update_dependent_files([job.command
                    for job in self.queue.all_jobs
                    if not isinstance(job, JobTemplate)])

            job = job_class(alias, command, depends_on)
            jobs.append(job)
        return jobs
65 |
66 |
--------------------------------------------------------------------------------
/metapipe/models/local_job.py:
--------------------------------------------------------------------------------
1 | import threading
2 |
3 | from . import Job, call
4 |
5 |
6 | LOCAL_LOG_FORMAT = '{}_{}'
7 |
8 |
class LocalJobCallThread(threading.Thread):
    """ Runs a blocking callable in a separate thread, capturing the
    (stdout, stderr) pair the callable returns.
    """

    def __init__(self, callable, *args, **kwargs):
        """ :param callable: invoked in run(); must return a
        (stdout, stderr) tuple.
        """
        threading.Thread.__init__(self)
        self.callable = callable
        self.args = args
        self.kwargs = kwargs
        # Fix: these were previously initialized twice (before and after
        # Thread.__init__). Populated by run() once the callable finishes.
        self.stdout = None
        self.stderr = None

    def run(self):
        self.stdout, self.stderr = self.callable(*self.args, **self.kwargs)
24 |
25 |
class LocalJob(Job):
    """ A Job subclass that runs its generated script locally in a
    background thread.
    """

    def __init__(self, alias, command, depends_on=None, shell='bash'):
        # Fix: avoid a shared mutable default argument for depends_on.
        super(LocalJob, self).__init__(alias, command, depends_on or [])
        self.shell = shell
        self._task = None       # LocalJobCallThread once submitted.
        self._err = False

    def __repr__(self):
        # Fix: the old format string contained no placeholder, so repr()
        # always returned ''.
        return '<LocalJob: {}>'.format(self.cmd)

    @property
    def cmd(self):
        """ The local shell invocation for the generated job script. """
        return [self.shell, self.filename]

    def submit(self):
        """ Write the job script and launch it in a worker thread. """
        self.make()
        self.attempts += 1
        self._task = LocalJobCallThread(call, self.cmd)
        self._task.start()

    def is_running(self):
        try:
            return self._task.is_alive()
        except AttributeError:
            # _task is None: not yet submitted.
            return False

    def is_queued(self):
        """ Returns False since local jobs are not submitted to an
        external queue.
        """
        return False

    def is_complete(self):
        """ True once the worker thread has finished; joins the thread
        and writes the captured output to the log files.
        """
        try:
            if not self._task.is_alive():
                self._task.join()
                self._write_log()
                return True
        except AttributeError:
            pass
        return False

    def is_error(self):
        """ Checks to see if the job errored out.
        NOTE(review): this inspects stderr only while the task is still
        alive, at which point _task.stderr is None -- so the inner check
        raises AttributeError and this appears to always return False.
        Confirm intent before changing the condition.
        """
        try:
            if self._task.is_alive():
                if len(self._task.stderr.readlines()) > 0:
                    self._task.join()
                    self._write_log()
                    return True
        except AttributeError:
            pass
        return False

    def is_fail(self):
        return not self.should_retry

    def _write_log(self):
        """ Write the captured stdout/stderr to the job's log files. """
        alias = Job.JOB_FILE_PATTERN.format(self.alias)
        outlog, errlog = (LOCAL_LOG_FORMAT.format(alias, 'stdout'),
                LOCAL_LOG_FORMAT.format(alias, 'stderr'))

        with open(outlog, 'w+') as f:
            f.write(str(self._task.stdout))
        with open(errlog, 'w+') as f:
            f.write(str(self._task.stderr))
94 |
95 |
--------------------------------------------------------------------------------
/metapipe/models/pbs_job.py:
--------------------------------------------------------------------------------
1 | from . import Job, call
2 |
3 |
class PBSJob(Job):
    """ A Job subclass for running tasks on a PBS/Torque queue via qsub. """

    def __init__(self, alias, command, depends_on=None, queue='work'):
        # Fix: avoid a shared mutable default argument for depends_on.
        super(PBSJob, self).__init__(alias, command, depends_on or [])
        self.queue = queue
        self.id = None
        self.waiting = True     # The job has yet to be submitted.

    def submit(self):
        """ Write the script (first attempt only) and qsub it.
        The job id is the part of qsub's output before the first dot
        (qsub prints '<id>.<server>'); raises ValueError if the output
        has no dot.
        """
        if self.attempts == 0:
            self.make()
        self.attempts += 1
        out, err = call(self.cmd)
        self.waiting = False
        self.id = out[:out.index('.')]

    @property
    def cmd(self):
        """ The qsub invocation for the generated job script. """
        return ['qsub', self.filename]

    def is_running(self):
        """ Checks to see if the job is running. """
        return self._grep_qstat('running')

    def is_queued(self):
        """ Checks to see if the job is queued. """
        return self._grep_qstat('queued')

    def is_complete(self):
        """ Checks the job's output or log file to determine if
        the completion criteria was met.
        """
        qstat = self._grep_qstat('complete')
        comp = self._grep_status('complete')
        return bool(qstat and comp)

    def is_fail(self):
        return not self.should_retry

    def is_error(self):
        """ Checks to see if the job errored out. """
        qstat = self._grep_qstat('error')
        err = self._grep_status('error')
        return bool(qstat and err)

    def _grep_qstat(self, status_type='complete'):
        """ Greps qstat for the job's state code.
        :param status_type: complete, queued, running, error, gone
        """
        args = "qstat -e {}".format(self.id).split()
        res, _ = call(args)
        if res == '':
            return False
        try:
            # Skip the two header lines; the state code is column 5.
            res = res.split('\n')[2].split()[4]
        except IndexError:
            # Robustness fix: malformed or short qstat output.
            return False

        if status_type == 'complete' and res == 'C':
            return True
        elif status_type == 'error' and (res == 'E' or res == 'C'):
            return True
        elif status_type == 'running' and res == 'R':
            return True
        elif status_type == 'queued' and res == 'Q':
            return True
        elif status_type == 'gone' and 'unknown job id' in str(res).lower():
            # NOTE(review): by this point res is a single state column, so
            # this substring can never match -- confirm where the
            # 'unknown job id' text actually appears.
            return True
        else:
            return False

    def _grep_status(self, status_type):
        """ Greps through the job's qstat -f output to see if it
        finished with the requested status.
        :param status_type: complete, error
        """
        args = "qstat -f {}".format(self.id).split()
        res, _ = call(args)
        exit_status = [line for line in res.split('\n')
                if 'exit_status' in line]
        try:
            _, __, code = exit_status[0].split()
        except IndexError:
            # No exit_status line yet (job still running or unknown).
            code = None

        if status_type == 'complete' and code == '0':
            return True
        elif status_type == 'error' and code != '0':
            return True
        else:
            return False
102 |
103 |
--------------------------------------------------------------------------------
/metapipe/models/queue.py:
--------------------------------------------------------------------------------
1 | """ A simple manager for a task queue.
2 |
3 | The manager handles creating, submitting, and managing
4 | running jobs, and can even resubmit jobs that have failed.
5 |
6 | author: Brian Schrader
7 | since: 2015-08-27
8 | """
9 | from .reporting import BaseReportingMixin, HtmlReportingMixin, TextReportingMixin
10 | from .job_template import JobTemplate
11 |
class BaseQueue(object):
    """ An abstract class for managing a queue of jobs. To use this class,
    subclass it and fill in the callbacks you need.
    """

    # Upper bound on the number of simultaneously running jobs.
    MAX_CONCURRENT_JOBS = 10

    def __init__(self, name=''):
        self.name = name
        self.queue = []         # Jobs waiting to be submitted.
        self.running = []       # Jobs currently submitted/executing.
        self.failed = []        # Jobs that gave up after retries.
        self.complete = []      # Jobs that finished successfully.

    def __repr__(self):
        # Fix: the old format string contained no placeholder, so repr()
        # always returned ''.
        return '<Queue: %s jobs>' % str(len(self.active_jobs))

    @property
    def is_empty(self):
        return len(self.active_jobs) == 0

    @property
    def active_jobs(self):
        """ Returns a list of all jobs submitted to the queue,
        or in progress.
        """
        return list(set(self.queue + self.running))

    @property
    def all_jobs(self):
        """ Returns a list of all jobs submitted to the queue, complete,
        in-progress or failed.
        """
        return list(set(self.complete + self.failed + self.queue + self.running))

    @property
    def progress(self):
        """ Returns the percentage of jobs that are no longer active.
        (Docstring fix: only the percentage is returned.)
        """
        total = len(self.all_jobs)
        remaining = total - len(self.active_jobs) if total > 0 else 0
        percent = int(100 * (float(remaining) / total)) if total > 0 else 0
        return percent

    def ready(self, job):
        """ Determines if the job is ready to be submitted to the
        queue. It checks if the job depends on any currently
        running or queued operations.
        """
        no_deps = len(job.depends_on) == 0
        all_complete = all(j.is_complete() for j in self.active_jobs
                if j.alias in job.depends_on)
        none_failed = not any(True for j in self.failed
                if j.alias in job.depends_on)
        queue_is_open = len(self.running) < self.MAX_CONCURRENT_JOBS
        return queue_is_open and (no_deps or (all_complete and none_failed))

    def locked(self):
        """ Determines if the queue is locked: some active job depends on
        a job that has already failed.
        """
        if len(self.failed) == 0:
            return False
        for fail in self.failed:
            for job in self.active_jobs:
                if fail.alias in job.depends_on:
                    return True
        # Fix: previously fell off the end returning an implicit None.
        return False

    def push(self, job):
        """ Push a job onto the queue. This does not submit the job. """
        self.queue.append(job)

    def tick(self):
        """ Submits all the given jobs in the queue and watches their
        progress as they proceed. This function yields at the end of
        each iteration of the queue.
        :raises RuntimeError: If queue is locked.
        """
        self.on_start()
        while not self.is_empty:
            # Phase 1: submit waiting jobs whose dependencies are met.
            cruft = []
            for job in self.queue:
                if not self.ready(job):
                    continue
                self.on_ready(job)
                try:
                    job.submit()
                except ValueError:
                    # Submission failed: retry later or give up for good.
                    if job.should_retry:
                        self.on_error(job)
                        job.attempts += 1
                    else:
                        self.on_fail(job)
                        cruft.append(job)
                        self.failed.append(job)
                else:
                    self.running.append(job)
                    self.on_submit(job)
                    cruft.append(job)

            self.queue = [job for job in self.queue if job not in cruft]

            # Phase 2: poll running jobs and move finished ones along.
            cruft = []
            for job in self.running:
                if job.is_running() or job.is_queued():
                    pass
                elif job.is_complete():
                    self.on_complete(job)
                    cruft.append(job)
                    self.complete.append(job)
                elif job.is_fail():
                    self.on_fail(job)
                    cruft.append(job)
                    self.failed.append(job)
                elif job.is_error():
                    # NOTE(review): errored jobs are removed from
                    # `running` without being re-queued or recorded --
                    # confirm this is intentional.
                    self.on_error(job)
                    cruft.append(job)
                else:
                    pass
            self.running = [job for job in self.running if job not in cruft]

            if self.locked() and self.on_locked():
                raise RuntimeError
            self.on_tick()
            yield
        self.on_end()

    # Callbacks...

    def on_start(self):
        """ Called when the queue is starting up. """
        pass

    def on_end(self):
        """ Called when the queue is shutting down. """
        pass

    def on_locked(self):
        """ Called when the queue is locked and no jobs can proceed.
        If this callback returns True, then the queue will be
        terminated with a RuntimeError, else it keeps ticking.
        """
        return True

    def on_tick(self):
        """ Called when a tick of the queue is complete. """
        pass

    def on_ready(self, job):
        """ Called when a job is ready to be submitted.
        :param job: The given job that is ready.
        """
        pass

    def on_submit(self, job):
        """ Called when a job has been submitted.
        :param job: The given job that has been submitted.
        """
        pass

    def on_complete(self, job):
        """ Called when a job has completed.
        :param job: The given job that has completed.
        """
        pass

    def on_error(self, job):
        """ Called when a job has errored. By default, the job
        is resubmitted until some max threshold is reached.
        :param job: The given job that has errored.
        """
        pass

    def on_fail(self, job):
        """ Called when a job has failed after multiple resubmissions. The
        given job will be removed from the queue.
        :param job: The given job that has errored.
        """
        pass
189 |
190 |
class ReportingJobQueue(BaseReportingMixin, BaseQueue):
    """ An abstract subclass of the Queue which reports on progress. """

    @property
    def real_jobs(self):
        """ Returns all jobs that represent work. """
        return [job for job in self.all_jobs
                if not isinstance(job, JobTemplate)]

    def on_locked(self):
        """ Report the deadlock and terminate the queue. """
        self.render('The queue is locked. Please check the logs.',
                self.progress)
        return True

    def on_submit(self, job):
        """ Report a submission, skipping template pseudo-jobs. """
        if isinstance(job, JobTemplate):
            return
        self.render('Submitted: %s' % job.alias, self.progress)

    def on_complete(self, job):
        """ Report a completion, skipping template pseudo-jobs. """
        if isinstance(job, JobTemplate):
            return
        self.render('Complete: %s' % job.alias, self.progress)

    def on_error(self, job):
        """ Report a retryable error, skipping template pseudo-jobs. """
        if isinstance(job, JobTemplate):
            return
        self.render('Error: Job %s has failed, retrying (%s/%s)'
                % (job.alias, str(job.attempts), str(job.MAX_RETRY)),
                self.progress)

    def on_fail(self, job):
        """ Report a permanent failure, skipping template pseudo-jobs. """
        if isinstance(job, JobTemplate):
            return
        self.render('Error: Job %s has failed. Retried %s times.'
                % (job.alias, str(job.attempts)), self.progress)

    def on_end(self):
        """ Report that the whole pipeline has finished. """
        self.render('All jobs are complete.', self.progress)
224 |
225 |
class HtmlReportingJobQueue(HtmlReportingMixin, ReportingJobQueue):
    """ A queue that generates HTML reports. """
229 |
230 |
class TextReportingJobQueue(TextReportingMixin, ReportingJobQueue):
    """ A queue that generates textual reports. """
234 |
--------------------------------------------------------------------------------
/metapipe/models/reporting.py:
--------------------------------------------------------------------------------
1 | """ A series of mixins for reporting. """
2 | from datetime import datetime as dt
3 |
4 | from metapipe.templates import env
5 | template = env.get_template('progress-report.tmpl.html')
6 |
7 |
class BaseReportingMixin(object):
    """ An abstract mixin for reporting. """

    # Timestamp format shared by every concrete reporter.
    message_format = '%Y-%m-%d %H:%M:%S'

    def render(self, message, progress):
        """ Render the output of the report. The base implementation is
        a no-op; subclasses override it.
        """
        return None
16 |
17 |
class HtmlReportingMixin(BaseReportingMixin):
    """ A reporting mixin that writes progress to an HTML report.
    Relies on the host class providing `name` and `real_jobs`.
    """

    # Class-level default; render() rebinds an instance attribute so
    # separate queue instances no longer share one message history.
    messages = []
    output = 'metapipe.report.html'

    def render(self, message, progress):
        """ Prepend a timestamped message and rewrite the HTML report. """
        msg = Message(dt.strftime(dt.now(), self.message_format), message)
        # Fix: rebind rather than mutate, so the class-level list is not
        # shared/mutated across instances.
        self.messages = [msg] + self.messages
        with open(self.output, 'w') as f:
            # Fix: `template` is the module-level Jinja template loaded
            # above; `self.template` is never defined and raised
            # AttributeError on the first render.
            # NOTE(review): sorted() assumes jobs are orderable -- confirm.
            f.write(template.render(
                name=self.name,
                messages=self.messages, progress=progress,
                jobs=sorted(self.real_jobs)))
31 |
32 |
class TextReportingMixin(BaseReportingMixin):
    """ A reporting mixin that prints any progress to the console. """

    def render(self, message, progress):
        """ Print one timestamped progress line. """
        timestamp = dt.strftime(dt.now(), self.message_format)
        print('[{}%] {} {}'.format(progress, timestamp, message))
39 |
40 |
class Message(object):
    """ A timestamped message shown in progress reports. """

    def __init__(self, time, text):
        # `time` is the preformatted display timestamp.
        self.time, self.text = time, text
45 |
--------------------------------------------------------------------------------
/metapipe/models/sge_job.py:
--------------------------------------------------------------------------------
1 | from . import Job, call
2 |
3 |
class SGEJob(Job):
    """ A Job subclass for running tasks on an SGE queue via qsub. """

    def __init__(self, alias, command, depends_on=None, queue='work'):
        # Fix: avoid a shared mutable default argument for depends_on.
        super(SGEJob, self).__init__(alias, command, depends_on or [])
        self.queue = queue
        self.id = None
        self.waiting = True     # The job has yet to be submitted.

    def submit(self):
        """ Write the script (first attempt only) and qsub it, recording
        the job id from the third whitespace-separated token of qsub's
        output ('Your job <id> ...').
        """
        if self.attempts == 0:
            self.make()
        self.attempts += 1
        out, err = call(self.cmd)
        self.waiting = False
        self.id = out.split()[2]

    @property
    def cmd(self):
        """ The qsub invocation for the generated job script. """
        return ['qsub', '-cwd', '-V', self.filename]

    def is_running(self):
        """ Checks to see if the job is running. """
        return self._grep_qstat('running')

    def is_queued(self):
        """ Checks to see if the job is queued. """
        return self._grep_qstat('queued')

    def is_complete(self):
        """ Checks the job's output or log file to determine if
        the completion criteria was met.
        """
        qstat = self._grep_qstat('complete')
        comp = self._grep_status('complete')
        return bool(qstat and comp)

    def is_error(self):
        """ Checks to see if the job errored out. """
        qstat = self._grep_qstat('error')
        err = self._grep_status('error')
        return bool(qstat and err)

    def _grep_qstat(self, status_type='complete'):
        """ Greps qstat for the job's state code.
        :param status_type: complete, queued, running, error, gone
        """
        args = ("qstat -e %s" % self.id).split()
        res, _ = call(args)
        if res == '':
            return False
        try:
            # Skip the two header lines; the state code is column 5.
            res = res.split('\n')[2].split()[4]
        except IndexError:
            # Robustness fix: malformed or short qstat output.
            return False

        if status_type == 'complete' and res == 'c':
            return True
        elif status_type == 'error' and (res == 'e' or res == 'c'):
            return True
        elif status_type == 'running' and res == 'r':
            return True
        elif status_type == 'queued' and res == 'qw':
            return True
        elif status_type == 'gone' and 'unknown job id' in str(res).lower():
            # NOTE(review): by this point res is a single state column, so
            # this substring can never match -- confirm where the
            # 'unknown job id' text actually appears.
            return True
        else:
            return False

    def _grep_status(self, status_type):
        """ Greps through the job's qstat -f output to see if it
        finished with the requested status.
        :param status_type: complete, error
        """
        args = ("qstat -f %s" % self.id).split()
        res, _ = call(args)
        exit_status = [line for line in res.split('\n')
                if 'exit_status' in line]
        try:
            _, __, code = exit_status[0].split()
        except IndexError:
            # No exit_status line yet (job still running or unknown).
            code = None

        if status_type == 'complete' and code == '0':
            return True
        elif status_type == 'error' and code != '0':
            return True
        else:
            return False
99 |
100 |
--------------------------------------------------------------------------------
/metapipe/models/tokens.py:
--------------------------------------------------------------------------------
1 | """ A set of tokens and convienence functions for input/output files.
2 |
3 | author: Brian Schrader
4 | since: 2015-12-28
5 | """
6 |
7 | from __future__ import print_function
8 | from collections import namedtuple
9 | import glob, re
10 |
11 |
12 | file_pattern = 'mp.{}.output{}'
13 | alias_pattern = '{command}-{output_number}'
14 |
15 |
class PathToken(object):
    """ A model for a given path (an alias bound to a filesystem path). """

    def __init__(self, alias, path):
        self.alias = alias
        self.path = path

    def __repr__(self):
        # Fix: the old format string contained no placeholders, so
        # repr() always returned ''.
        return '<PathToken: {}->{}>'.format(self.alias, self.path)

    def __eq__(self, other):
        try:
            # Tokens match if either the alias or the path agrees.
            return (self.alias == other.alias or
                    self.path == other.path)
        except AttributeError:
            return False

    def __hash__(self):
        # Fix: defining __eq__ disables the default hash in Python 3;
        # hash on the alias, mirroring FileToken.
        return hash(self.alias)

    def eval(self):
        """ Returns the concrete path this token stands for. """
        return self.path
35 |
36 |
class CommentToken(object):
    """ A token representing a comment line, stored as string parts. """

    def __init__(self, parts):
        self.parts = parts

    def __repr__(self):
        # Fix: the old format string contained no placeholder, so
        # repr() always returned ''.
        return '<CommentToken: {}>'.format(''.join(self.parts))

    def __eq__(self, other):
        try:
            return ''.join(self.parts) == ''.join(other.parts)
        except AttributeError:
            # Robustness fix: comparing against a non-token yields False
            # instead of raising (consistent with the other tokens).
            return False

    def eval(self):
        """ Returns the reassembled comment followed by a newline. """
        return '{}\n'.format(''.join(self.parts))
50 |
51 |
class FileToken(object):
    """ An abc for input/output data classes. Provides various common
    methods.
    Warning: This class should not be used directly.
    """

    def __init__(self, alias, filename='', cwd=''):
        self.alias = alias
        self.filename = filename
        # Normalize the working directory so it always ends in a slash.
        if cwd and not cwd.endswith('/'):
            cwd += '/'
        self.cwd = cwd

    def __eq__(self, other):
        # Two tokens match when either the alias or the filename agrees;
        # anything without those attributes compares unequal.
        try:
            same_alias = self.alias == other.alias
            same_file = self.filename == other.filename
        except AttributeError:
            return False
        return same_alias or same_file

    def __hash__(self):
        return hash(self.alias)

    @property
    def path(self):
        """ The working directory joined with the filename. """
        return self.cwd + self.filename
79 |
80 |
class Input(FileToken):
    """ A model of a single input to a given command. Input tokens can be
    evaluated to obtain their actual filename(s).
    """

    def __init__(self, alias, filename='', cwd='', and_or=''):
        super(Input, self).__init__(alias, filename, cwd)
        # 'or' means the matched files are independent alternatives;
        # 'and' (or '') means they are used together.
        self.and_or = and_or

    def __repr__(self):
        try:
            eval = self.eval()
        except Exception:
            eval = '?'
        # Fix: the old format string had only two placeholders for three
        # arguments, so the and_or marker was silently dropped.
        return '<Input [{}]{}{}>'.format(self.alias, eval,
                ' _{}_'.format(self.and_or) if self.and_or else '')

    def fuzzy_match(self, other):
        """ Given another token, see if either the major alias identifier
        matches the other alias, or if magic matches the alias.
        """
        magic, fuzzy = False, False
        try:
            magic = self.alias == other.magic
        except AttributeError:
            pass

        if '.' in self.alias:
            major = self.alias.split('.')[0]
            fuzzy = major == other.alias
        return magic or fuzzy

    def eval(self):
        """ Evaluates the given input and returns a string containing the
        actual filenames represented. If the input token represents multiple
        independent files, then eval will return a list of all the input files
        needed, otherwise it returns the filenames in a string.
        """
        if self.and_or == 'or':
            return [Input(self.alias, file, self.cwd, 'and')
                    for file in self.files]
        return ' '.join(self.files)

    @property
    def command_alias(self):
        """ Returns the command alias for a given input. In most cases this
        is just the input's alias but if the input is one of many, then
        `command_alias` returns just the beginning of the alias corresponding
        to the command's alias.
        NOTE(review): this tests for '.' but splits on '-'; confirm the
        alias grammar before changing either character.
        """
        if '.' in self.alias:
            return self.alias.split('-')[0]
        return None

    @property
    def is_magic(self):
        # Magic inputs evaluate to a list of alternatives; a ValueError
        # from files (no matches) means we cannot tell, so say no.
        try:
            return isinstance(self.eval(), list)
        except ValueError:
            return False

    @property
    def is_glob(self):
        return '*' in self.filename

    @property
    def magic_path(self):
        """ The glob pattern for output files produced by this alias. """
        match = file_pattern.format(self.alias, '*')
        return '{}{}'.format(self.cwd, match)

    @property
    def files(self):
        """ Returns a list of all the files that match the given input
        token, trying the literal path, then the magic pattern, then the
        bare alias.
        :raises ValueError: if nothing matches.
        """
        # Fix: removed a dead `res = None; if not res:` guard.
        res = glob.glob(self.path)
        if not res and self.is_glob:
            res = glob.glob(self.magic_path)
        if not res:
            res = glob.glob(self.alias)
        if not res:
            raise ValueError('No files match. %s' % self)
        return res

    @staticmethod
    def from_string(string, _or=''):
        """ Parse a given string and turn it into an input token. """
        and_or = 'or' if _or else ''
        return Input(string, and_or=and_or)
175 |
176 |
class Output(FileToken):
    """ A model of a single output to a given command. Output tokens can be
    evaluated to obtain their actual filename(s).
    """

    def __init__(self, alias, filename='', cwd='', magic=''):
        super(Output, self).__init__(alias, filename, cwd)
        self.ext = ''
        self.magic = ''
        self._clean(magic)

    def __repr__(self):
        # Fix: the old format string had three placeholders for four
        # arguments, so self.ext was silently dropped.
        return '<Output [{}]{}{}{}>'.format(self.alias, self.eval(),
                (' ' + self.magic) if self.magic else '', self.ext)

    def __eq__(self, other):
        """ Overrides the token eq to allow for magic : alias comparison for
        magic inputs. Defaults to the super() eq otherwise.
        """
        try:
            return (self.magic == other.alias or
                    super(Output, self).__eq__(other))
        except AttributeError:
            return False

    def eval(self):
        """ Returns a filename to be used for script output: the magic
        name if one was given, otherwise a generated pattern name, or
        the explicit path.
        """
        if self.magic:
            return self.magic
        if not self.filename:
            return file_pattern.format(self.alias, self.ext)
        return self.path

    def as_input(self):
        """ Returns an input token for the given output. """
        return Input(self.alias, self.eval())

    def _clean(self, magic):
        """ Given a magic string, remove the output tag designator.
        Recognized forms: 'o' (plain), 'o:<name>' (explicit name),
        'o.<ext>' (extension only). Anything else is ignored.
        """
        if magic.lower() == 'o':
            self.magic = ''
        elif magic[:2].lower() == 'o:':
            self.magic = magic[2:]
        elif magic[:2].lower() == 'o.':
            # Keep the dot: 'o.gz' -> ext '.gz'.
            self.ext = magic[1:]

    @staticmethod
    def from_string(string):
        """ Parse a given string and turn it into an output token. """
        return Output('', magic=string)
227 |
--------------------------------------------------------------------------------
/metapipe/parser.py:
--------------------------------------------------------------------------------
1 | """ A parser and other parser related classes. """
2 |
3 | import pyparsing
4 |
5 | from .models import Command, Input, Output, Grammar
6 | from .models import command_template_factory as ctf
7 |
8 |
class Parser(object):
    """ Parses a metapipe pipeline description into command templates. """

    def __init__(self, string):
        self.string = string
        self.commands = []
        self.paths = []
        self.files = []

    def consume(self, cwd=None):
        """ Converts the lexer tokens into valid statements. This process
        also checks command syntax.
        :param cwd: currently unused; kept for interface compatibility.
        :raises ValueError: on invalid syntax or when no commands exist.
        """
        first_pass = Grammar.overall.parseString(self.string)
        # Fix: iteritems() is Python 2 only; items() works on both 2 and 3.
        lowered = {key.lower(): val for key, val in first_pass.items()}

        self.commands = ['\n'.join(self._get('commands', lowered))]
        self.job_options = self._get('job_options', lowered)
        self.global_options = self._get('options', lowered)

        self.files = self._get('files', lowered)
        self.paths = self._get('paths', lowered)

        # Second pass: parse each section with its detailed grammar.
        self.files = self._parse(self.files, Grammar.file, True)
        self.paths = self._parse(self.paths, Grammar.path, True)
        self.job_options = self._parse(self.job_options, Grammar.line)

        try:
            command_lines = self._parse(self.commands, Grammar.command_lines)[0]
        except IndexError:
            raise ValueError('Did you write any commands?')

        self.commands = []
        for command_line in command_lines:
            comments, command = command_line
            self.commands.append([comments.asList(),
                self._parse([''.join(command)], Grammar.command)])

        self.job_options = [opt.asList() for opt in self.job_options]

        self.paths = ctf.get_paths(self.paths)
        self.files = ctf.get_files(self.files)

        # The factory pops items from the back, so reverse to preserve
        # the order in which they were declared.
        self.paths.reverse()
        self.files.reverse()
        self.commands.reverse()

        return ctf.get_command_templates(self.commands, self.files[:],
                self.paths[:], self.job_options)

    def _get(self, key, parser_result):
        """ Given a type and a dict of parser results, return
        the items as a list of joined text lines.
        """
        try:
            list_data = parser_result[key].asList()
            if any(isinstance(obj, str) for obj in list_data):
                txt_lines = [''.join(list_data)]
            else:
                txt_lines = [''.join(f) for f in list_data]
        except KeyError:
            # Section absent from the input.
            txt_lines = []
        return txt_lines

    def _parse(self, lines, grammar, ignore_comments=False):
        """ Given a list of lines, parse each one using the more detailed
        parse grammar, optionally skipping '#' comment lines.
        :raises ValueError: wrapping any pyparsing syntax error.
        """
        results = []
        for c in lines:
            if c != '' and not (ignore_comments and c[0] == '#'):
                try:
                    results.append(grammar.parseString(c))
                except pyparsing.ParseException as e:
                    raise ValueError('Invalid syntax. Verify line {} is '
                            'correct.\n{}\n\n{}'.format(e.lineno, c, e))
        return results
85 |
--------------------------------------------------------------------------------
/metapipe/runtime.py:
--------------------------------------------------------------------------------
1 | """ The metapipe runtime.
2 |
3 | author: Brian Schrader
4 | since: 2015-01-13
5 | """
6 |
7 | from time import sleep
8 |
9 | from metapipe.models import JobTemplate
10 |
11 |
class Runtime(object):
    """ Drives pipeline execution: wraps each command template in a
    JobTemplate, pushes it onto the queue, and ticks the queue until
    it is empty.
    """

    def __init__(self, command_templates, queue_type, job_types,
            job_type='local', sleep_time=1, max_jobs=10):
        """ :param command_templates: templates to schedule.
        :param queue_type: zero-arg callable producing the queue.
        :param job_types: mapping of job-type name -> job class.
        :param job_type: key into job_types for the class to use.
        :param sleep_time: seconds to wait between queue ticks.
        :param max_jobs: cap on concurrently running jobs.
        """
        self.complete_jobs = []
        self.queue = queue_type()
        self.sleep_time = sleep_time

        self.queue.MAX_CONCURRENT_JOBS = max_jobs

        # Fix: removed an unused local (job_templates = []).
        for command_template in command_templates:
            self.add(command_template, job_types[job_type])

    def add(self, command_template, job_class):
        """ Given a command template, add it as a job to the queue. """
        job = JobTemplate(command_template.alias,
                command_template=command_template,
                depends_on=command_template.depends_on, queue=self.queue,
                job_class=job_class)
        self.queue.push(job)

    def run(self):
        """ Begins the runtime execution. Returns the number of queue
        iterations performed.
        """
        iterations = 0
        queue = self.queue.tick()
        while True:
            try:
                next(queue)
            except StopIteration:
                break

            iterations += 1
            sleep(self.sleep_time)
        return iterations
47 |
--------------------------------------------------------------------------------
/metapipe/templates/__init__.py:
--------------------------------------------------------------------------------
# Shared Jinja2 environment that loads templates bundled inside the
# metapipe package (metapipe/templates/).
from jinja2 import Environment, PackageLoader
env = Environment(loader=PackageLoader('metapipe', 'templates'))
3 |
--------------------------------------------------------------------------------
/metapipe/templates/output_script.tmpl.sh:
--------------------------------------------------------------------------------
1 | #! {{shell}}
2 | set -e;
3 |
4 | {{options}}
5 |
6 | python - <
2 |
3 |
4 |
5 |
6 |
7 | {{name}} Pipeline Progress Report | Metapipe
8 |
9 |
18 |
19 |
20 |
24 |
25 |
26 |