├── .coveragerc
├── .coveralls
├── .editorconfig
├── .gitignore
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs
├── Makefile
├── conf.py
├── extending.md
├── getting_started.md
├── index.rst
├── make.bat
├── pipeline.gif
├── reporting.md
├── scripting.md
├── syntax.md
└── what_is_metapipe.md
├── metapipe
├── __init__.py
├── __main__.py
├── app.py
├── models
│ ├── __init__.py
│ ├── command.py
│ ├── command_template.py
│ ├── command_template_factory.py
│ ├── grammar.py
│ ├── job.py
│ ├── job_template.py
│ ├── local_job.py
│ ├── pbs_job.py
│ ├── queue.py
│ ├── reporting.py
│ ├── sge_job.py
│ └── tokens.py
├── parser.py
├── runtime.py
└── templates
│ ├── __init__.py
│ ├── output_script.tmpl.sh
│ └── progress-report.tmpl.html
├── requirements.txt
├── setup.py
└── test
├── __init__.py
├── files
├── mp.1.1-1.output
├── mp.1.1.job
├── mp.1.1.job_stderr
├── mp.1.1.job_stdout
├── mp.1.1.output
├── mp.1.1.output.gz
├── mp.1.1.output.testing_file
├── mp.1.2.job
├── mp.1.2.job_stderr
├── mp.1.2.job_stdout
├── mp.1.2.output
├── mp.1.2.output.testing_file
├── mp.2.1.job
├── mp.2.1.job_stderr
├── mp.2.1.job_stdout
├── mp.2.1.output
├── mp.2.2.output
├── mp.3.1.output
├── mp.3.2.output
├── mp.3.3.output
├── somefile.1
├── somefile.1.bam
├── somefile.1.counts
├── somefile.2
├── somefile.2.bam
├── somefile.2.counts
├── somefile.3
├── somefile.3.counts
├── somefile.4
├── somefile.4.counts
├── somefile.5
├── somefile.6
├── somefile.bam
└── star.my_output
├── fixtures.py
├── mocks.py
├── test_app.py
├── test_command.py
├── test_command_template.py
├── test_command_template_factory.py
├── test_grammar.py
├── test_job.py
├── test_local_job.py
├── test_parser.py
├── test_pbs_job.py
├── test_queue.py
├── test_runtime.py
├── test_sge_job.py
├── test_template.py
└── test_tokens.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit = test/*
3 |
--------------------------------------------------------------------------------
/.coveralls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/.coveralls
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | # EditorConfig is awesome: http://EditorConfig.org
2 |
3 | # top-most EditorConfig file
4 | root = true
5 |
6 | # Unix-style newlines with a newline ending every file
7 | [*]
8 | end_of_line = lf
9 | insert_final_newline = true
10 |
11 | [*.{js,py}]
12 | charset = utf-8
13 | indent_style = space
14 | indent_size = 4
15 |
16 | [{package.json,.travis.yml}]
17 | indent_style = space
18 | indent_size = 2
19 |
20 | [*.html]
21 | indent_size = 2
22 | indent_style = space
23 |
24 | [*.css]
25 | indent_size = 2
26 | indent_style = space
27 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | etc/
2 | docs/_build
3 | cover/
4 | .coverage
5 | *cache*
6 | *egg*
7 | build/
8 | dist/
9 | MANIFEST
10 | pipeline
11 | .metapipe
12 | **.mp
13 | **sample**
14 | *.sh
15 | notes/
16 | *.pyc
17 | *.swp
18 | htmlcov/
19 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - '2.7'
4 | - '3.4'
5 | - '3.5'
6 | before_script:
7 | - pip install -r requirements.txt
8 | script: nosetests --with-cov --cov-report html --cov-config .coveragerc --cov metapipe
9 | --cov test
10 | after_success:
11 | - coveralls
12 | deploy:
13 | provider: pypi
14 | user: sonicrocketman
15 | password:
16 | secure: cfDJKNv1BMDsJ3NyOpjwVQwBwO3ZcDMVnEDmcNoS4bwONo/pF7+UFbNm/4+AG2Oo9W5u63YNoR/b1MajbaLd9gBCf7uymrOnLxVtFwq1JDb5BWOsegJwPtlxrKxjKjsBAp5BY7cqOivAWEJZuCi6XjNCyG+QSt0vXKqw4U2xqjfx3KtfSea1Hu0aN1YvFn1otod9faXPK80T/4AZ1Ytmauq12vzla1bLJz7djYS2ApBM+pEJodOhw9V53CknrBpm9SfgFRs5xOkKB7FY8Tq208AxDvcufxkwUsqzoipOzfGcBFhVQdREOOLbWUKmExufCHhyXWVmp7yrkLwGX4REWI+unq6SFU61mknVizLfphJ0DSLGWzcoPxwP3vk39q3PHP1XKojEkCicIb5C6r3YJqYpFtF83YjuEmJXew+9GdP1KWyWS5G1xslhxZvklAdkSPsn65GmABFsSNrMLyVdCTllGpgnjrpcbf1jEMP8MTp6+qc8YVjdEDtzgeJ8aoSyC6K9dRg95qixb1COqzTrF0N4LDKRGKKJrHFg+JXUZDSPYdpju5oz1ohm3/96SmdYGqL+ilO1RT3gxhlFV1X30AymAGUcVKCCLpj9dauQALeA16sKvtcFYVxjunjzJwz+OzM7AYlvFd+ak618x4btnmybsT0Nc93enT9seI+LSlE=
17 | on:
18 | tags: true
19 | distributions: sdist bdist_wheel
20 | branch: master
21 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Brian Schrader
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include metapipe/templates/*
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Metapipe
2 |
3 | *A pipeline generator and runtime system*
4 |
5 | [](https://travis-ci.org/TorkamaniLab/metapipe)
6 | [](https://coveralls.io/github/TorkamaniLab/metapipe?branch=master)
7 | [](https://img.shields.io/badge/Python-2.7-blue.svg)
8 | [](https://img.shields.io/badge/Python-3.4-blue.svg)
9 | [](https://img.shields.io/badge/Python-3.5-blue.svg)
10 | [](https://github.com/TorkamaniLab/metapipe/blob/master/LICENSE)
11 |
12 | Metapipe is a simple command line tool for building and running complex analysis pipelines. If you use a PBS/Torque queue for cluster computing, or if you have complex batch processing that you want simplified, metapipe is the tool for you.
13 |
14 |
15 |
16 | Metapipe's goal is to improve **readability**, and **maintainability** when building complex pipelines.
17 |
18 | In addition to helping you generate and maintain complex pipelines, **metapipe also helps you debug them**! How? Well metapipe watches your jobs execute and keeps tabs on them. This means, unlike conventional batch queue systems like PBS/Torque alone, metapipe can give you accurate error information, and even resubmit failing jobs! Metapipe enhances the power of any PBS/Torque queue!
19 |
20 | - What if I [don't use PBS/Torque](#other-queue-systems), or [a queue system at all?](#no-queue-no-problem)
21 |
22 |
23 | ## How do I get it?
24 |
25 | It's super simple!
26 |
27 | `pip install metapipe`
28 |
29 | To make it easy, metapipe runs on Python 2.7, 3.4, and 3.5!
30 |
31 |
32 | ## What does it do?
33 |
34 | In the bad old days (before metapipe), if you wanted to make an analysis pipeline, you needed to know how to code. **Not anymore!** Metapipe makes it easy to build and run your analysis pipelines! **No more code, just commands!** This makes your pipelines easy to understand and change!
35 |
36 |
37 | ## Documentation & Help
38 |
39 | [Check out the full documentation at ReadTheDocs →](http://metapipe.readthedocs.org/en/latest/index.html)
40 |
41 | If you need help with Metapipe, or you'd like to chat about new features, get in touch by filing an issue, or at `#metapipe` on freenode!
42 |
43 |
44 | ### Here's a sample!
45 |
46 | Let's say you have a few command-line tools that you want to string together into a pipeline. You used to have to know Python, Perl, Bash, or some other scripting language; now you can use Metapipe!
47 |
48 | ```bash
49 | [COMMANDS]
50 | # Let's get the first and third columns from each of
51 | # our files, and put the output in separate files.
52 | cut -f 1,3 {1||2||3} > {o}
53 |
54 | # Once that's done, we'll need to take the output and
55 | # run each through our custom processing script individually.
56 | # Here we can give a custom extension to the default output file.
57 | python3 my_script.py --output {o.processed.csv} -i {1.*||}
58 |
59 | # Finally, we want to collect each sample and analyze
60 | # them all together. We also need to use a custom version
61 | # of Python for this.
62 | custom_python analysis.py -o {o.results.txt} {2.*}
63 |
64 | [FILES]
65 | 1. controls.1.csv
66 | 2. controls.2.csv
67 | 3. controls.3.csv
68 |
69 | [PATHS]
70 | custom_python ~/path/to/my/custom/python/version
71 | ```
72 |
73 | Excluding the comments, this entire analysis pipeline is 13 lines long, and extremely readable! What's even better? If you want to change any steps, it's super easy! That's the power of Metapipe!
74 |
75 |
76 | ## No Queue? No Problem!
77 |
78 | Lots of people don't use a PBS/Torque queue system, or a queue system at all, and metapipe can help them as well! Metapipe runs locally and will give you all the same benefits of a batch queue system! It runs jobs in parallel, provides detailed feedback when jobs go wrong, and automatically re-runs jobs if they fail.
79 |
80 | To run metapipe locally, see the app's help menu!
81 |
82 | `metapipe --help`
83 |
84 |
85 | ## Other Queue Systems
86 |
87 | Metapipe is a very modular tool, and is designed to support any execution backend. Right now we only support PBS, but if you know just a little bit of Python, you can add support for any queue easily! *More information coming soon!*
88 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 |
22 | .PHONY: help
23 | help:
24 | @echo "Please use \`make <target>' where <target> is one of"
25 | @echo " html to make standalone HTML files"
26 | @echo " dirhtml to make HTML files named index.html in directories"
27 | @echo " singlehtml to make a single large HTML file"
28 | @echo " pickle to make pickle files"
29 | @echo " json to make JSON files"
30 | @echo " htmlhelp to make HTML files and a HTML help project"
31 | @echo " qthelp to make HTML files and a qthelp project"
32 | @echo " applehelp to make an Apple Help Book"
33 | @echo " devhelp to make HTML files and a Devhelp project"
34 | @echo " epub to make an epub"
35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
36 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
38 | @echo " text to make text files"
39 | @echo " man to make manual pages"
40 | @echo " texinfo to make Texinfo files"
41 | @echo " info to make Texinfo files and run them through makeinfo"
42 | @echo " gettext to make PO message catalogs"
43 | @echo " changes to make an overview of all changed/added/deprecated items"
44 | @echo " xml to make Docutils-native XML files"
45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
46 | @echo " linkcheck to check all external links for integrity"
47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
48 | @echo " coverage to run coverage check of the documentation (if enabled)"
49 |
50 | .PHONY: clean
51 | clean:
52 | rm -rf $(BUILDDIR)/*
53 |
54 | .PHONY: html
55 | html:
56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
57 | @echo
58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
59 |
60 | .PHONY: dirhtml
61 | dirhtml:
62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
63 | @echo
64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
65 |
66 | .PHONY: singlehtml
67 | singlehtml:
68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
69 | @echo
70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
71 |
72 | .PHONY: pickle
73 | pickle:
74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
75 | @echo
76 | @echo "Build finished; now you can process the pickle files."
77 |
78 | .PHONY: json
79 | json:
80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
81 | @echo
82 | @echo "Build finished; now you can process the JSON files."
83 |
84 | .PHONY: htmlhelp
85 | htmlhelp:
86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
87 | @echo
88 | @echo "Build finished; now you can run HTML Help Workshop with the" \
89 | ".hhp project file in $(BUILDDIR)/htmlhelp."
90 |
91 | .PHONY: qthelp
92 | qthelp:
93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
94 | @echo
95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/metapipe.qhcp"
98 | @echo "To view the help file:"
99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/metapipe.qhc"
100 |
101 | .PHONY: applehelp
102 | applehelp:
103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
104 | @echo
105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
106 | @echo "N.B. You won't be able to view it unless you put it in" \
107 | "~/Library/Documentation/Help or install it in your application" \
108 | "bundle."
109 |
110 | .PHONY: devhelp
111 | devhelp:
112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
113 | @echo
114 | @echo "Build finished."
115 | @echo "To view the help file:"
116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/metapipe"
117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/metapipe"
118 | @echo "# devhelp"
119 |
120 | .PHONY: epub
121 | epub:
122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
123 | @echo
124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
125 |
126 | .PHONY: latex
127 | latex:
128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
129 | @echo
130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
132 | "(use \`make latexpdf' here to do that automatically)."
133 |
134 | .PHONY: latexpdf
135 | latexpdf:
136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
137 | @echo "Running LaTeX files through pdflatex..."
138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
140 |
141 | .PHONY: latexpdfja
142 | latexpdfja:
143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
144 | @echo "Running LaTeX files through platex and dvipdfmx..."
145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
147 |
148 | .PHONY: text
149 | text:
150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
151 | @echo
152 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
153 |
154 | .PHONY: man
155 | man:
156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
157 | @echo
158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
159 |
160 | .PHONY: texinfo
161 | texinfo:
162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
163 | @echo
164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
165 | @echo "Run \`make' in that directory to run these through makeinfo" \
166 | "(use \`make info' here to do that automatically)."
167 |
168 | .PHONY: info
169 | info:
170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
171 | @echo "Running Texinfo files through makeinfo..."
172 | make -C $(BUILDDIR)/texinfo info
173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
174 |
175 | .PHONY: gettext
176 | gettext:
177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
178 | @echo
179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
180 |
181 | .PHONY: changes
182 | changes:
183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
184 | @echo
185 | @echo "The overview file is in $(BUILDDIR)/changes."
186 |
187 | .PHONY: linkcheck
188 | linkcheck:
189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
190 | @echo
191 | @echo "Link check complete; look for any errors in the above output " \
192 | "or in $(BUILDDIR)/linkcheck/output.txt."
193 |
194 | .PHONY: doctest
195 | doctest:
196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
197 | @echo "Testing of doctests in the sources finished, look at the " \
198 | "results in $(BUILDDIR)/doctest/output.txt."
199 |
200 | .PHONY: coverage
201 | coverage:
202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
203 | @echo "Testing of coverage in the sources finished, look at the " \
204 | "results in $(BUILDDIR)/coverage/python.txt."
205 |
206 | .PHONY: xml
207 | xml:
208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
209 | @echo
210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
211 |
212 | .PHONY: pseudoxml
213 | pseudoxml:
214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
215 | @echo
216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
217 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # metapipe documentation build configuration file, created by
5 | # sphinx-quickstart on Mon Jan 25 16:10:38 2016.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | import sys
17 | import os
18 |
19 | from recommonmark.parser import CommonMarkParser
20 |
21 | source_parsers = {
22 | '.md': CommonMarkParser,
23 | }
24 |
25 | # Read the Docs theme
26 |
27 | # on_rtd is whether we are on readthedocs.org, this line of code grabbed from docs.readthedocs.org
28 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
29 |
30 | if not on_rtd: # only import and set the theme if we're building docs locally
31 | import sphinx_rtd_theme
32 | html_theme = 'sphinx_rtd_theme'
33 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
34 | # otherwise, readthedocs.org uses their theme by default, so no need to specify it
35 |
36 | # If extensions (or modules to document with autodoc) are in another directory,
37 | # add these directories to sys.path here. If the directory is relative to the
38 | # documentation root, use os.path.abspath to make it absolute, like shown here.
39 | #sys.path.insert(0, os.path.abspath('.'))
40 |
41 | # -- General configuration ------------------------------------------------
42 |
43 | # If your documentation needs a minimal Sphinx version, state it here.
44 | #needs_sphinx = '1.0'
45 |
46 | # Add any Sphinx extension module names here, as strings. They can be
47 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
48 | # ones.
49 | extensions = []
50 |
51 | # Add any paths that contain templates here, relative to this directory.
52 | #templates_path = ['_templates']
53 |
54 | # The suffix(es) of source filenames.
55 | # You can specify multiple suffix as a list of string:
56 | # source_suffix = ['.rst', '.md']
57 | source_suffix = ['.rst', '.md']
58 |
59 | # The encoding of source files.
60 | #source_encoding = 'utf-8-sig'
61 |
62 | # The master toctree document.
63 | master_doc = 'index'
64 |
65 | # General information about the project.
66 | project = 'metapipe'
67 | copyright = '2016, Brian Schrader'
68 | author = 'Brian Schrader'
69 |
70 | # The version info for the project you're documenting, acts as replacement for
71 | # |version| and |release|, also used in various other places throughout the
72 | # built documents.
73 | #
74 | # The short X.Y version.
75 | version = '0.1'
76 | # The full version, including alpha/beta/rc tags.
77 | release = '0.1'
78 |
79 | # The language for content autogenerated by Sphinx. Refer to documentation
80 | # for a list of supported languages.
81 | #
82 | # This is also used if you do content translation via gettext catalogs.
83 | # Usually you set "language" from the command line for these cases.
84 | language = None
85 |
86 | # There are two options for replacing |today|: either, you set today to some
87 | # non-false value, then it is used:
88 | #today = ''
89 | # Else, today_fmt is used as the format for a strftime call.
90 | #today_fmt = '%B %d, %Y'
91 |
92 | # List of patterns, relative to source directory, that match files and
93 | # directories to ignore when looking for source files.
94 | exclude_patterns = ['_build']
95 |
96 | # The reST default role (used for this markup: `text`) to use for all
97 | # documents.
98 | #default_role = None
99 |
100 | # If true, '()' will be appended to :func: etc. cross-reference text.
101 | #add_function_parentheses = True
102 |
103 | # If true, the current module name will be prepended to all description
104 | # unit titles (such as .. function::).
105 | #add_module_names = True
106 |
107 | # If true, sectionauthor and moduleauthor directives will be shown in the
108 | # output. They are ignored by default.
109 | #show_authors = False
110 |
111 | # The name of the Pygments (syntax highlighting) style to use.
112 | pygments_style = 'sphinx'
113 |
114 | # A list of ignored prefixes for module index sorting.
115 | #modindex_common_prefix = []
116 |
117 | # If true, keep warnings as "system message" paragraphs in the built documents.
118 | #keep_warnings = False
119 |
120 | # If true, `todo` and `todoList` produce output, else they produce nothing.
121 | todo_include_todos = False
122 |
123 |
124 | # -- Options for HTML output ----------------------------------------------
125 |
126 | # The theme to use for HTML and HTML Help pages. See the documentation for
127 | # a list of builtin themes.
128 | #html_theme = 'alabaster'
129 |
130 | # Theme options are theme-specific and customize the look and feel of a theme
131 | # further. For a list of options available for each theme, see the
132 | # documentation.
133 | #html_theme_options = {}
134 |
135 | # Add any paths that contain custom themes here, relative to this directory.
136 | #html_theme_path = []
137 |
138 | # The name for this set of Sphinx documents. If None, it defaults to
139 | # "<project> v<release> documentation".
140 | #html_title = None
141 |
142 | # A shorter title for the navigation bar. Default is the same as html_title.
143 | #html_short_title = None
144 |
145 | # The name of an image file (relative to this directory) to place at the top
146 | # of the sidebar.
147 | #html_logo = None
148 |
149 | # The name of an image file (within the static path) to use as favicon of the
150 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
151 | # pixels large.
152 | #html_favicon = None
153 |
154 | # Add any paths that contain custom static files (such as style sheets) here,
155 | # relative to this directory. They are copied after the builtin static files,
156 | # so a file named "default.css" will overwrite the builtin "default.css".
157 | html_static_path = ['_static']
158 |
159 | # Add any extra paths that contain custom files (such as robots.txt or
160 | # .htaccess) here, relative to this directory. These files are copied
161 | # directly to the root of the documentation.
162 | #html_extra_path = []
163 |
164 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
165 | # using the given strftime format.
166 | #html_last_updated_fmt = '%b %d, %Y'
167 |
168 | # If true, SmartyPants will be used to convert quotes and dashes to
169 | # typographically correct entities.
170 | #html_use_smartypants = True
171 |
172 | # Custom sidebar templates, maps document names to template names.
173 | #html_sidebars = {}
174 |
175 | # Additional templates that should be rendered to pages, maps page names to
176 | # template names.
177 | #html_additional_pages = {}
178 |
179 | # If false, no module index is generated.
180 | #html_domain_indices = True
181 |
182 | # If false, no index is generated.
183 | #html_use_index = True
184 |
185 | # If true, the index is split into individual pages for each letter.
186 | #html_split_index = False
187 |
188 | # If true, links to the reST sources are added to the pages.
189 | #html_show_sourcelink = True
190 |
191 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
192 | #html_show_sphinx = True
193 |
194 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
195 | #html_show_copyright = True
196 |
197 | # If true, an OpenSearch description file will be output, and all pages will
198 | # contain a <link> tag referring to it. The value of this option must be the
199 | # base URL from which the finished HTML is served.
200 | #html_use_opensearch = ''
201 |
202 | # This is the file name suffix for HTML files (e.g. ".xhtml").
203 | #html_file_suffix = None
204 |
205 | # Language to be used for generating the HTML full-text search index.
206 | # Sphinx supports the following languages:
207 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
208 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr'
209 | #html_search_language = 'en'
210 |
211 | # A dictionary with options for the search language support, empty by default.
212 | # Now only 'ja' uses this config value
213 | #html_search_options = {'type': 'default'}
214 |
215 | # The name of a javascript file (relative to the configuration directory) that
216 | # implements a search results scorer. If empty, the default will be used.
217 | #html_search_scorer = 'scorer.js'
218 |
219 | # Output file base name for HTML help builder.
220 | htmlhelp_basename = 'metapipedoc'
221 |
222 | # -- Options for LaTeX output ---------------------------------------------
223 |
224 | latex_elements = {
225 | # The paper size ('letterpaper' or 'a4paper').
226 | #'papersize': 'letterpaper',
227 |
228 | # The font size ('10pt', '11pt' or '12pt').
229 | #'pointsize': '10pt',
230 |
231 | # Additional stuff for the LaTeX preamble.
232 | #'preamble': '',
233 |
234 | # Latex figure (float) alignment
235 | #'figure_align': 'htbp',
236 | }
237 |
238 | # Grouping the document tree into LaTeX files. List of tuples
239 | # (source start file, target name, title,
240 | # author, documentclass [howto, manual, or own class]).
241 | latex_documents = [
242 | (master_doc, 'metapipe.tex', 'metapipe Documentation',
243 | 'Brian Schrader', 'manual'),
244 | ]
245 |
246 | # The name of an image file (relative to this directory) to place at the top of
247 | # the title page.
248 | #latex_logo = None
249 |
250 | # For "manual" documents, if this is true, then toplevel headings are parts,
251 | # not chapters.
252 | #latex_use_parts = False
253 |
254 | # If true, show page references after internal links.
255 | #latex_show_pagerefs = False
256 |
257 | # If true, show URL addresses after external links.
258 | #latex_show_urls = False
259 |
260 | # Documents to append as an appendix to all manuals.
261 | #latex_appendices = []
262 |
263 | # If false, no module index is generated.
264 | #latex_domain_indices = True
265 |
266 |
267 | # -- Options for manual page output ---------------------------------------
268 |
269 | # One entry per manual page. List of tuples
270 | # (source start file, name, description, authors, manual section).
271 | man_pages = [
272 | (master_doc, 'metapipe', 'metapipe Documentation',
273 | [author], 1)
274 | ]
275 |
276 | # If true, show URL addresses after external links.
277 | #man_show_urls = False
278 |
279 |
280 | # -- Options for Texinfo output -------------------------------------------
281 |
282 | # Grouping the document tree into Texinfo files. List of tuples
283 | # (source start file, target name, title, author,
284 | # dir menu entry, description, category)
285 | texinfo_documents = [
286 | (master_doc, 'metapipe', 'metapipe Documentation',
287 | author, 'metapipe', 'One line description of project.',
288 | 'Miscellaneous'),
289 | ]
290 |
291 | # Documents to append as an appendix to all manuals.
292 | #texinfo_appendices = []
293 |
294 | # If false, no module index is generated.
295 | #texinfo_domain_indices = True
296 |
297 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
298 | #texinfo_show_urls = 'footnote'
299 |
300 | # If true, do not generate a @detailmenu in the "Top" node's menu.
301 | #texinfo_no_detailmenu = False
302 |
--------------------------------------------------------------------------------
/docs/extending.md:
--------------------------------------------------------------------------------
1 | # Extending Metapipe
2 |
3 | Metapipe provides 2 extension points for developers to extend its functionality: custom Queues and custom Job Types. In most cases, custom queues are an advanced feature that most users and developers will not need to worry about, but if you must, it is there.
4 |
5 | To add support for a queue system not included with metapipe, all you need to do is add a job type.
6 |
7 |
8 | ## Custom Job types
9 |
10 | All job types are subclasses of the `metapipe.models.Job` class. The base job class implements a lot of the functionality that is common between all job types, and has method stubs for the required functionality that needs to be implemented by any subclass. This section will cover what duty job subclasses have, how to subclass the main `Job` and what to fill in.
11 |
12 |
13 | ### The Root Job Class
14 |
15 | The code for the main job class can be found [here][job]. To create your own job type, simply subclass this as follows:
16 |
17 | ```python
18 | from metapipe.models import Job
19 |
20 | class MyCustomJob(Job):
21 |
22 | def __repr__(self):
23 | return '<MyCustomJob: {}>'.format(self.cmd)
24 | ```
25 |
26 | There are 6 methods you need to fill in to have a complete job class. Your full job subclass should have the following form:
27 |
28 | ```python
29 | class MyCustomJob(Job):
30 |
31 | def __repr__(self):
32 | return ''.format(self.cmd)
33 |
34 | # Override these...
35 |
36 | @property
37 | def cmd(self):
38 | """ Returns the command needed to submit the calculations.
39 | Normally, this would be just running the command, however if
40 | using a queue system, then this should return the command to
41 | submit the command to the queue.
42 | """
43 | pass
44 |
45 | def submit(self):
46 | """ Submits the job to be run. If an external queue system is used,
47 | this method submits itself to that queue. Else it runs the job itself.
48 | :see: call
49 | """
50 | pass
51 |
52 | def is_running(self):
53 | """ Returns whether the job is running or not. """
54 | pass
55 |
56 | def is_queued(self):
57 | """ Returns whether the job is queued or not.
58 | This function is only used if jobs are submitted to an external queue.
59 | """
60 | pass
61 |
62 | def is_complete(self):
63 | """ Returns whether the job is complete or not. """
64 | pass
65 |
66 | def is_error(self):
67 | """ Checks to see if the job errored out. """
68 | pass
69 | ```
70 |
71 | The duty of the job types is to submit the jobs when asked by the queue, and to inform the queue about the status of jobs. The queue needs to know when a job is running, queued, complete, or when an error has occurred.
72 |
73 | Each of the `is_*` callbacks should return a boolean value, and the cmd property should return the bash command (as an array of strings) that can be called to run the job. The job class has an attribute `filename` that contains the path of the bash script containing the job command (i.e. `['bash', self.filename]`).
74 |
75 | **IMPORTANT:** All of the above handlers are required for custom job types to function properly.
76 |
77 | Here is the code for the `cmd` property of the `PBSJob` class:
78 |
79 | ```python
80 | class PBSJob(Job):
81 | #...
82 | @property
83 | def cmd(self):
84 | return ['qsub', self.filename]
85 | #...
86 | ```
87 |
88 | The `submit` call should do any logic pertaining to submitting the job or tracking the number of total submissions. For example, here is the code for submitting a job to the PBS queue:
89 |
90 | ```python
91 | class PBSJob(Job):
92 | #...
93 | def submit(self, job):
94 | if self.attempts == 0:
95 | job.make()
96 | self.attempts += 1
97 | out = call(job.cmd)
98 | self.waiting = False
99 | self.id = out[:out.index('.')]
100 | #...
101 | ```
102 |
103 | As you can see, it keeps track of the number of times the job was submitted, and then calls the `call` function, provided in the root job module, to execute the job. Since PBS assigns job ids to each job at submission-time, it also captures that information and saves it for later use.
104 |
105 | [job]: https://github.com/TorkamaniLab/metapipe/blob/master/metapipe/models/job.py#L20
106 |
107 |
108 | ## Custom Queues
109 |
110 | In the event that your analysis requires more control over the submission process for jobs, the metapipe module also allows for the customization of queue logic by subclassing `metapipe.models.Queue`. This section will cover how to subclass the root queue, but it is left to the reader to determine why you might want to do this. From personal experience, customizing the queue should be a very rare requirement.
111 |
112 |
113 | ### The Root Queue class
114 |
115 | As is the case for custom job types, all queues inherit from the root Queue in `metapipe.models.Queue`, including the main `JobQueue` that is used by the metapipe command line tool.
116 |
117 | To customize the response of the queue to various types of events, subclass it and fill in the following methods. All of the methods are optional, so just omit any handlers that you don't need.
118 |
119 | ```python
120 | class MyCustomQueue(object):
121 |
122 | def __repr__(self):
123 | return '' % len(self.queue)
124 |
125 | # Callbacks...
126 |
127 | def on_start(self):
128 | """ Called when the queue is starting up. """
129 | pass
130 |
131 | def on_end(self):
132 | """ Called when the queue is shutting down. """
133 | pass
134 |
135 | def on_locked(self):
136 | """ Called when the queue is locked and no jobs can proceed.
137 | If this callback returns True, then the queue will be restarted,
138 | else it will be terminated.
139 | """
140 | return True
141 |
142 | def on_tick(self):
143 | """ Called when a tick of the queue is complete. """
144 | pass
145 |
146 | def on_ready(self, job):
147 | """ Called when a job is ready to be submitted.
148 | :param job: The given job that is ready.
149 | """
150 | pass
151 |
152 | def on_submit(self, job):
153 | """ Called when a job has been submitted.
154 | :param job: The given job that has been submitted.
155 | """
156 | pass
157 |
158 | def on_complete(self, job):
159 | """ Called when a job has completed.
160 | :param job: The given job that has completed.
161 | """
162 | pass
163 |
164 | def on_error(self, job):
165 | """ Called when a job has errored.
166 | :param job: The given job that has errored.
167 | """
168 | pass
169 | ```
170 |
171 |
172 | ## Using Your Custom Code
173 |
174 | Once you have subclassed and filled in the required code for your custom job type or queue, it is time to use your code. If your code adapts metapipe to work on a common computing platform, or system then please consider contributing to the metapipe project. This helps the rest of the community use a broader range of hardware to solve our problems!
175 |
176 |
177 | ### Building your custom pipeline
178 |
179 | Use the following code to build your pipeline. This code is taken directly from [metapipe's app.py][app] tool which is the command line tool that metapipe uses to build pipelines.
180 |
181 | ```python
182 | import MyCustomJob
183 |
184 | JOB_TYPES = {
185 | 'my_custom_job_type': MyCustomJob
186 | }
187 |
188 | parser = Parser(config)
189 | try:
190 | command_templates = parser.consume()
191 | except ValueError as e:
192 | raise SyntaxError('Invalid config file. \n%s' % e)
193 |
194 | pipeline = Runtime(command_templates, JOB_TYPES, 'my_custom_job_type')
195 | ```
196 |
197 | **IMPORTANT:** Adding custom queues is coming soon!
198 |
199 | For more information on how to script metapipe once you have custom jobs, see [Scripting Metapipe](scripting.html).
200 |
201 |
202 |
--------------------------------------------------------------------------------
/docs/getting_started.md:
--------------------------------------------------------------------------------
1 | # Getting Started
2 |
3 | This section contains a quick guide for installing, and using metapipe. For the detailed syntax guide, see the [Metapipe Syntax][syntax]
4 |
5 | [syntax]: syntax.html
6 |
7 |
8 | ## Installation
9 |
10 | Metapipe is available on PyPi so installing is easy.
11 |
12 | ```bash
13 | $ pip install metapipe
14 | ```
15 |
16 |
17 | To make it easy, metapipe runs on Python 2.7, 3.4, and 3.5!
18 |
19 |
20 | ## Using metapipe
21 |
22 | By default, metapipe is both a command line tool and a Python module that can be used to build and run pipelines in code. This means that whether you're a user, or a developer Metapipe can be adapted to fit your needs.
23 |
24 | To see metapipe's help menu, type the following, just as you'd expect.
25 |
26 | ```bash
27 | $ metapipe --help
28 | ```
29 |
30 | ## Sample Pipeline
31 |
32 | Here's a simple pipeline you can use for testing metapipe. Typically, complex pipelines are used for things like bioinformatics or batch processing.
33 |
34 | But first, we need some sample files to work with. Run these commands to generate them.
35 |
36 | ```bash
37 | $ echo "SAMPLE DATA 1" > test_file.1.txt
38 | $ echo "SAMPLE DATA 2" > test_file.2.txt
39 | $ echo "SAMPLE DATA 3" > test_file.3.txt
40 | ```
41 |
42 | Now that we have our data, let's analyze it! Here's our sample pipeline:
43 |
44 | ```bash
45 | [COMMANDS]
46 | # Remove the ending number from each of our data files.
47 | cut -f 1-2 -d ' ' {1||2||3} > {o}
48 |
49 | # Paste each of the files together and save it to a final output.
50 | # Since this is our last step, and only 1 output there's no need to have
51 | # metapipe name the output file. We'll call it something ourselves.
52 | paste {1.1,1.2,1.3} > final_output.txt
53 |
54 | [FILES]
55 | 1. test_file.1.txt
56 | 2. test_file.2.txt
57 | 3. test_file.3.txt
58 | ```
59 |
60 | Save that as `sample_pipeline.mp`, open a terminal, and `cd` to that directory.
61 |
62 |
63 | ### Run the sample pipeline locally
64 |
65 | Local execution is the default for metapipe so you just need to specify your metapipe file and an output destination.
66 |
67 | ```bash
68 | $ metapipe -o pipeline.sh sample_pipeline.mp
69 | ```
70 |
71 | This will generate an output script named `pipeline.sh` which will run the pipeline. Simply run it to start your pipeline!
72 |
73 | ```bash
74 | $ sh pipeline.sh
75 | ```
76 |
77 | That's it! Metapipe will run in the foreground watching your jobs complete until everything finishes.
78 |
79 |
80 | ### Run the sample pipeline on PBS
81 |
82 | Simply change the metapipe command to the following:
83 |
84 | ```bash
85 | $ metapipe -o pipeline.sh -j pbs sample_pipeline.mp
86 | ```
87 |
88 | Then simply submit metapipe as a job:
89 |
90 | ```bash
91 | $ qsub pipeline.sh
92 | ```
93 |
94 | Metapipe will run as a job on the PBS/Torque queue and submit other jobs to the same queue! It will keep tabs on the running jobs and submit them when they're ready, then exit when all jobs finish.
95 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. metapipe documentation master file, created by
2 | sphinx-quickstart on Mon Jan 25 16:10:38 2016.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to metapipe's documentation!
7 | ====================================
8 |
9 | Contents:
10 |
11 | .. toctree::
12 | :maxdepth: 2
13 |
14 | what_is_metapipe.md
15 | getting_started.md
16 | syntax.md
17 | scripting.md
18 | extending.md
19 | reporting.md
20 |
21 |
22 | Indices and tables
23 | ==================
24 |
25 | * :ref:`genindex`
26 | * :ref:`modindex`
27 | * :ref:`search`
28 |
29 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | REM Command file for Sphinx documentation
4 |
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=_build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 |
16 | if "%1" == "" goto help
17 |
18 | if "%1" == "help" (
19 | :help
20 | echo.Please use `make ^` where ^ is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and a HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
31 | echo. text to make text files
32 | echo. man to make manual pages
33 | echo. texinfo to make Texinfo files
34 | echo. gettext to make PO message catalogs
35 | echo. changes to make an overview over all changed/added/deprecated items
36 | echo. xml to make Docutils-native XML files
37 | echo. pseudoxml to make pseudoxml-XML files for display purposes
38 | echo. linkcheck to check all external links for integrity
39 | echo. doctest to run all doctests embedded in the documentation if enabled
40 | echo. coverage to run coverage check of the documentation if enabled
41 | goto end
42 | )
43 |
44 | if "%1" == "clean" (
45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
46 | del /q /s %BUILDDIR%\*
47 | goto end
48 | )
49 |
50 |
51 | REM Check if sphinx-build is available and fallback to Python version if any
52 | %SPHINXBUILD% 1>NUL 2>NUL
53 | if errorlevel 9009 goto sphinx_python
54 | goto sphinx_ok
55 |
56 | :sphinx_python
57 |
58 | set SPHINXBUILD=python -m sphinx.__init__
59 | %SPHINXBUILD% 2> nul
60 | if errorlevel 9009 (
61 | echo.
62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
63 | echo.installed, then set the SPHINXBUILD environment variable to point
64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
65 | echo.may add the Sphinx directory to PATH.
66 | echo.
67 | echo.If you don't have Sphinx installed, grab it from
68 | echo.http://sphinx-doc.org/
69 | exit /b 1
70 | )
71 |
72 | :sphinx_ok
73 |
74 |
75 | if "%1" == "html" (
76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
77 | if errorlevel 1 exit /b 1
78 | echo.
79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html.
80 | goto end
81 | )
82 |
83 | if "%1" == "dirhtml" (
84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
85 | if errorlevel 1 exit /b 1
86 | echo.
87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
88 | goto end
89 | )
90 |
91 | if "%1" == "singlehtml" (
92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
93 | if errorlevel 1 exit /b 1
94 | echo.
95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
96 | goto end
97 | )
98 |
99 | if "%1" == "pickle" (
100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
101 | if errorlevel 1 exit /b 1
102 | echo.
103 | echo.Build finished; now you can process the pickle files.
104 | goto end
105 | )
106 |
107 | if "%1" == "json" (
108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
109 | if errorlevel 1 exit /b 1
110 | echo.
111 | echo.Build finished; now you can process the JSON files.
112 | goto end
113 | )
114 |
115 | if "%1" == "htmlhelp" (
116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
117 | if errorlevel 1 exit /b 1
118 | echo.
119 | echo.Build finished; now you can run HTML Help Workshop with the ^
120 | .hhp project file in %BUILDDIR%/htmlhelp.
121 | goto end
122 | )
123 |
124 | if "%1" == "qthelp" (
125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
126 | if errorlevel 1 exit /b 1
127 | echo.
128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
129 | .qhcp project file in %BUILDDIR%/qthelp, like this:
130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\metapipe.qhcp
131 | echo.To view the help file:
132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\metapipe.ghc
133 | goto end
134 | )
135 |
136 | if "%1" == "devhelp" (
137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
138 | if errorlevel 1 exit /b 1
139 | echo.
140 | echo.Build finished.
141 | goto end
142 | )
143 |
144 | if "%1" == "epub" (
145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
146 | if errorlevel 1 exit /b 1
147 | echo.
148 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
149 | goto end
150 | )
151 |
152 | if "%1" == "latex" (
153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
154 | if errorlevel 1 exit /b 1
155 | echo.
156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
157 | goto end
158 | )
159 |
160 | if "%1" == "latexpdf" (
161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
162 | cd %BUILDDIR%/latex
163 | make all-pdf
164 | cd %~dp0
165 | echo.
166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
167 | goto end
168 | )
169 |
170 | if "%1" == "latexpdfja" (
171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
172 | cd %BUILDDIR%/latex
173 | make all-pdf-ja
174 | cd %~dp0
175 | echo.
176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
177 | goto end
178 | )
179 |
180 | if "%1" == "text" (
181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
182 | if errorlevel 1 exit /b 1
183 | echo.
184 | echo.Build finished. The text files are in %BUILDDIR%/text.
185 | goto end
186 | )
187 |
188 | if "%1" == "man" (
189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
190 | if errorlevel 1 exit /b 1
191 | echo.
192 | echo.Build finished. The manual pages are in %BUILDDIR%/man.
193 | goto end
194 | )
195 |
196 | if "%1" == "texinfo" (
197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
198 | if errorlevel 1 exit /b 1
199 | echo.
200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
201 | goto end
202 | )
203 |
204 | if "%1" == "gettext" (
205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
206 | if errorlevel 1 exit /b 1
207 | echo.
208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
209 | goto end
210 | )
211 |
212 | if "%1" == "changes" (
213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
214 | if errorlevel 1 exit /b 1
215 | echo.
216 | echo.The overview file is in %BUILDDIR%/changes.
217 | goto end
218 | )
219 |
220 | if "%1" == "linkcheck" (
221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
222 | if errorlevel 1 exit /b 1
223 | echo.
224 | echo.Link check complete; look for any errors in the above output ^
225 | or in %BUILDDIR%/linkcheck/output.txt.
226 | goto end
227 | )
228 |
229 | if "%1" == "doctest" (
230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
231 | if errorlevel 1 exit /b 1
232 | echo.
233 | echo.Testing of doctests in the sources finished, look at the ^
234 | results in %BUILDDIR%/doctest/output.txt.
235 | goto end
236 | )
237 |
238 | if "%1" == "coverage" (
239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
240 | if errorlevel 1 exit /b 1
241 | echo.
242 | echo.Testing of coverage in the sources finished, look at the ^
243 | results in %BUILDDIR%/coverage/python.txt.
244 | goto end
245 | )
246 |
247 | if "%1" == "xml" (
248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
249 | if errorlevel 1 exit /b 1
250 | echo.
251 | echo.Build finished. The XML files are in %BUILDDIR%/xml.
252 | goto end
253 | )
254 |
255 | if "%1" == "pseudoxml" (
256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
257 | if errorlevel 1 exit /b 1
258 | echo.
259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
260 | goto end
261 | )
262 |
263 | :end
264 |
--------------------------------------------------------------------------------
/docs/pipeline.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TorkamaniLab/metapipe/15592e5b0c217afb00ac03503f8d0d7453d4baf4/docs/pipeline.gif
--------------------------------------------------------------------------------
/docs/reporting.md:
--------------------------------------------------------------------------------
1 | # Measuring Pipeline Progress
2 |
3 | While Metapipe runs your pipeline, it writes updates to `stdout`. These updates can be useful, but often it is more convenient to get additional information in a richer format.
4 |
5 | Metapipe provides a few different methods of visualizing the progress of your pipeline. These options are specified by the `--report-type` option.
6 |
7 |
8 | ## Text based reporting
9 |
10 | ```
11 | --report-type text
12 | ```
13 |
14 | This option is the default. Metapipe will write to `stdout` and this can be redirected to a file.
15 |
16 |
17 | ## HTML based reporting
18 |
19 | ```
20 | --report-type html
21 | ```
22 |
23 | Using this option, Metapipe will generate an HTML report of the pipeline as it runs. This static report represents the current state of the pipeline and what steps have already been completed. The report also includes a progress bar that reports a visualization of the rough progress of the pipeline.
24 |
25 | **Important:** This progress indicator is based on the number of overall steps to be completed and represents the number of steps remaining. This has no correlation with the amount of time remaining, as that depends on the length of time each step takes.
26 |
--------------------------------------------------------------------------------
/docs/scripting.md:
--------------------------------------------------------------------------------
1 | # Scripting Metapipe
2 |
3 | In addition to being a command line tool, metapipe is also a Python module. You can use this module to extend, or script metapipe to fit your specific uses. This section will discuss scripting metapipe, and building/running jobs using Python. For information on how to extend metapipe's builtin job types or queue system, see [Extending Metapipe](extending.html).
4 |
5 |
6 | ## The Run Interface
7 |
8 | The first, and easiest way to script Metapipe is by invoking it via the Python interface.
9 |
10 |
11 | ```python
12 | from metapipe import run
13 |
14 | config_text = get_config_text()
15 | run(config_text)
16 | ```
17 |
18 | For detailed information, see the [run method's docstring](https://github.com/TorkamaniLab/metapipe/blob/master/metapipe/app.py#L90).
19 |
20 |
--------------------------------------------------------------------------------
/docs/syntax.md:
--------------------------------------------------------------------------------
1 | # Metapipe Syntax
2 |
3 | The syntax for Pipeline Config files is as follows.
4 |
5 |
6 | ## Section Definitions
7 |
8 | In each Metapipe file, there are a number of different sections you can specify. Each has their own purpose and function. Each section is denoted with a header in brackets at the top of the section.
9 |
10 | All sections support comments, and in most sections, they are not parsed as input.
11 |
12 |
13 | ### Commands
14 |
15 | The commands section is the only required Metapipe config section. Specified by the `[COMMANDS]` header, this is where the various steps of the pipeline are specified. Commands are very similar to normal shell commands, and most shell commands are valid. The only difference is in the input/output of each command. For these sections, use Metapipe's command syntax to indicate the location and desired input and output.
16 |
17 | **Example:**
18 |
19 | ```bash
20 | [COMMANDS]
21 | # Here we cat a hardcoded input file into sed
22 | # and redirect the output to a metapipe output token.
23 | cat somefile.txt | sed 's/replace me/with me' > {o}
24 | ```
25 |
26 | Metapipe automatically creates a filename for the given output token and assigns that file an alias. The alias structure is `command_number.command_iteration-output_number`, where the output number is optional.
27 |
28 | **Important:** Commands are *NOT* run sequentially. As commands are parsed, they are evaluated based on what inputs they take in and what outputs they generate. For more information: see [Command Structure](#command-structure). Commands are run as soon as they are deemed ready and any command that does not specify inputs via Metapipe's input patterns will be run immediately.
29 |
30 |
31 | ### Paths
32 |
33 | The paths section allows users to simplify their commands by creating aliases or short names to binaries. Paths are structured as a single word alias followed by a space and the rest of the line is considered the path. The paths section is denoted by the `[PATHS]` header.
34 |
35 | ```bash
36 | [COMMANDS]
37 | # Here we've aliased Python. When the script is generated,
38 | # the hardcoded path will be substituted in.
39 | python2 my_script.py
40 |
41 | # Here we're using the builtin python and using paths
42 | # to simplify the arguments.
43 | python my_script.py somefile
44 |
45 | [PATHS]
46 | python2 /usr/local/bin/python2.7.4
47 | somefile /a/long/file/path
48 | ```
49 |
50 | Paths can also be used to create pseudo-variables for long configuration options. When doing this, it's recommended to use a bash-variable-like syntax because it reminds the reader that the variable is not a literal in the command.
51 |
52 | **Reminder**: Paths are substituted in after the inputs have been processed. This means that `{}` characters are treated as literals and not as input markers.
53 |
54 | ```bash
55 | [COMMANDS]
56 | # Here, the braces represent an output token,
57 | # but the $OPTIONS variable will be evaluated
58 | # as a literal {}
59 | python my_script.py -o {o} $OPTIONS
60 |
61 | [PATHS]
62 | $OPTIONS -rfg --do-something --no-save --get --no-get -I {}
63 | ```
64 |
65 |
66 | ### Files
67 |
68 | For a given pipeline, there is usually a set of input or auxiliary files. These files go through the analysis and other steps require the output of one command as the input for another. This is where most of the power of Metapipe's syntax comes into play. The files section is denoted as `[FILES]`.
69 |
70 | Files are specified using a number followed by a period, and then the path to the given file. The number is the file's alias, and once that alias is assigned, it can be used in commands.
71 |
72 | ```bash
73 | [COMMANDS]
74 | cat {1} | sed 's/replace me/with me' > {o}
75 | cat {2} | cut -f 1 | sort | uniq > {o}
76 |
77 | [FILES]
78 | 1. somefile.1
79 | 2. /path/to/somefile.2
80 |
81 | ```
82 |
83 | In this example, we use the aliases of files 1 and 2 to perform different analysis on each file. Then, when the input files need to change, they can be changed in the `[FILES]` section and the pipeline remains the same.
84 |
85 |
86 | ### Job Options
87 |
88 | The job options section, denoted by `[JOB_OPTIONS]`, is a section that allows the user to specify a global set of options for all jobs. This helps reduce pipeline redundancy.
89 |
90 | ```bash
91 | # Each of the commands in this pipeline need to
92 | # be working in a scratch directory.
93 | [COMMANDS]
94 | cat somefile.1.txt | sed 's/replace me/with me' > {o}
95 | cat somefile.2.txt | sed 's/replace me/with you' > {o}
96 | cat somefile.3.txt | sed 's/replace you/with me' > {o}
97 |
98 | [JOB_OPTIONS]
99 | set -e
100 | cd /var/my_project/
101 |
102 | # This config will result in the following:
103 | # ------- Job 1 ---------
104 | set -e
105 | cd /var/my_project/
106 | cat somefile.1.txt | sed 's/replace me/with me' > {o}
107 | ```
108 |
109 | The set of commands in Job Options will be carried over to every job in the pipeline. This can be extremely useful when setting configuration comments for a queue system.
110 |
111 | ```bash
112 | # Each of the commands needs 4GB of RAM
113 | [COMMANDS]
114 | cat somefile.1.txt | sed 's/replace me/with me' > {o}
115 | cat somefile.2.txt | sed 's/replace me/with you' > {o}
116 | cat somefile.3.txt | sed 's/replace you/with me' > {o}
117 |
118 | [JOB_OPTIONS]
119 | #PBS -l mem=4096mb
120 | ```
121 |
122 | Job Options allow users to make their pipelines more clear and less redundant by allowing them to follow the [DRY][dry] principle.
123 |
124 | [dry]: https://en.wikipedia.org/wiki/Don%27t_repeat_yourself
125 |
126 |
127 | ## Command Structure
128 |
129 | Now that all of the concepts and supported sections have been explained, it's time to take a look at the command structure and how to take advantage of Metapipe's advanced features.
130 |
131 |
132 | ### Input Patterns
133 |
134 | Consider the following command:
135 |
136 | ```bash
137 | [COMMANDS]
138 | python somescript {1||2||3}
139 |
140 | [FILES]
141 | 1. some_file1.txt
142 | 2. some_file2.txt
143 | 3. some_file3.txt
144 | ```
145 |
146 | This command will run the python script 3 times in parallel, once with each
147 | file specified. The output will look something like this:
148 |
149 | ```bash
150 | # Output
151 | # ------
152 |
153 | python somescript some_file1.txt
154 | python somescript some_file2.txt
155 | python somescript some_file3.txt
156 | ```
157 |
158 | #### Running a script with multiple inputs
159 |
160 | Let's say that you have a script that takes multiple files as input. In this
161 | case the syntax becomes:
162 |
163 | ```bash
164 | [COMMANDS]
165 | python somescript {1,2,3}
166 |
167 | [FILES]
168 | 1. some_file1.txt
169 | 2. some_file2.txt
170 | 3. some_file3.txt
171 |
172 | # Output
173 | # ------
174 |
175 | python somescript some_file1.txt some_file2.txt some_file3.txt
176 | ```
177 |
178 |
179 | ### Output Patterns
180 |
181 | Whenever a script would take an explicit output filename you can use the output
182 | pattern syntax to tell metapipe where/what it should use.
183 |
184 | ```bash
185 | [COMMANDS]
186 | python somescript -o {o} {1||2||3}
187 |
188 | [FILES]
189 | 1. some_file1.txt
190 | 2. some_file2.txt
191 | 3. some_file3.txt
192 |
193 | # Output
194 | # ------
195 |
196 | python somescript -o mp.1.1.output some_file1.txt
197 | python somescript -o mp.1.2.output some_file2.txt
198 | python somescript -o mp.1.3.output some_file3.txt
199 | ```
200 |
201 | Metapipe will generate the filename with the command's alias inside. An upcoming feature will provide more useful output names.
202 |
203 |
204 | #### Implicit or Hardcoded output
205 |
206 | In a case where the script or command you want to use generates an output that
207 | is not passed through the command, but you need to use for another step in the
208 | pipeline, you can use output patterns to tell metapipe what to look for.
209 |
210 | Consider this:
211 |
212 | ```bash
213 | [COMMANDS]
214 | # This command doesn't provide an output filename
215 | # so metapipe can't automatically track it.
216 | ./do_count {1||2}
217 | ./analyze.sh {1.*}
218 |
219 | [FILES]
220 | 1. foo.txt
221 | 2. bar.txt
222 | ```
223 |
224 | This set of commands is invalid because the second command (`./analyze.sh`)
225 | doesn't know what the output of command 1 is because it isn't specified.
226 | The `./do_count` command generates output based on the input filenames it is given.
227 |
228 | Since we wrote the `./do_count` script, we know that it generates files with a
229 | `.counts` extension. But since we don't explicitly specify the files, in
230 | this case Metapipe cannot assume the file names generated by step 1 and this
231 | config file is invalid.
232 |
233 | We can tell metapipe what the output should look like by using an output pattern.
234 |
235 | ```bash
236 | [COMMANDS]
237 | # We've now told Metapipe what the output file name
238 | # will look like. It can now track the file as normal.
239 | ./do_count {1||2} #{o:*.counts}
240 | ./analyze.sh {1.*}
241 |
242 | [FILES]
243 | 1. foo.txt
244 | 2. bar.txt
245 | ```
246 |
247 | The above example tells metapipe that the output of command 1, which is
248 | hardcoded in the script will have an output that ends in `.counts`. Now that
249 | the output of command 1 is known, command 2 will wait until command 1 finishes.
250 |
251 | When the output marker has the form `{o}`, then metapipe will insert a
252 | pregenerated filename into the command. The output marker `{o:pattern}` means
253 | that the output of the script is *not* determined by the input of the script,
254 | but it *will* match the given pattern. This means that later commands will be able
255 | to reference the files by name.
256 |
257 |
258 | ### Multiple Inputs and Outputs
259 |
260 | Often times a given shell command will either take multiple dynamic files as input, or generate multiple files as output. In either case, metapipe provides a way to manage and track these files.
261 |
262 | For multiple inputs, metapipe expects the number of inputs per command to be the same, and will iterate over them in order.
263 |
264 | **Example:**
265 |
266 | ```bash
267 | # Given the following:
268 | [COMMANDS]
269 | bash somescript {1||2||3} --conf {4||5||6} > {o}
270 |
271 | [FILES]
272 | 1. somefile.1
273 | 2. somefile.2
274 | 3. somefile.3
275 | 4. somefile.4
276 | 5. somefile.5
277 | 6. somefile.6
275 |
276 | # Metapipe will return this:
277 | bash somescript somefile.1 --conf somefile.4 > mp.1.1.output
278 | bash somescript somefile.2 --conf somefile.5 > mp.1.2.output
279 | bash somescript somefile.3 --conf somefile.6 > mp.1.3.output
280 | ```
281 |
282 | Metapipe will name the multiple output files as follows (in order from left to right):
283 |
284 | `mp.{command_number}.{sub_command_number}-{output_number}`
285 |
286 | **Example:**
287 |
288 | ```bash
289 | # Given an input like the one below:
290 | [COMMANDS]
291 | bash somescript {1||2||3} --log {o} -r {o}
292 |
293 | [FILES]
294 | 1. somefile.1
295 | 2. somefile.2
296 | 3. somefile.3
297 |
298 | # metapipe will generate the following:
299 | bash somescript somefile.1 --log mp.1.1-1.output -r mp.1.1-2.output
300 | bash somescript somefile.2 --log mp.1.2-1.output -r mp.1.2-2.output
301 | bash somescript somefile.3 --log mp.1.3-1.output -r mp.1.3-2.output
302 | ```
303 |
304 |
305 |
306 | ## Sample config.mp file
307 |
308 | ```bash
309 | [COMMANDS]
310 | # Here we run our analysis script on every gzipped file
311 | # in the current directory and output the results to a file.
312 | python my_custom_script.py -o {o} {*.gz||}
313 |
314 | # Take all the outputs of step 1 and feed them to cut.
315 | cut -f 1 {1.*||} > {o}
316 |
317 | # Oh no! You hardcoded the output name? No problem! Just tell metapipe
318 | # what the filename is.
319 | python my_other_custom_code.py {2.*} #{o:hardcoded_output.csv}
320 |
321 | # Now you want to compare your results to some controls? Ok!
322 | # Metapipe will compare your hardcoded_output to all 3
323 | # controls at the same time!
324 | python my_compare_script.py -o {o} $OPTIONS --compare {1||2||3} {3.1}
325 |
326 | # Finally, you want to make some pretty graphs? No problem!
327 | # But wait! You want R 2.0 for this code? Just create an alias for R!
328 | Rscript my_cool_graphing_code.r {4.*} > {o}
329 |
330 | [FILES]
331 | 1. controls.1.csv
332 | 2. controls.2.csv
333 | 3. controls.3.csv
334 |
335 | [PATHS]
336 | Rscript ~/path/to/my/custom/R/version
337 | $OPTIONS -rne --get --no-get -v --V --log-level 1
338 | ```
339 |
--------------------------------------------------------------------------------
/docs/what_is_metapipe.md:
--------------------------------------------------------------------------------
1 | # Metapipe
2 |
3 | *A pipeline for building analysis pipelines.*
4 |
5 | Metapipe is a simple command line tool for building and running complex analysis pipelines. If you use a PBS/Torque queue for cluster computing, or if you have complex batch processing that you want simplified, metapipe is the tool for you.
6 |
7 | Metapipe's goal is to improve **readability**, and **maintainability** when building complex pipelines.
8 |
9 | In addition to helping you generate and maintain complex pipelines, **metapipe also helps you debug them**! How? Well metapipe watches your jobs execute and keeps tabs on them. This means, unlike conventional batch queue systems like PBS/Torque alone, metapipe can give you accurate error information, and even resubmit failing jobs! Metapipe enhances the power of any PBS/Torque queue!
10 |
11 | - What if I [don't use PBS/Torque](#other-queue-systems), or [a queue system at all?](#no-queue-no-problem)
12 |
13 |
14 | ## What does it do?
15 |
16 | In the bad old days (before metapipe), if you wanted to make an analysis pipeline, you needed to know how to code. **Not anymore!** Metapipe makes it easy to build and run your analysis pipelines! **No more code, just commands!** This makes your pipelines easy to understand and change!
17 |
18 | A sample metapipe file can be found in [Metapipe Syntax](syntax.html)
19 |
20 |
21 | ## No Queue? No Problem!
22 |
23 | Lots of people don't use a PBS/Torque queue system, or a queue system at all, and metapipe can help them as well! Metapipe runs locally and will give you all the same benefits of a batch queue system! It runs jobs in parallel, provides detailed feedback when jobs go wrong, and automatically re-runs jobs if they fail.
24 |
25 | To run metapipe locally, see the app's help menu!
26 |
27 | `metapipe --help`
28 |
29 |
30 | ## Other Queue Systems
31 |
32 | Metapipe is a very modular tool, and is designed to support any execution backend. Right now we only support PBS, but if you know just a little bit of Python, you can add support for any queue easily! *More information coming soon!*
33 |
34 |
--------------------------------------------------------------------------------
/metapipe/__init__.py:
--------------------------------------------------------------------------------
1 | from .app import run
2 |
--------------------------------------------------------------------------------
/metapipe/__main__.py:
--------------------------------------------------------------------------------
""" Entry point for `python -m metapipe`. """

# Bug fix: the bare `from app import main` relied on Python 2's implicit
# relative imports and raises ImportError under Python 3.
from metapipe.app import main

main()
3 |
--------------------------------------------------------------------------------
/metapipe/app.py:
--------------------------------------------------------------------------------
1 | """ A pipeline that generates analysis pipelines.
2 |
3 | author: Brian Schrader
4 | since: 2015-12-22
5 | """
6 |
7 | from __future__ import print_function
8 |
9 | import argparse, pickle, os, sys
10 |
11 | from multiprocessing import cpu_count
12 |
13 | import pyparsing
14 |
15 | from .parser import Parser
16 | from .models import Command, LocalJob, PBSJob, SGEJob, \
17 | HtmlReportingJobQueue, TextReportingJobQueue
18 | from .runtime import Runtime
19 | from metapipe.templates import env
20 |
21 |
__version__ = '1.2-1'


# Alias used for the top-level pipeline job when it is itself submitted
# to a queue (see make_submit_job below).
PIPELINE_ALIAS = "metapipe.queue.job"

# Maps the --job-type CLI option to the Job subclass that runs commands.
JOB_TYPES = {
    'local': LocalJob,
    'pbs': PBSJob,
    'sge': SGEJob,
}

# Maps the --report-type CLI option to the progress-reporting queue class.
QUEUE_TYPES = {
    'text': TextReportingJobQueue,
    'html': HtmlReportingJobQueue,
}
37 |
38 |
def main():
    """ Parses the command-line args, and calls run.

    Exits with code 0 after printing the version when --version is given;
    returns -1 when no readable config file is provided.
    """
    parser = argparse.ArgumentParser(
        description='A pipeline that generates analysis pipelines.')
    parser.add_argument('input', nargs='?',
        help='A valid metapipe configuration file.')
    parser.add_argument('-o', '--output',
        help='An output destination. If none is provided, the '
        'results will be printed to stdout.',
        default=sys.stdout)
    parser.add_argument('-t', '--temp',
        help='A desired metapipe binary file. This is used to store '
        'temp data between generation and execution. '
        '(Default: "%(default)s")', default='.metapipe')
    parser.add_argument('-s', '--shell',
        help='The path to the shell to be used when executing the '
        'pipeline. (Default: "%(default)s)"',
        default='/bin/bash')
    parser.add_argument('-r', '--run',
        help='Run the pipeline as soon as it\'s ready.',
        action='store_true')
    # NOTE(review): --name is parsed but not currently used below.
    parser.add_argument('-n', '--name',
        help='A name for the pipeline.',
        default='')
    parser.add_argument('-j', '--job-type',
        help='The destination for calculations (i.e. local, a PBS '
        'queue on a cluster, etc).\nOptions: {}. '
        '(Default: "%(default)s)"'.format(JOB_TYPES.keys()),
        default='local')
    # Bug fix: without type=int the value arrived as a string, breaking
    # the numeric concurrent-job limit downstream.
    parser.add_argument('-p', '--max-jobs', type=int,
        help='The maximum number of concurrent jobs allowed. '
        'Defaults to maximum available cores.',
        default=None)
    parser.add_argument('--report-type',
        help='The output report type. By default metapipe will '
        'print updates to the console. \nOptions: {}. '
        '(Default: "%(default)s)"'.format(QUEUE_TYPES.keys()),
        default='text')
    parser.add_argument('-v','--version',
        help='Displays the current version of the application.',
        action='store_true')
    args = parser.parse_args()

    if args.version:
        print('Version: {}'.format(__version__))
        sys.exit(0)

    try:
        # `input` is optional; open(None) raises TypeError rather than
        # IOError, so catch both to report a missing config uniformly.
        with open(args.input) as f:
            config = f.read()
    except (IOError, TypeError):
        print('No valid config file found.')
        return -1

    run(config, args.max_jobs, args.output, args.job_type, args.report_type,
        args.shell, args.temp, args.run)
95 |
96 |
def run(config, max_jobs, output=sys.stdout, job_type='local',
        report_type='text', shell='/bin/bash', temp='.metapipe', run_now=False):
    """ Create the metapipe based on the provided input.

    :param config: the text of a metapipe config file.
    :param max_jobs: maximum concurrent jobs; None means cpu_count().
    :param output: a filename to write the generated script to, or an
        already-open file-like object (defaults to stdout).
    :param job_type: a key of JOB_TYPES selecting the execution backend.
    :param report_type: a key of QUEUE_TYPES selecting progress reporting.
    :param shell: shell used by the generated script.
    :param temp: path for the pickled pipeline state.
    :param run_now: when True, also submit the pipeline job immediately.
    :raises SyntaxError: if the config file cannot be parsed.
    """
    if max_jobs is None:
        max_jobs = cpu_count()

    parser = Parser(config)
    try:
        command_templates = parser.consume()
    except ValueError as e:
        raise SyntaxError('Invalid config file. \n%s' % e)
    options = '\n'.join(parser.global_options)

    queue_type = QUEUE_TYPES[report_type]
    pipeline = Runtime(command_templates, queue_type, JOB_TYPES, job_type,
        max_jobs)

    template = env.get_template('output_script.tmpl.sh')
    with open(temp, 'wb') as f:
        # Protocol 2 keeps the pickle loadable by the template's runner.
        pickle.dump(pipeline, f, 2)
    script = template.render(shell=shell,
        temp=os.path.abspath(temp), options=options)

    if run_now:
        output = output if output != sys.stdout else PIPELINE_ALIAS
        submit_job = make_submit_job(shell, output, job_type)
        submit_job.submit()

    # `output` is either a filename or a file-like object. Only close the
    # handle when we opened it here; the original unconditionally called
    # f.close() and raised UnboundLocalError whenever `output` was already
    # a stream (the default).
    opened_here = False
    try:
        output = open(output, 'w')
        opened_here = True
    except TypeError:
        pass

    output.write(script)
    if opened_here:
        output.close()
132 |
133 |
def make_submit_job(shell, output, job_type):
    """ Preps the metapipe main job to be submitted.

    :param shell: path to the shell used to run the generated script.
    :param output: filename of the generated pipeline script.
    :param job_type: a key of JOB_TYPES selecting the job backend.
    :returns: the prepared (but not yet submitted) job.
    """
    run_cmd = [shell, output]
    # Bug fix: Command's keyword is `parts`, not `cmds`, and there is no
    # `get_job` helper in this module -- look the job class up directly.
    submit_command = Command(alias=PIPELINE_ALIAS, parts=run_cmd)
    submit_job = JOB_TYPES[job_type](PIPELINE_ALIAS, submit_command)
    submit_job.make()
    return submit_job
141 |
142 |
# Allow this module to be executed directly as a script.
if __name__ == '__main__':
    main()
145 |
--------------------------------------------------------------------------------
/metapipe/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .command import Command
2 | from .command_template import CommandTemplate
3 | from .command_template_factory import *
4 | from .job import Job, call
5 | from .job_template import JobTemplate
6 | from .local_job import LocalJob
7 | from .grammar import Grammar
8 | from .pbs_job import PBSJob
9 | from .sge_job import SGEJob
10 | from .queue import HtmlReportingJobQueue, TextReportingJobQueue, \
11 | ReportingJobQueue, BaseQueue
12 | from .tokens import FileToken, Input, Output, PathToken, CommentToken
13 |
--------------------------------------------------------------------------------
/metapipe/models/command.py:
--------------------------------------------------------------------------------
1 | """ A command model that can be easily transformed into jobs.
2 |
3 | author: Brian Schrader
4 | since: 2015-12-21
5 | """
6 |
7 | from .tokens import Input, Output, FileToken, PathToken, alias_pattern
8 |
9 |
class Command(object):
    """ A shell command assembled from tokenized parts (inputs, outputs,
    paths, and literal strings) that evaluates to a script fragment.
    """

    def __init__(self, alias, parts=None):
        """ Create a command from an alias and a list of parts.

        Bug fix: `parts=[]` was a shared mutable default; a fresh list is
        used instead. Output tokens are given aliases derived from the
        command alias (numbered when there is more than one output).
        """
        self.alias = alias
        self.parts = parts if parts is not None else []
        if len(self.output_parts) > 1:
            # Multiple outputs get numbered aliases via alias_pattern.
            for i, output in enumerate(self.output_parts):
                output.alias = alias_pattern.format(command=self.alias,
                        output_number=i+1)
        else:
            for output in self.output_parts:
                output.alias = self.alias

    def __repr__(self):
        # Bug fix: the format string had been reduced to '' and every
        # command printed as an empty string.
        return '<Command: {}>'.format(self.alias)

    @property
    def depends_on(self):
        """ Returns a list of command template aliases that the given command
        template depends on.
        """
        return [part.command_alias for part in self.input_parts
                if part.command_alias is not None]

    @property
    def input_parts(self):
        """ Returns a list of the input tokens in the list of parts. """
        return [part for part in self.file_parts
                if isinstance(part, Input)]

    @property
    def output_parts(self):
        """ Returns a list of the output tokens in the list of parts. """
        return [part for part in self.file_parts
                if isinstance(part, Output)]

    @property
    def file_parts(self):
        """ Returns a list of the file tokens in the list of parts.

        Parts may be nested one level deep inside lists; non-iterable
        parts are checked directly.
        """
        file_parts = []
        for part in self.parts:
            try:
                for sub_part in part:
                    if isinstance(sub_part, FileToken):
                        file_parts.append(sub_part)
            except TypeError:
                if isinstance(part, FileToken):
                    file_parts.append(part)
        return file_parts

    @property
    def path_parts(self):
        """ Returns a list of the path tokens in the list of parts. """
        return [part for part in self.parts
                if isinstance(part, PathToken)]

    def update_dependent_files(self, prev_commands=()):
        """ Update the command's dependencies based on the evaluated input and
        output of previous commands.
        """
        for command in prev_commands:
            for my_input in self.input_parts:
                for their_output in command.output_parts:
                    if their_output == my_input:
                        my_input.filename = their_output.eval()

    def eval(self):
        """ Evaluate the given job and return a complete shell script to be
        run by the job manager.
        """
        eval = []
        for part in self.parts:
            try:
                result = part.eval()
            except AttributeError:
                # Plain strings have no eval(); use them verbatim.
                result = part
            if result[-1] != '\n':
                result += ' '
            eval.append(result)
        return ''.join(eval).strip()
90 |
91 |
92 |
--------------------------------------------------------------------------------
/metapipe/models/command_template.py:
--------------------------------------------------------------------------------
1 | """ A template for creating commands.
2 |
3 | author: Brian Schrader
4 | since: 2016-01-13
5 | """
6 |
7 | import copy, collections
8 |
9 | from .tokens import Input, Output, FileToken, PathToken, CommentToken
10 | from .command import Command
11 |
12 |
class Ticker(object):
    """ A counter that wraps back around once it reaches a fixed maximum. """

    def __init__(self, maxlen, value=0):
        self.maxlen = maxlen
        self.value = value

    def tick(self, n=1):
        """ Advance the counter by n, wrapping past maxlen. """
        advanced = self.value + n
        if advanced >= self.maxlen:
            advanced -= self.maxlen
        self.value = advanced
23 |
24 |
class CommandTemplate(Command):
    """ A command whose input groups have not yet been expanded; eval()
    produces the list of concrete Command objects it represents.
    """

    def __init__(self, alias, parts=None, dependencies=None):
        """ Bug fix: `parts=[]`/`dependencies=[]` were shared mutable
        defaults; fresh lists are used instead.
        """
        self.alias = alias
        self.parts = parts if parts is not None else []
        self._dependencies = dependencies if dependencies is not None else []

    def __repr__(self):
        # Bug fix: the format string had been reduced to '' and every
        # template printed as an empty string.
        return '<CommandTemplate: {} (parts: {}, deps: {})>'.format(
            self.alias, len(self.parts), len(self._dependencies))

    @property
    def depends_on(self):
        """ Returns a list of command template aliases that the given command
        template depends on.
        """
        return [dep.alias for dep in self._dependencies]

    @property
    def file_parts(self):
        """ Returns a list of the file tokens in the list of parts. """
        return _search_for_files(self.parts)

    def eval(self):
        """ Returns a list of Command objects that can be evaluated as their
        string values. Each command will track its preliminary dependencies,
        but these values should not be depended on for running commands.
        """
        max_size = _get_max_size(self.parts)
        parts_list = _grow([[]], max_size-1)

        counter = Ticker(max_size)
        parts = self.parts[:]
        while len(parts) > 0:
            parts_list, counter = _get_parts_list(parts,
                    parts_list, counter)

        commands = []
        for i, parts in enumerate(parts_list):
            alias = self._get_alias(i+1)
            # Deep-copy so each command owns independent token objects.
            new_parts = copy.deepcopy(parts)
            commands.append(Command(alias=alias, parts=new_parts))
        return commands

    def _get_alias(self, index):
        """ Given an index, return the string alias for that command. """
        return '{}.{}'.format(self.alias, index)
72 |
73 |
def _get_parts_list(to_go, so_far=[[]], ticker=None):
    """ Iterates over to_go, building the list of parts. To provide
    items for the beginning, use so_far.

    Destructively consumes one part of `to_go` per call (pop(0)) and
    distributes it over the per-command sub-lists in `so_far`; `ticker`
    selects which sub-list receives the next plain Input. Returns the
    (so_far, ticker) pair so callers loop until `to_go` is empty.

    NOTE(review): `so_far=[[]]` is a shared mutable default; callers in
    this module always pass it explicitly.
    """
    try:
        part = to_go.pop(0)
    except IndexError:
        # Nothing left to distribute.
        return so_far, ticker

    # Lists of input groups: recurse into each group, advancing the
    # ticker between groups.
    if isinstance(part, list) and any(isinstance(e, list) for e in part):
        while len(part) > 0:
            so_far, ticker = _get_parts_list(part, so_far, ticker)
            ticker.tick()
    # Input Group: recurse without advancing, keeping the group together.
    elif isinstance(part, list) and any(isinstance(e, Input) for e in part):
        while len(part) > 0:
            so_far, ticker = _get_parts_list(part, so_far, ticker)
    # Magic Inputs: expand the wildcard into concrete inputs first.
    elif isinstance(part, Input) and part.is_magic:
        inputs = part.eval()
        while len(inputs) > 0:
            so_far, ticker = _get_parts_list(inputs, so_far, ticker)
            ticker.tick()
    # Normal inputs go only to the sub-list the ticker points at.
    elif isinstance(part, Input) and not part.is_magic:
        so_far[ticker.value].append(part)
    # Everything else (strings, paths, outputs) goes to every command.
    else:
        so_far = _append(so_far, part)

    return so_far, ticker
106 |
107 |
def _get_max_size(parts, size=1):
    """ Given a list of parts, find the maximum number of commands
    contained in it.

    (`size` is unused; kept for signature compatibility.)
    """
    # The widest explicit input group sets the base command count...
    group_lengths = [len(part) for part in parts if isinstance(part, list)]
    widest = max(group_lengths) if group_lengths else 0
    # ...scaled by the expansion factor of any magic (wildcard) inputs.
    return widest * _get_magic_size(parts)
124 |
125 |
def _get_magic_size(parts, size=1):
    """ Return the expansion factor contributed by magic inputs.

    Scans `parts` (recursing into nested lists) for magic Input tokens
    and returns the evaluated size of the first one that exceeds `size`,
    or the running `size` when none does.

    NOTE(review): returns immediately on the first magic input larger
    than `size` rather than taking the maximum over all of them --
    presumably intentional, but worth confirming.
    """
    for part in parts:
        if isinstance(part, Input) and part.is_magic:
            magic_size = len(part.eval())
            if magic_size > size:
                return magic_size
        elif isinstance(part, list):
            size = _get_magic_size(part, size)
    return size
135 |
136 |
137 | def _append(so_far, item):
138 | """ Appends an item to all items in a list of lists. """
139 | for sub_list in so_far:
140 | sub_list.append(item)
141 | return so_far
142 |
143 |
144 | def _grow(list_of_lists, num_new):
145 | """ Given a list of lists, and a number of new lists to add, copy the
146 | content of the first list into the new ones, and add them to the list
147 | of lists.
148 | """
149 | first = list_of_lists[0]
150 | for i in range(num_new):
151 | list_of_lists.append(copy.deepcopy(first))
152 | return list_of_lists
153 |
154 |
def _search_for_files(parts):
    """ Given a list of parts, return all of the nested file parts,
    preserving depth-first, in-order traversal.
    """
    found = []
    pending = list(parts)
    while pending:
        item = pending.pop(0)
        if isinstance(item, list):
            # Prepend so nested items are visited before later siblings.
            pending = list(item) + pending
        elif isinstance(item, FileToken):
            found.append(item)
    return found
164 |
165 |
166 |
--------------------------------------------------------------------------------
/metapipe/models/command_template_factory.py:
--------------------------------------------------------------------------------
1 | """ A factory for building individual commands based on the full list
2 | of commands and inputs.
3 |
4 | author: Brian Schrader
5 | since: 2016-01-12
6 | """
7 |
8 |
9 | from .tokens import Input, Output, PathToken, CommentToken
10 | from .command import Command
11 | from .command_template import CommandTemplate
12 | from .grammar import OR_TOKEN, AND_TOKEN
13 |
14 |
def get_command_templates(command_tokens, file_tokens=None, path_tokens=None,
        job_options=None):
    """ Given a list of tokens from the grammar, return a
    list of commands.

    :param command_tokens: parsed command tokens (consumed destructively).
    :param file_tokens: parsed [FILES] tokens (consumed destructively).
    :param path_tokens: parsed [PATHS] tokens (consumed destructively).
    :param job_options: raw option strings, wrapped as comment tokens.
    """
    # Bug fix: the old `=[]` defaults were shared module-level lists that
    # the helpers below pop from; use fresh lists per call instead.
    file_tokens = [] if file_tokens is None else file_tokens
    path_tokens = [] if path_tokens is None else path_tokens
    job_options = [] if job_options is None else job_options

    files = get_files(file_tokens)
    paths = get_paths(path_tokens)
    job_options = get_options(job_options)

    templates = _get_command_templates(command_tokens, files, paths,
            job_options)

    # Attach preliminary dependency links between the templates.
    for command_template in templates:
        command_template._dependencies = _get_prelim_dependencies(
            command_template, templates)
    return templates
31 |
32 |
def get_files(file_tokens, cwd=None):
    """ Given a list of parser file tokens, return a list of input objects
    for them.

    Consumes `file_tokens` destructively (pop from the end). When `cwd`
    is given it is attached to every Input created.
    """
    if not file_tokens:
        return []

    token = file_tokens.pop()
    try:
        filename = token.filename
    except AttributeError:
        # Alias-only entries carry no filename.
        filename = ''

    if cwd:
        input = Input(token.alias, filename, cwd=cwd)
    else:
        input = Input(token.alias, filename)

    # Bug fix: the recursive call previously dropped `cwd`, so only the
    # first-popped token ever received it.
    return [input] + get_files(file_tokens, cwd)
52 |
53 |
def get_paths(path_tokens):
    """ Given a list of parser path tokens, return a list of path objects
    for them. Consumes `path_tokens` destructively (pop from the end).
    """
    if not path_tokens:
        return []

    tail = path_tokens.pop()
    converted = PathToken(tail.alias, tail.path)
    return [converted] + get_paths(path_tokens)
64 |
65 |
def get_options(options):
    """ Given a list of options, tokenize them.

    Each option string is wrapped in a CommentToken (see _get_comments).
    """
    return _get_comments(options)
69 |
70 |
71 | # Internal Implementation
72 |
73 |
def _get_command_templates(command_tokens, files=[], paths=[], job_options=[],
        count=1):
    """ Recursively create command templates.

    Pops one command token per call and converts its parts into
    file/path/string tokens; templates are aliased 1-based via `count`.

    NOTE(review): pop() takes tokens from the END of `command_tokens`, so
    the caller appears to rely on the parser's ordering for aliases to
    line up with config-file order -- confirm against Parser.consume().
    """
    if not command_tokens:
        return []

    comment_tokens, command_token = command_tokens.pop()
    parts = []

    # Global job options and per-command comments lead the parts list.
    parts += job_options + _get_comments(comment_tokens)
    for part in command_token[0]:
        # Check for file
        try:
            parts.append(_get_file_by_alias(part, files))
            continue
        except (AttributeError, ValueError):
            pass

        # Check for path/string
        for cut in part.split():
            try:
                parts.append(_get_path_by_name(cut, paths))
                continue
            except ValueError:
                pass

            parts.append(cut)

    command_template = CommandTemplate(alias=str(count), parts=parts)
    # Outputs inherit the template's alias so later commands can refer
    # to them by command number.
    [setattr(p, 'alias', command_template.alias)
        for p in command_template.output_parts]
    return [command_template] + _get_command_templates(command_tokens,
            files, paths, job_options, count+1)
107 |
108 |
109 | def _get_prelim_dependencies(command_template, all_templates):
110 | """ Given a command_template determine which other templates it
111 | depends on. This should not be used as the be-all end-all of
112 | dependencies and before calling each command, ensure that it's
113 | requirements are met.
114 | """
115 | deps = []
116 | for input in command_template.input_parts:
117 | if '.' not in input.alias:
118 | continue
119 | for template in all_templates:
120 | for output in template.output_parts:
121 | if input.fuzzy_match(output):
122 | deps.append(template)
123 | break
124 | return list(set(deps))
125 |
126 |
def _get_file_by_alias(part, files):
    """ Given a command part, find the file it represents. If not found,
    then returns a new token representing that file.

    Returns a single Output token, or a list of input groups (each group
    a list of Input tokens); OR separators in `part` start a new group.
    :throws ValueError: if the value is not a command file alias.
    """
    # Make Output
    if _is_output(part):
        return Output.from_string(part.pop())

    # Search/Make Input
    else:
        inputs = [[]]

        # `magic_or` is set by the grammar when the separator sits
        # directly before '}' (the "{...||}" form).
        if part.magic_or:
            and_or = 'or'
        else:
            and_or = 'and'

        for cut in part.asList():
            if cut == OR_TOKEN:
                # Start a new input group after an OR separator.
                inputs.append([])
                continue
            if cut == AND_TOKEN:
                continue

            input = Input(cut, filename=cut, and_or=and_or)
            for file in files:
                if file.alias == cut:
                    # Override the filename
                    input.filename = file.filename
                    inputs[-1].append(input)
                    break
            else:
                # No declared [FILES] entry matched; keep the
                # alias-named input as-is (for/else: loop didn't break).
                inputs[-1].append(input)


        # Drop any empty groups left by trailing separators.
        return [input for input in inputs if input]
164 |
165 |
166 | def _get_path_by_name(part, paths):
167 | """ Given a command part, find the path it represents.
168 | :throws ValueError: if no valid file is found.
169 | """
170 | for path in paths:
171 | if path.alias == part:
172 | return path
173 | raise ValueError
174 |
def _get_comments(parts):
    """ Given a list of parts representing a list of comments, return the
    list of comment tokens.
    """
    return list(map(CommentToken, parts))
180 |
181 |
182 | def _is_output(part):
183 | """ Returns whether the given part represents an output variable. """
184 | if part[0].lower() == 'o':
185 | return True
186 | elif part[0][:2].lower() == 'o:':
187 | return True
188 | elif part[0][:2].lower() == 'o.':
189 | return True
190 | else:
191 | return False
192 |
193 |
--------------------------------------------------------------------------------
/metapipe/models/grammar.py:
--------------------------------------------------------------------------------
1 | """ Grammars for various parts of the input file. """
2 |
3 | from pyparsing import *
4 |
5 |
# ASCII letters, digits, and punctuation permitted in filenames/paths --
# note '{' and '}' (the input/output marker delimiters) and whitespace
# are deliberately absent.
approved_printables = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`|~'

lbrack = Literal('[').suppress()
rbrack = Literal(']').suppress()
pound = Literal('#')

# Sentinel strings substituted for '||' and ',' separators inside markers.
# NOTE(review): both sentinels are the identical string '<>', which makes
# OR and AND indistinguishable downstream (the AND branch in
# command_template_factory can never fire) -- this looks mangled from two
# distinct tokens; confirm against upstream history before changing.
OR_TOKEN = '<>'
AND_TOKEN = '<>'
14 |
class classproperty(property):
    """ A read-only property accessible on the class itself (no instance
    needed), used by Grammar to expose lazily-built grammar fragments.
    """
    def __get__(self, cls, owner):
        # self.fget is the wrapped staticmethod; bind it to the owner
        # class, then call it to build the fragment on each access.
        return self.fget.__get__(None, owner)()
18 |
19 |
class Grammar(object):
    """ A container class for the various grammars in the input files. """

    # A "[SECTION]" header line; the brackets are suppressed.
    _section = lbrack + Word(alphas+'_') + rbrack
    # Any non-section line: first word plus the remainder of the line.
    line = ~lbrack + Word(printables) + restOfLine
    # A line that does not start with '#', grouped as (first word, rest).
    _non_comment_line = ~pound + Group(Word(printables) + restOfLine)

    # The "{...}" input/output marker inside a command. ',' separators
    # become AND_TOKEN and '||' become OR_TOKEN; a '||' sitting directly
    # before '}' is captured under 'magic_or' (the "{...||}" form).
    __command_input_output = (
        Suppress('{') +
        OneOrMore(
            Group(OneOrMore(
                Combine(
                    Word(alphanums+'.*:/_-') +
                    Optional('.' + Word(nums))
                ) +
                Optional((
                    Suppress(',' + FollowedBy('}')) ^
                    Suppress(',')
                ).addParseAction(replaceWith(AND_TOKEN)).setResultsName('_and')) +
                Optional(
                    ('||' + FollowedBy('}')).addParseAction(
                        replaceWith(OR_TOKEN)).setResultsName('magic_or') ^
                    Suppress('||').addParseAction(
                        replaceWith(OR_TOKEN)).setResultsName('_or')
                )
            ))) +
        Suppress('}')
    )

    @classproperty
    @staticmethod
    def overall():
        """ The overall grammar for pulling apart the main input files. """
        return ZeroOrMore(Grammar.comment) + Dict(ZeroOrMore(Group(
            Grammar._section + ZeroOrMore(Group(Grammar.line)))
        ))

    @classproperty
    @staticmethod
    def comment():
        """ A '#' comment running to the end of the line. """
        return ('#' + Optional(restOfLine))

    @classproperty
    @staticmethod
    def file():
        """ Grammar for files found in the overall input files:
        an optional "<alias>." prefix followed by the filename.
        """
        return (
            Optional(Word(alphanums).setResultsName('alias') +
                Suppress(Literal('.'))) + Suppress(White()) +
            Word(approved_printables).setResultsName('filename')
        )

    @classproperty
    @staticmethod
    def path():
        """ Grammar for paths found in the overall input files:
        an alias followed by the rest of the line as the path.
        """
        return (
            Word(approved_printables).setResultsName('alias') +
            Suppress(White()) +
            restOfLine.setResultsName('path')
        )

    @classproperty
    @staticmethod
    def command_lines():
        """ Grammar for command lines: each command is grouped with any
        comment lines that precede it.
        """
        return ZeroOrMore(Group(
            Group(ZeroOrMore(Group(Grammar.comment))) + Grammar._non_comment_line
        ))

    @classproperty
    @staticmethod
    def command():
        """ Grammar for a single command: literal text alternating with
        "{...}" input/output markers.
        """
        return (
            OneOrMore(
                Word(approved_printables+' ').setResultsName('command',
                    listAllMatches=True) ^
                Grammar.__command_input_output.setResultsName('_in',
                    listAllMatches=True)
            )
        )
102 |
--------------------------------------------------------------------------------
/metapipe/models/job.py:
--------------------------------------------------------------------------------
1 | """ A basic job model, and local job implementation.
2 |
3 | author: Brian Schrader
4 | since: 2016-01-04
5 | """
6 |
7 | import os
8 | from subprocess import Popen, PIPE
9 |
10 |
def call(args, stdout=PIPE, stderr=PIPE):
    """ Calls the given arguments in a separate process
    and returns the contents of standard out.

    :returns: an (out, err) tuple, decoded to text when the stdout
        encoding can be determined, otherwise as raw bytes.
    """
    # Bug fix: `sys` was referenced below without ever being imported in
    # this module, so the decode always fell into the bytes fallback.
    import sys

    p = Popen(args, stdout=stdout, stderr=stderr)
    out, err = p.communicate()

    try:
        # sys.stdout.encoding can be None (e.g. when piped); decode()
        # then raises and we return the raw bytes instead.
        return out.decode(sys.stdout.encoding), err.decode(sys.stdout.encoding)
    except Exception:
        return out, err
22 |
23 |
class Job(object):
    """ A template job class that just runs the given command script locally.
    To make your own custom jobs, subclass this Job and override the status
    methods, the submit method, and cmd property.

    Submitting a job cannot block execution. The submit call should return
    immediately so that other jobs can be executed, and tracked.
    """

    # Pattern for the on-disk script filename; formatted with the alias.
    JOB_FILE_PATTERN = 'metapipe.{}.job'
    # Maximum number of submission attempts before giving up.
    MAX_RETRY = 5

    def __init__(self, alias, command, depends_on=None):
        """ Create an new job with the given name, and command.

        :param alias: a unique name for the job; also used in the job
            script's filename.
        :param command: an object whose eval() yields the script text.
        :param depends_on: aliases of jobs that must complete first.
        """
        self.command = command
        # Bug fix: `depends_on=[]` was a shared mutable default.
        self.depends_on = depends_on if depends_on is not None else []
        self.alias = alias
        self.attempts = 0
        self.filename = self.JOB_FILE_PATTERN.format(self.alias)

    def __repr__(self):
        # Bug fix: the format string had been reduced to '' and every
        # job printed as an empty string.
        return '<Job: {}>'.format(self.alias)

    def __cmp__(self, other):
        # Python 2 only; Python 3 ignores __cmp__ (see __lt__ below).
        return cmp(self.alias, other.alias)

    def __lt__(self, other):
        # Python 3 replacement for __cmp__-based ordering: sort by alias.
        return self.alias < other.alias

    def make(self):
        """ Evaluate the command, and write it to a file. """
        eval = self.command.eval()
        with open(self.filename, 'w') as f:
            f.write(eval)

    @property
    def should_retry(self):
        # True while resubmission attempts remain.
        return self.attempts < self.MAX_RETRY

    # Override these...

    @property
    def cmd(self):
        """ Returns the command needed to submit the calculations.
        Normally, this would be just running the command, however if
        using a queue system, then this should return the command to
        submit the command to the queue.
        """
        pass

    def submit(self):
        """ Submits the job to be run. If an external queue system is used,
        this method submits itself to that queue. Else it runs the job itself.
        :see: call
        """
        pass

    def is_running(self):
        """ Returns whether the job is running or not. """
        pass

    def is_queued(self):
        """ Returns whether the job is queued or not.
        This function is only used if jobs are submitted to an external queue.
        """
        pass

    def is_complete(self):
        """ Returns whether the job is complete or not. """
        pass

    def is_error(self):
        """ Checks to see if the job errored out. """
        pass

    def is_failed(self):
        """ Checks to see if the job has failed. This is usually if the job
        should not be resubmitted.
        """
        pass
100 |
101 |
102 |
--------------------------------------------------------------------------------
/metapipe/models/job_template.py:
--------------------------------------------------------------------------------
1 | """ A template that evaluates to muliple jobs and places them back on the queue.
2 | author: Brian Schrader
3 | since: 2016-02-19
4 | """
5 |
6 | from .job import Job
7 |
8 |
class JobTemplate(Job):
    """ A pseudo-job that, when submitted, expands its command template
    into concrete jobs and pushes them onto the queue.
    """

    def __init__(self, alias, command_template, depends_on, queue, job_class):
        """ :param queue: the queue the expanded jobs are pushed onto.
        :param job_class: the Job subclass used for the expanded jobs.
        """
        super(JobTemplate, self).__init__(alias, command_template, depends_on)
        self.command_template = command_template
        self.queue = queue
        self.job_class = job_class
        self.jobs = []

    def __repr__(self):
        # Bug fix: the format string had been reduced to '' and every
        # template printed as an empty string.
        return '<JobTemplate: {}>'.format(self.alias)

    def submit(self):
        """ Expand the template into jobs and push each onto the queue. """
        jobs = self._get_jobs_from_template(self.command_template, self.job_class)
        [self.queue.push(job) for job in jobs]
        self.jobs = jobs

    def is_running(self):
        if len(self.jobs) > 0:
            return any(job.is_running() for job in self.jobs)
        return False

    def is_queued(self):
        return False

    def is_complete(self):
        # Complete only once every expanded job is complete.
        if len(self.jobs) > 0:
            return all(job.is_complete() for job in self.jobs)
        return False

    def is_error(self):
        if len(self.jobs) > 0:
            return all(job.is_error() for job in self.jobs)
        return False

    def is_fail(self):
        # Bug fix: the comparison's result was computed but never
        # returned, so this method always returned None.
        return self.attempts > self.MAX_RETRY

    def _get_jobs_from_template(self, template, job_class):
        """ Given a template, a job class, construct jobs from
        the given template.
        """
        jobs = []
        for command in template.eval():
            alias = command.alias
            # Keep only dependencies that correspond to real queued jobs.
            depends_on = [job.alias
                    for job in self.queue.all_jobs
                    for deps in command.depends_on
                    if deps == job.alias]
            command.update_dependent_files([job.command
                    for job in self.queue.all_jobs
                    if not isinstance(job, JobTemplate)])

            job = job_class(alias, command, depends_on)
            jobs.append(job)
        return jobs
65 |
66 |
--------------------------------------------------------------------------------
/metapipe/models/local_job.py:
--------------------------------------------------------------------------------
1 | import threading
2 |
3 | from . import Job, call
4 |
5 |
6 | LOCAL_LOG_FORMAT = '{}_{}'
7 |
8 |
class LocalJobCallThread(threading.Thread):
    """ Runs a blocking callable in a separate thread, capturing the
    (stdout, stderr) pair the callable returns.
    """

    def __init__(self, callable, *args, **kwargs):
        """ :param callable: invoked in run(); must return a
        (stdout, stderr) tuple.
        """
        threading.Thread.__init__(self)
        self.callable = callable
        self.args = args
        self.kwargs = kwargs
        # Fix: these were previously initialized twice (before and after
        # Thread.__init__). Populated by run() once the callable finishes.
        self.stdout = None
        self.stderr = None

    def run(self):
        self.stdout, self.stderr = self.callable(*self.args, **self.kwargs)
24 |
25 |
class LocalJob(Job):
    """ A Job subclass that runs its generated script locally in a
    background thread.
    """

    def __init__(self, alias, command, depends_on=None, shell='bash'):
        # Fix: avoid a shared mutable default argument for depends_on.
        super(LocalJob, self).__init__(alias, command, depends_on or [])
        self.shell = shell
        self._task = None       # LocalJobCallThread once submitted.
        self._err = False

    def __repr__(self):
        # Fix: the old format string contained no placeholder, so repr()
        # always returned ''.
        return '<LocalJob: {}>'.format(self.cmd)

    @property
    def cmd(self):
        """ The local shell invocation for the generated job script. """
        return [self.shell, self.filename]

    def submit(self):
        """ Write the job script and launch it in a worker thread. """
        self.make()
        self.attempts += 1
        self._task = LocalJobCallThread(call, self.cmd)
        self._task.start()

    def is_running(self):
        try:
            return self._task.is_alive()
        except AttributeError:
            # _task is None: not yet submitted.
            return False

    def is_queued(self):
        """ Returns False since local jobs are not submitted to an
        external queue.
        """
        return False

    def is_complete(self):
        """ True once the worker thread has finished; joins the thread
        and writes the captured output to the log files.
        """
        try:
            if not self._task.is_alive():
                self._task.join()
                self._write_log()
                return True
        except AttributeError:
            pass
        return False

    def is_error(self):
        """ Checks to see if the job errored out.
        NOTE(review): this inspects stderr only while the task is still
        alive, at which point _task.stderr is None -- so the inner check
        raises AttributeError and this appears to always return False.
        Confirm intent before changing the condition.
        """
        try:
            if self._task.is_alive():
                if len(self._task.stderr.readlines()) > 0:
                    self._task.join()
                    self._write_log()
                    return True
        except AttributeError:
            pass
        return False

    def is_fail(self):
        return not self.should_retry

    def _write_log(self):
        """ Write the captured stdout/stderr to the job's log files. """
        alias = Job.JOB_FILE_PATTERN.format(self.alias)
        outlog, errlog = (LOCAL_LOG_FORMAT.format(alias, 'stdout'),
                LOCAL_LOG_FORMAT.format(alias, 'stderr'))

        with open(outlog, 'w+') as f:
            f.write(str(self._task.stdout))
        with open(errlog, 'w+') as f:
            f.write(str(self._task.stderr))
94 |
95 |
--------------------------------------------------------------------------------
/metapipe/models/pbs_job.py:
--------------------------------------------------------------------------------
1 | from . import Job, call
2 |
3 |
class PBSJob(Job):
    """ A Job subclass for running tasks on a PBS/Torque queue via qsub. """

    def __init__(self, alias, command, depends_on=None, queue='work'):
        # Fix: avoid a shared mutable default argument for depends_on.
        super(PBSJob, self).__init__(alias, command, depends_on or [])
        self.queue = queue
        self.id = None
        self.waiting = True     # The job has yet to be submitted.

    def submit(self):
        """ Write the script (first attempt only) and qsub it.
        The job id is the part of qsub's output before the first dot
        (qsub prints '<id>.<server>'); raises ValueError if the output
        has no dot.
        """
        if self.attempts == 0:
            self.make()
        self.attempts += 1
        out, err = call(self.cmd)
        self.waiting = False
        self.id = out[:out.index('.')]

    @property
    def cmd(self):
        """ The qsub invocation for the generated job script. """
        return ['qsub', self.filename]

    def is_running(self):
        """ Checks to see if the job is running. """
        return self._grep_qstat('running')

    def is_queued(self):
        """ Checks to see if the job is queued. """
        return self._grep_qstat('queued')

    def is_complete(self):
        """ Checks the job's output or log file to determine if
        the completion criteria was met.
        """
        qstat = self._grep_qstat('complete')
        comp = self._grep_status('complete')
        return bool(qstat and comp)

    def is_fail(self):
        return not self.should_retry

    def is_error(self):
        """ Checks to see if the job errored out. """
        qstat = self._grep_qstat('error')
        err = self._grep_status('error')
        return bool(qstat and err)

    def _grep_qstat(self, status_type='complete'):
        """ Greps qstat for the job's state code.
        :param status_type: complete, queued, running, error, gone
        """
        args = "qstat -e {}".format(self.id).split()
        res, _ = call(args)
        if res == '':
            return False
        try:
            # Skip the two header lines; the state code is column 5.
            res = res.split('\n')[2].split()[4]
        except IndexError:
            # Robustness fix: malformed or short qstat output.
            return False

        if status_type == 'complete' and res == 'C':
            return True
        elif status_type == 'error' and (res == 'E' or res == 'C'):
            return True
        elif status_type == 'running' and res == 'R':
            return True
        elif status_type == 'queued' and res == 'Q':
            return True
        elif status_type == 'gone' and 'unknown job id' in str(res).lower():
            # NOTE(review): by this point res is a single state column, so
            # this substring can never match -- confirm where the
            # 'unknown job id' text actually appears.
            return True
        else:
            return False

    def _grep_status(self, status_type):
        """ Greps through the job's qstat -f output to see if it
        finished with the requested status.
        :param status_type: complete, error
        """
        args = "qstat -f {}".format(self.id).split()
        res, _ = call(args)
        exit_status = [line for line in res.split('\n')
                if 'exit_status' in line]
        try:
            _, __, code = exit_status[0].split()
        except IndexError:
            # No exit_status line yet (job still running or unknown).
            code = None

        if status_type == 'complete' and code == '0':
            return True
        elif status_type == 'error' and code != '0':
            return True
        else:
            return False
102 |
103 |
--------------------------------------------------------------------------------
/metapipe/models/queue.py:
--------------------------------------------------------------------------------
1 | """ A simple manager for a task queue.
2 |
3 | The manager handles creating, submitting, and managing
4 | running jobs, and can even resubmit jobs that have failed.
5 |
6 | author: Brian Schrader
7 | since: 2015-08-27
8 | """
9 | from .reporting import BaseReportingMixin, HtmlReportingMixin, TextReportingMixin
10 | from .job_template import JobTemplate
11 |
class BaseQueue(object):
    """ An abstract class for managing a queue of jobs. To use this class,
    subclass it and fill in the callbacks you need.
    """

    # Upper bound on the number of simultaneously running jobs.
    MAX_CONCURRENT_JOBS = 10

    def __init__(self, name=''):
        self.name = name
        self.queue = []         # Jobs waiting to be submitted.
        self.running = []       # Jobs currently submitted/executing.
        self.failed = []        # Jobs that gave up after retries.
        self.complete = []      # Jobs that finished successfully.

    def __repr__(self):
        # Fix: the old format string contained no placeholder, so repr()
        # always returned ''.
        return '<Queue: %s jobs>' % str(len(self.active_jobs))

    @property
    def is_empty(self):
        return len(self.active_jobs) == 0

    @property
    def active_jobs(self):
        """ Returns a list of all jobs submitted to the queue,
        or in progress.
        """
        return list(set(self.queue + self.running))

    @property
    def all_jobs(self):
        """ Returns a list of all jobs submitted to the queue, complete,
        in-progress or failed.
        """
        return list(set(self.complete + self.failed + self.queue + self.running))

    @property
    def progress(self):
        """ Returns the percentage of jobs that are no longer active.
        (Docstring fix: only the percentage is returned.)
        """
        total = len(self.all_jobs)
        remaining = total - len(self.active_jobs) if total > 0 else 0
        percent = int(100 * (float(remaining) / total)) if total > 0 else 0
        return percent

    def ready(self, job):
        """ Determines if the job is ready to be submitted to the
        queue. It checks if the job depends on any currently
        running or queued operations.
        """
        no_deps = len(job.depends_on) == 0
        all_complete = all(j.is_complete() for j in self.active_jobs
                if j.alias in job.depends_on)
        none_failed = not any(True for j in self.failed
                if j.alias in job.depends_on)
        queue_is_open = len(self.running) < self.MAX_CONCURRENT_JOBS
        return queue_is_open and (no_deps or (all_complete and none_failed))

    def locked(self):
        """ Determines if the queue is locked: some active job depends on
        a job that has already failed.
        """
        if len(self.failed) == 0:
            return False
        for fail in self.failed:
            for job in self.active_jobs:
                if fail.alias in job.depends_on:
                    return True
        # Fix: previously fell off the end returning an implicit None.
        return False

    def push(self, job):
        """ Push a job onto the queue. This does not submit the job. """
        self.queue.append(job)

    def tick(self):
        """ Submits all the given jobs in the queue and watches their
        progress as they proceed. This function yields at the end of
        each iteration of the queue.
        :raises RuntimeError: If queue is locked.
        """
        self.on_start()
        while not self.is_empty:
            # Phase 1: submit waiting jobs whose dependencies are met.
            cruft = []
            for job in self.queue:
                if not self.ready(job):
                    continue
                self.on_ready(job)
                try:
                    job.submit()
                except ValueError:
                    # Submission failed: retry later or give up for good.
                    if job.should_retry:
                        self.on_error(job)
                        job.attempts += 1
                    else:
                        self.on_fail(job)
                        cruft.append(job)
                        self.failed.append(job)
                else:
                    self.running.append(job)
                    self.on_submit(job)
                    cruft.append(job)

            self.queue = [job for job in self.queue if job not in cruft]

            # Phase 2: poll running jobs and move finished ones along.
            cruft = []
            for job in self.running:
                if job.is_running() or job.is_queued():
                    pass
                elif job.is_complete():
                    self.on_complete(job)
                    cruft.append(job)
                    self.complete.append(job)
                elif job.is_fail():
                    self.on_fail(job)
                    cruft.append(job)
                    self.failed.append(job)
                elif job.is_error():
                    # NOTE(review): errored jobs are removed from
                    # `running` without being re-queued or recorded --
                    # confirm this is intentional.
                    self.on_error(job)
                    cruft.append(job)
                else:
                    pass
            self.running = [job for job in self.running if job not in cruft]

            if self.locked() and self.on_locked():
                raise RuntimeError
            self.on_tick()
            yield
        self.on_end()

    # Callbacks...

    def on_start(self):
        """ Called when the queue is starting up. """
        pass

    def on_end(self):
        """ Called when the queue is shutting down. """
        pass

    def on_locked(self):
        """ Called when the queue is locked and no jobs can proceed.
        If this callback returns True, then the queue will be
        terminated with a RuntimeError, else it keeps ticking.
        """
        return True

    def on_tick(self):
        """ Called when a tick of the queue is complete. """
        pass

    def on_ready(self, job):
        """ Called when a job is ready to be submitted.
        :param job: The given job that is ready.
        """
        pass

    def on_submit(self, job):
        """ Called when a job has been submitted.
        :param job: The given job that has been submitted.
        """
        pass

    def on_complete(self, job):
        """ Called when a job has completed.
        :param job: The given job that has completed.
        """
        pass

    def on_error(self, job):
        """ Called when a job has errored. By default, the job
        is resubmitted until some max threshold is reached.
        :param job: The given job that has errored.
        """
        pass

    def on_fail(self, job):
        """ Called when a job has failed after multiple resubmissions. The
        given job will be removed from the queue.
        :param job: The given job that has errored.
        """
        pass
189 |
190 |
class ReportingJobQueue(BaseReportingMixin, BaseQueue):
    """ An abstract subclass of the Queue which reports on progress. """

    @property
    def real_jobs(self):
        """ Returns all jobs that represent work. """
        return [job for job in self.all_jobs
                if not isinstance(job, JobTemplate)]

    def on_locked(self):
        """ Report the deadlock and terminate the queue. """
        self.render('The queue is locked. Please check the logs.',
                self.progress)
        return True

    def on_submit(self, job):
        """ Report a submission, skipping template pseudo-jobs. """
        if isinstance(job, JobTemplate):
            return
        self.render('Submitted: %s' % job.alias, self.progress)

    def on_complete(self, job):
        """ Report a completion, skipping template pseudo-jobs. """
        if isinstance(job, JobTemplate):
            return
        self.render('Complete: %s' % job.alias, self.progress)

    def on_error(self, job):
        """ Report a retryable error, skipping template pseudo-jobs. """
        if isinstance(job, JobTemplate):
            return
        self.render('Error: Job %s has failed, retrying (%s/%s)'
                % (job.alias, str(job.attempts), str(job.MAX_RETRY)),
                self.progress)

    def on_fail(self, job):
        """ Report a permanent failure, skipping template pseudo-jobs. """
        if isinstance(job, JobTemplate):
            return
        self.render('Error: Job %s has failed. Retried %s times.'
                % (job.alias, str(job.attempts)), self.progress)

    def on_end(self):
        """ Report that the whole pipeline has finished. """
        self.render('All jobs are complete.', self.progress)
224 |
225 |
class HtmlReportingJobQueue(HtmlReportingMixin, ReportingJobQueue):
    """ A queue that generates HTML reports. """
229 |
230 |
class TextReportingJobQueue(TextReportingMixin, ReportingJobQueue):
    """ A queue that generates textual reports. """
234 |
--------------------------------------------------------------------------------
/metapipe/models/reporting.py:
--------------------------------------------------------------------------------
1 | """ A series of mixins for reporting. """
2 | from datetime import datetime as dt
3 |
4 | from metapipe.templates import env
5 | template = env.get_template('progress-report.tmpl.html')
6 |
7 |
class BaseReportingMixin(object):
    """ An abstract mixin for reporting. """

    # Timestamp format shared by every concrete reporter.
    message_format = '%Y-%m-%d %H:%M:%S'

    def render(self, message, progress):
        """ Render the output of the report. The base implementation is
        a no-op; subclasses override it.
        """
        return None
16 |
17 |
class HtmlReportingMixin(BaseReportingMixin):
    """ A reporting mixin that writes progress to an HTML report.
    Relies on the host class providing `name` and `real_jobs`.
    """

    # Class-level default; render() rebinds an instance attribute so
    # separate queue instances no longer share one message history.
    messages = []
    output = 'metapipe.report.html'

    def render(self, message, progress):
        """ Prepend a timestamped message and rewrite the HTML report. """
        msg = Message(dt.strftime(dt.now(), self.message_format), message)
        # Fix: rebind rather than mutate, so the class-level list is not
        # shared/mutated across instances.
        self.messages = [msg] + self.messages
        with open(self.output, 'w') as f:
            # Fix: `template` is the module-level Jinja template loaded
            # above; `self.template` is never defined and raised
            # AttributeError on the first render.
            # NOTE(review): sorted() assumes jobs are orderable -- confirm.
            f.write(template.render(
                name=self.name,
                messages=self.messages, progress=progress,
                jobs=sorted(self.real_jobs)))
31 |
32 |
class TextReportingMixin(BaseReportingMixin):
    """ A reporting mixin that prints any progress to the console. """

    def render(self, message, progress):
        """ Print one timestamped progress line. """
        timestamp = dt.strftime(dt.now(), self.message_format)
        print('[{}%] {} {}'.format(progress, timestamp, message))
39 |
40 |
class Message(object):
    """ A timestamped message shown in progress reports. """

    def __init__(self, time, text):
        # `time` is the preformatted display timestamp.
        self.time, self.text = time, text
45 |
--------------------------------------------------------------------------------
/metapipe/models/sge_job.py:
--------------------------------------------------------------------------------
1 | from . import Job, call
2 |
3 |
class SGEJob(Job):
    """ A Job subclass for running tasks on an SGE queue via qsub. """

    def __init__(self, alias, command, depends_on=None, queue='work'):
        # Fix: avoid a shared mutable default argument for depends_on.
        super(SGEJob, self).__init__(alias, command, depends_on or [])
        self.queue = queue
        self.id = None
        self.waiting = True     # The job has yet to be submitted.

    def submit(self):
        """ Write the script (first attempt only) and qsub it, recording
        the job id from the third whitespace-separated token of qsub's
        output ('Your job <id> ...').
        """
        if self.attempts == 0:
            self.make()
        self.attempts += 1
        out, err = call(self.cmd)
        self.waiting = False
        self.id = out.split()[2]

    @property
    def cmd(self):
        """ The qsub invocation for the generated job script. """
        return ['qsub', '-cwd', '-V', self.filename]

    def is_running(self):
        """ Checks to see if the job is running. """
        return self._grep_qstat('running')

    def is_queued(self):
        """ Checks to see if the job is queued. """
        return self._grep_qstat('queued')

    def is_complete(self):
        """ Checks the job's output or log file to determine if
        the completion criteria was met.
        """
        qstat = self._grep_qstat('complete')
        comp = self._grep_status('complete')
        return bool(qstat and comp)

    def is_error(self):
        """ Checks to see if the job errored out. """
        qstat = self._grep_qstat('error')
        err = self._grep_status('error')
        return bool(qstat and err)

    def _grep_qstat(self, status_type='complete'):
        """ Greps qstat for the job's state code.
        :param status_type: complete, queued, running, error, gone
        """
        args = ("qstat -e %s" % self.id).split()
        res, _ = call(args)
        if res == '':
            return False
        try:
            # Skip the two header lines; the state code is column 5.
            res = res.split('\n')[2].split()[4]
        except IndexError:
            # Robustness fix: malformed or short qstat output.
            return False

        if status_type == 'complete' and res == 'c':
            return True
        elif status_type == 'error' and (res == 'e' or res == 'c'):
            return True
        elif status_type == 'running' and res == 'r':
            return True
        elif status_type == 'queued' and res == 'qw':
            return True
        elif status_type == 'gone' and 'unknown job id' in str(res).lower():
            # NOTE(review): by this point res is a single state column, so
            # this substring can never match -- confirm where the
            # 'unknown job id' text actually appears.
            return True
        else:
            return False

    def _grep_status(self, status_type):
        """ Greps through the job's qstat -f output to see if it
        finished with the requested status.
        :param status_type: complete, error
        """
        args = ("qstat -f %s" % self.id).split()
        res, _ = call(args)
        exit_status = [line for line in res.split('\n')
                if 'exit_status' in line]
        try:
            _, __, code = exit_status[0].split()
        except IndexError:
            # No exit_status line yet (job still running or unknown).
            code = None

        if status_type == 'complete' and code == '0':
            return True
        elif status_type == 'error' and code != '0':
            return True
        else:
            return False
99 |
100 |
--------------------------------------------------------------------------------
/metapipe/models/tokens.py:
--------------------------------------------------------------------------------
1 | """ A set of tokens and convienence functions for input/output files.
2 |
3 | author: Brian Schrader
4 | since: 2015-12-28
5 | """
6 |
7 | from __future__ import print_function
8 | from collections import namedtuple
9 | import glob, re
10 |
11 |
12 | file_pattern = 'mp.{}.output{}'
13 | alias_pattern = '{command}-{output_number}'
14 |
15 |
class PathToken(object):
    """ A model for a given path (an alias bound to a filesystem path). """

    def __init__(self, alias, path):
        self.alias = alias
        self.path = path

    def __repr__(self):
        # Fix: the old format string contained no placeholders, so
        # repr() always returned ''.
        return '<PathToken: {}->{}>'.format(self.alias, self.path)

    def __eq__(self, other):
        try:
            # Tokens match if either the alias or the path agrees.
            return (self.alias == other.alias or
                    self.path == other.path)
        except AttributeError:
            return False

    def __hash__(self):
        # Fix: defining __eq__ disables the default hash in Python 3;
        # hash on the alias, mirroring FileToken.
        return hash(self.alias)

    def eval(self):
        """ Returns the concrete path this token stands for. """
        return self.path
35 |
36 |
class CommentToken(object):
    """ A token representing a comment line, stored as string parts. """

    def __init__(self, parts):
        self.parts = parts

    def __repr__(self):
        # Fix: the old format string contained no placeholder, so
        # repr() always returned ''.
        return '<CommentToken: {}>'.format(''.join(self.parts))

    def __eq__(self, other):
        try:
            return ''.join(self.parts) == ''.join(other.parts)
        except AttributeError:
            # Robustness fix: comparing against a non-token yields False
            # instead of raising (consistent with the other tokens).
            return False

    def eval(self):
        """ Returns the reassembled comment followed by a newline. """
        return '{}\n'.format(''.join(self.parts))
50 |
51 |
class FileToken(object):
    """ An abc for input/output data classes. Provides various common
    methods.
    Warning: This class should not be used directly.
    """

    def __init__(self, alias, filename='', cwd=''):
        self.alias = alias
        self.filename = filename
        # Normalize the working directory so it always ends in a slash.
        if cwd and not cwd.endswith('/'):
            cwd += '/'
        self.cwd = cwd

    def __eq__(self, other):
        # Two tokens match when either the alias or the filename agrees;
        # anything without those attributes compares unequal.
        try:
            same_alias = self.alias == other.alias
            same_file = self.filename == other.filename
        except AttributeError:
            return False
        return same_alias or same_file

    def __hash__(self):
        return hash(self.alias)

    @property
    def path(self):
        """ The working directory joined with the filename. """
        return self.cwd + self.filename
79 |
80 |
class Input(FileToken):
    """ A model of a single input to a given command. Input tokens can be
    evaluated to obtain their actual filename(s).
    """

    def __init__(self, alias, filename='', cwd='', and_or=''):
        super(Input, self).__init__(alias, filename, cwd)
        # 'or' means the matched files are independent alternatives;
        # 'and' (or '') means they are used together.
        self.and_or = and_or

    def __repr__(self):
        try:
            eval = self.eval()
        except Exception:
            eval = '?'
        # Fix: the old format string had only two placeholders for three
        # arguments, so the and_or marker was silently dropped.
        return '<Input [{}]{}{}>'.format(self.alias, eval,
                ' _{}_'.format(self.and_or) if self.and_or else '')

    def fuzzy_match(self, other):
        """ Given another token, see if either the major alias identifier
        matches the other alias, or if magic matches the alias.
        """
        magic, fuzzy = False, False
        try:
            magic = self.alias == other.magic
        except AttributeError:
            pass

        if '.' in self.alias:
            major = self.alias.split('.')[0]
            fuzzy = major == other.alias
        return magic or fuzzy

    def eval(self):
        """ Evaluates the given input and returns a string containing the
        actual filenames represented. If the input token represents multiple
        independent files, then eval will return a list of all the input files
        needed, otherwise it returns the filenames in a string.
        """
        if self.and_or == 'or':
            return [Input(self.alias, file, self.cwd, 'and')
                    for file in self.files]
        return ' '.join(self.files)

    @property
    def command_alias(self):
        """ Returns the command alias for a given input. In most cases this
        is just the input's alias but if the input is one of many, then
        `command_alias` returns just the beginning of the alias corresponding
        to the command's alias.
        NOTE(review): this tests for '.' but splits on '-'; confirm the
        alias grammar before changing either character.
        """
        if '.' in self.alias:
            return self.alias.split('-')[0]
        return None

    @property
    def is_magic(self):
        # Magic inputs evaluate to a list of alternatives; a ValueError
        # from files (no matches) means we cannot tell, so say no.
        try:
            return isinstance(self.eval(), list)
        except ValueError:
            return False

    @property
    def is_glob(self):
        return '*' in self.filename

    @property
    def magic_path(self):
        """ The glob pattern for output files produced by this alias. """
        match = file_pattern.format(self.alias, '*')
        return '{}{}'.format(self.cwd, match)

    @property
    def files(self):
        """ Returns a list of all the files that match the given input
        token, trying the literal path, then the magic pattern, then the
        bare alias.
        :raises ValueError: if nothing matches.
        """
        # Fix: removed a dead `res = None; if not res:` guard.
        res = glob.glob(self.path)
        if not res and self.is_glob:
            res = glob.glob(self.magic_path)
        if not res:
            res = glob.glob(self.alias)
        if not res:
            raise ValueError('No files match. %s' % self)
        return res

    @staticmethod
    def from_string(string, _or=''):
        """ Parse a given string and turn it into an input token. """
        and_or = 'or' if _or else ''
        return Input(string, and_or=and_or)
175 |
176 |
class Output(FileToken):
    """ A model of a single output to a given command. Output tokens can be
    evaluated to obtain their actual filename(s).
    """

    def __init__(self, alias, filename='', cwd='', magic=''):
        super(Output, self).__init__(alias, filename, cwd)
        self.ext = ''
        self.magic = ''
        self._clean(magic)

    def __repr__(self):
        # Fix: the old format string had three placeholders for four
        # arguments, so self.ext was silently dropped.
        return '<Output [{}]{}{}{}>'.format(self.alias, self.eval(),
                (' ' + self.magic) if self.magic else '', self.ext)

    def __eq__(self, other):
        """ Overrides the token eq to allow for magic : alias comparison for
        magic inputs. Defaults to the super() eq otherwise.
        """
        try:
            return (self.magic == other.alias or
                    super(Output, self).__eq__(other))
        except AttributeError:
            return False

    def eval(self):
        """ Returns a filename to be used for script output: the magic
        name if one was given, otherwise a generated pattern name, or
        the explicit path.
        """
        if self.magic:
            return self.magic
        if not self.filename:
            return file_pattern.format(self.alias, self.ext)
        return self.path

    def as_input(self):
        """ Returns an input token for the given output. """
        return Input(self.alias, self.eval())

    def _clean(self, magic):
        """ Given a magic string, remove the output tag designator.
        Recognized forms: 'o' (plain), 'o:<name>' (explicit name),
        'o.<ext>' (extension only). Anything else is ignored.
        """
        if magic.lower() == 'o':
            self.magic = ''
        elif magic[:2].lower() == 'o:':
            self.magic = magic[2:]
        elif magic[:2].lower() == 'o.':
            # Keep the dot: 'o.gz' -> ext '.gz'.
            self.ext = magic[1:]

    @staticmethod
    def from_string(string):
        """ Parse a given string and turn it into an output token. """
        return Output('', magic=string)
227 |
--------------------------------------------------------------------------------
/metapipe/parser.py:
--------------------------------------------------------------------------------
1 | """ A parser and other parser related classes. """
2 |
3 | import pyparsing
4 |
5 | from .models import Command, Input, Output, Grammar
6 | from .models import command_template_factory as ctf
7 |
8 |
class Parser(object):
    """ Parses a metapipe pipeline description into command templates. """

    def __init__(self, string):
        self.string = string
        self.commands = []
        self.paths = []
        self.files = []

    def consume(self, cwd=None):
        """ Converts the lexer tokens into valid statements. This process
        also checks command syntax.
        :param cwd: currently unused; kept for interface compatibility.
        :raises ValueError: on invalid syntax or when no commands exist.
        """
        first_pass = Grammar.overall.parseString(self.string)
        # Fix: iteritems() is Python 2 only; items() works on both 2 and 3.
        lowered = {key.lower(): val for key, val in first_pass.items()}

        self.commands = ['\n'.join(self._get('commands', lowered))]
        self.job_options = self._get('job_options', lowered)
        self.global_options = self._get('options', lowered)

        self.files = self._get('files', lowered)
        self.paths = self._get('paths', lowered)

        # Second pass: parse each section with its detailed grammar.
        self.files = self._parse(self.files, Grammar.file, True)
        self.paths = self._parse(self.paths, Grammar.path, True)
        self.job_options = self._parse(self.job_options, Grammar.line)

        try:
            command_lines = self._parse(self.commands, Grammar.command_lines)[0]
        except IndexError:
            raise ValueError('Did you write any commands?')

        self.commands = []
        for command_line in command_lines:
            comments, command = command_line
            self.commands.append([comments.asList(),
                self._parse([''.join(command)], Grammar.command)])

        self.job_options = [opt.asList() for opt in self.job_options]

        self.paths = ctf.get_paths(self.paths)
        self.files = ctf.get_files(self.files)

        # The factory pops items from the back, so reverse to preserve
        # the order in which they were declared.
        self.paths.reverse()
        self.files.reverse()
        self.commands.reverse()

        return ctf.get_command_templates(self.commands, self.files[:],
                self.paths[:], self.job_options)

    def _get(self, key, parser_result):
        """ Given a type and a dict of parser results, return
        the items as a list of joined text lines.
        """
        try:
            list_data = parser_result[key].asList()
            if any(isinstance(obj, str) for obj in list_data):
                txt_lines = [''.join(list_data)]
            else:
                txt_lines = [''.join(f) for f in list_data]
        except KeyError:
            # Section absent from the input.
            txt_lines = []
        return txt_lines

    def _parse(self, lines, grammar, ignore_comments=False):
        """ Given a list of lines, parse each one using the more detailed
        parse grammar, optionally skipping '#' comment lines.
        :raises ValueError: wrapping any pyparsing syntax error.
        """
        results = []
        for c in lines:
            if c != '' and not (ignore_comments and c[0] == '#'):
                try:
                    results.append(grammar.parseString(c))
                except pyparsing.ParseException as e:
                    raise ValueError('Invalid syntax. Verify line {} is '
                            'correct.\n{}\n\n{}'.format(e.lineno, c, e))
        return results
85 |
--------------------------------------------------------------------------------
/metapipe/runtime.py:
--------------------------------------------------------------------------------
1 | """ The metapipe runtime.
2 |
3 | author: Brian Schrader
4 | since: 2015-01-13
5 | """
6 |
7 | from time import sleep
8 |
9 | from metapipe.models import JobTemplate
10 |
11 |
class Runtime(object):
    """ Drives pipeline execution: wraps each command template in a
    JobTemplate, pushes it onto the queue, and ticks the queue until
    it is empty.
    """

    def __init__(self, command_templates, queue_type, job_types,
            job_type='local', sleep_time=1, max_jobs=10):
        """ :param command_templates: templates to schedule.
        :param queue_type: zero-arg callable producing the queue.
        :param job_types: mapping of job-type name -> job class.
        :param job_type: key into job_types for the class to use.
        :param sleep_time: seconds to wait between queue ticks.
        :param max_jobs: cap on concurrently running jobs.
        """
        self.complete_jobs = []
        self.queue = queue_type()
        self.sleep_time = sleep_time

        self.queue.MAX_CONCURRENT_JOBS = max_jobs

        # Fix: removed an unused local (job_templates = []).
        for command_template in command_templates:
            self.add(command_template, job_types[job_type])

    def add(self, command_template, job_class):
        """ Given a command template, add it as a job to the queue. """
        job = JobTemplate(command_template.alias,
                command_template=command_template,
                depends_on=command_template.depends_on, queue=self.queue,
                job_class=job_class)
        self.queue.push(job)

    def run(self):
        """ Begins the runtime execution. Returns the number of queue
        iterations performed.
        """
        iterations = 0
        queue = self.queue.tick()
        while True:
            try:
                next(queue)
            except StopIteration:
                break

            iterations += 1
            sleep(self.sleep_time)
        return iterations
47 |
--------------------------------------------------------------------------------
/metapipe/templates/__init__.py:
--------------------------------------------------------------------------------
# Shared Jinja2 environment that loads templates bundled inside the
# metapipe package (metapipe/templates/).
from jinja2 import Environment, PackageLoader
env = Environment(loader=PackageLoader('metapipe', 'templates'))
3 |
--------------------------------------------------------------------------------
/metapipe/templates/output_script.tmpl.sh:
--------------------------------------------------------------------------------
1 | #! {{shell}}
2 | set -e;
3 |
4 | {{options}}
5 |
6 | python - <
2 |
3 |
4 |
5 |
6 |
7 | {{name}} Pipeline Progress Report | Metapipe
8 |
9 |
18 |
19 |
20 |
24 |
25 |
26 |