├── .github
└── workflows
│ └── build.yml
├── .gitignore
├── .isort.cfg
├── .pre-commit-config.yaml
├── LICENSE
├── README.rst
├── aclimatise
├── __init__.py
├── cli.py
├── cli_types.py
├── converter
│ ├── __init__.py
│ ├── cwl.py
│ ├── janis.py
│ ├── wdl.py
│ └── yml.py
├── execution
│ ├── __init__.py
│ ├── docker.py
│ ├── help.py
│ ├── local.py
│ └── man.py
├── flag_parser
│ ├── __init__.py
│ ├── elements.py
│ └── parser.py
├── integration.py
├── model.py
├── name_generation.py
├── nlp.py
├── parser.py
├── usage_parser
│ ├── __init__.py
│ ├── elements.py
│ ├── model.py
│ └── parser.py
└── yaml.py
├── docs
├── Makefile
├── README.md
├── _static
│ └── railroad.html
├── api.rst
├── changes.rst
├── cli.rst
├── conf.py
├── grammar.rst
├── index.rst
├── installation.rst
├── make.bat
└── model.rst
├── environment.yml
├── setup.cfg
├── setup.py
└── test
├── __init__.py
├── conftest.py
├── executors
├── __init__.py
├── test_docker.py
├── test_local.py
└── test_man.py
├── flags
├── conftest.py
├── test_bedtools.py
├── test_bwa.py
├── test_bwakit.py
├── test_gth.py
├── test_htseq.py
├── test_pisces.py
├── test_podchecker.py
├── test_samtools.py
└── test_singularity.py
├── name_generation
├── __init__.py
├── conftest.py
├── test_batch.py
├── test_case.py
├── test_description.py
├── test_group.py
└── test_single_flag.py
├── test_cli.py
├── test_convert.py
├── test_data
├── bedtools.txt
├── bedtools
│ ├── bedtools.yml
│ ├── bedtools_annotate.yml
│ ├── bedtools_bamtobed.yml
│ ├── bedtools_bamtofastq.yml
│ ├── bedtools_bed12tobed6.yml
│ ├── bedtools_bedpetobam.yml
│ ├── bedtools_bedtobam.yml
│ ├── bedtools_closest.yml
│ ├── bedtools_cluster.yml
│ ├── bedtools_complement.yml
│ ├── bedtools_coverage.yml
│ ├── bedtools_expand.yml
│ ├── bedtools_fisher.yml
│ ├── bedtools_flank.yml
│ ├── bedtools_genomecov.yml
│ ├── bedtools_getfasta.yml
│ ├── bedtools_groupby.yml
│ ├── bedtools_igv.yml
│ ├── bedtools_intersect.yml
│ ├── bedtools_jaccard.yml
│ ├── bedtools_links.yml
│ ├── bedtools_makewindows.yml
│ ├── bedtools_map.yml
│ ├── bedtools_maskfasta.yml
│ ├── bedtools_merge.yml
│ ├── bedtools_multicov.yml
│ ├── bedtools_multiinter.yml
│ ├── bedtools_nuc.yml
│ ├── bedtools_overlap.yml
│ ├── bedtools_pairtobed.yml
│ ├── bedtools_pairtopair.yml
│ ├── bedtools_random.yml
│ ├── bedtools_reldist.yml
│ ├── bedtools_sample.yml
│ ├── bedtools_shift.yml
│ ├── bedtools_shuffle.yml
│ ├── bedtools_slop.yml
│ ├── bedtools_sort.yml
│ ├── bedtools_spacing.yml
│ ├── bedtools_split.yml
│ ├── bedtools_subtract.yml
│ ├── bedtools_tag.yml
│ ├── bedtools_unionbedg.yml
│ └── bedtools_window.yml
├── bedtools_closest.txt
├── bedtools_coverage.txt
├── bedtools_multiinter.txt
├── bedtools_random.txt
├── bedtools_spacing.txt
├── bedtools_subtract.txt
├── bedtools_window.txt
├── bowtie2_build.txt
├── bwa.txt
├── bwa_bwt2sa.txt
├── bwa_bwtupdate.txt
├── bwa_index.txt
├── bwa_mem.txt
├── dinosaur.txt
├── gth.txt
├── htseq_count.txt
├── mauve.txt
├── pisces.txt
├── podchecker.txt
├── samtools.txt
├── samtools
│ ├── samtools.yml
│ ├── samtools_addreplacerg.yml
│ ├── samtools_bedcov.yml
│ ├── samtools_calmd.yml
│ ├── samtools_cat.yml
│ ├── samtools_collate.yml
│ ├── samtools_depad.yml
│ ├── samtools_depth.yml
│ ├── samtools_dict.yml
│ ├── samtools_faidx.yml
│ ├── samtools_fasta.yml
│ ├── samtools_fastq.yml
│ ├── samtools_fixmate.yml
│ ├── samtools_flags.yml
│ ├── samtools_flagstat.yml
│ ├── samtools_fqidx.yml
│ ├── samtools_idxstats.yml
│ ├── samtools_index.yml
│ ├── samtools_markdup.yml
│ ├── samtools_merge.yml
│ ├── samtools_mpileup.yml
│ ├── samtools_phase.yml
│ ├── samtools_quickcheck.yml
│ ├── samtools_reheader.yml
│ ├── samtools_sort.yml
│ ├── samtools_split.yml
│ ├── samtools_stats.yml
│ ├── samtools_targetcut.yml
│ ├── samtools_tview.yml
│ └── samtools_view.yml
├── samtools_bedcov.txt
├── samtools_dict.txt
├── samtools_pl.txt
├── samtools_quickcheck.txt
└── typeHLA.txt
├── test_explore_e2e.py
├── test_model.py
├── test_parse_e2e.py
├── test_type_inference.py
├── test_yaml_dump.py
├── usage
└── test_usage.py
└── util.py
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Build
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | test_conda:
7 | # Run tests using conda, which gives us bioinformatics tools installed
8 | runs-on: ubuntu-latest
9 | container: continuumio/miniconda3
10 | steps:
11 | - uses: actions/checkout@v2
12 |
13 | - name: apt dependencies, and fix the `man` pages
14 | run: |
15 | rm /etc/dpkg/dpkg.cfg.d/docker
16 | apt-get update && apt-get install -y --reinstall man coreutils manpages build-essential git git-man
17 | mandb --create
18 |
19 | - uses: actions/setup-dotnet@v1.7.2
20 | with:
21 | dotnet-version: "2.1.x"
22 |
23 | - name: Tests
24 | run: |
25 | source /root/.bashrc
26 | conda env create
27 | conda activate aclimatise-test
28 | pip install --upgrade pip wheel setuptools
29 | pip install -e .[dev]
30 | python -m spacy download en
31 | pytest --tb=native --log-cli-level INFO
32 | shell: bash
33 |
34 | test_system:
35 | runs-on: ubuntu-latest
36 |
37 | strategy:
38 | matrix:
39 | python-version: ["3.6", "3.7", "3.8"]
40 |
41 | steps:
42 | - uses: actions/checkout@v2
43 |
44 | - name: Set up Python ${{ matrix.python-version }}
45 | uses: actions/setup-python@v1
46 | with:
47 | python-version: ${{ matrix.python-version }}
48 |
49 | - name: Install dependencies
50 | run: |
51 | pip install --upgrade pip wheel setuptools
52 | pip install -e .[dev]
53 | python -m spacy download en
54 |
55 | - uses: pre-commit/action@v1.0.1
56 |
57 | - name: Test with pytest
58 | run: |
59 | pytest --tb=native
60 |
61 | - name: Test the documentation still builds
62 | run: |
63 | cd docs
64 | make html
65 |
66 | publish:
67 | if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')
68 | needs: [test_system, test_conda]
69 | runs-on: ubuntu-latest
70 | steps:
71 | - uses: actions/checkout@v2
72 |
73 | - name: Set up Python 3.8
74 | uses: actions/setup-python@v1
75 | with:
76 | python-version: 3.8
77 |
78 | - name: Compile package
79 | run: |
80 | pip install -U wheel setuptools
81 | pip install -e .[dev]
82 | python -m spacy download en
83 | python setup.py sdist bdist_wheel
84 |
85 | - name: Create GitHub Release
86 | id: create_release
87 | uses: actions/create-release@v1
88 | env:
89 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
90 | with:
91 | tag_name: ${{ github.ref }}
92 | release_name: Release ${{ github.ref }}
93 | draft: false
94 | prerelease: false
95 |
96 | - name: Publish package
97 | uses: pypa/gh-action-pypi-publish@release/v1
98 | with:
99 | user: __token__
100 | password: ${{ secrets.pypi_password }}
101 |
102 | - name: Generate railroad diagram
103 | run: |
104 | # We need an unpublished version of PyParsing for this
105 | pip install -U git+https://github.com/pyparsing/pyparsing.git#egg=pyparsing[diagrams]
106 | aclimatise railroad > docs/_static/railroad.html
107 |
108 | - name: Build the documentation
109 | run: |
110 | cd docs
111 | make html
112 |
113 | - name: Deploy docs
114 | uses: peaceiris/actions-gh-pages@v3
115 | with:
116 | github_token: ${{ secrets.GITHUB_TOKEN }}
117 | publish_dir: docs/_build/html
118 | publish_branch: gh-pages
119 | enable_jekyll: false
120 |
121 | - name: Update Basecamp
122 | uses: peter-evans/repository-dispatch@v1
123 | with:
124 | token: ${{ secrets.REPOACCESSTOKEN }}
125 | event-type: aclimatise-update
126 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | pip-wheel-metadata/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | .hypothesis/
53 | .pytest_cache/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # celery beat schedule file
88 | celerybeat-schedule
89 |
90 | # SageMath parsed files
91 | *.sage.py
92 |
93 | # Environments
94 | .env
95 | .venv
96 | env/
97 | venv/
98 | ENV/
99 | env.bak/
100 | venv.bak/
101 |
102 | # Spyder project settings
103 | .spyderproject
104 | .spyproject
105 |
106 | # Rope project settings
107 | .ropeproject
108 |
109 | # mkdocs documentation
110 | /site
111 |
112 | # mypy
113 | .mypy_cache/
114 | .dmypy.json
115 | dmypy.json
116 |
117 | # Pyre type checker
118 | .pyre/
119 |
--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | known_third_party = WDL,click,cwl_utils,cwltool,inflection,pkg_resources,pyhash,pyparsing,pytest,ruamel,setuptools,spacy,wdlgen,wordsegment,regex,num2words,word2number,psutil,packaging,docker,attr
3 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/timothycrosley/isort
3 | rev: 4.3.21-2
4 | hooks:
5 | - id: isort
6 | types: [python]
7 | args:
8 | - "--multi-line=3"
9 | - "--trailing-comma"
10 | - "--force-grid-wrap=0"
11 | - "--use-parentheses"
12 | - "--line-width=88"
13 |
14 | - repo: https://github.com/psf/black
15 | rev: 20.8b1
16 | hooks:
17 | - id: black
18 |
19 | - repo: https://github.com/pre-commit/mirrors-prettier
20 | rev: v2.1.2
21 | hooks:
22 | - id: prettier
23 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | aCLImatise
2 | ***********
3 | |DOI|
4 |
5 | .. |DOI| image:: https://zenodo.org/badge/DOI/10.1093/bioinformatics/btaa1033.svg
6 | :target: https://doi.org/10.1093/bioinformatics/btaa1033
7 |
8 | For the full documentation, refer to the `Github Pages Website
9 | `_.
10 |
11 | ======================================================================
12 |
13 | aCLImatise is a Python library and command-line utility for parsing the help output
14 | of a command-line tool and then outputting a description of the tool in a more
15 | structured format, for example a
16 | `Common Workflow Language tool definition `_.
17 |
18 | Currently aCLImatise supports both `CWL `_ and
19 | `WDL `_ outputs, but other formats will be considered in the future, especially pull
20 | requests to support them.
21 |
22 | Please also refer to `The aCLImatise Base Camp `_, which is a database of pre-computed tool definitions
23 | generated by the aCLImatise parser. Most bioinformatics tools have a tool definition already generated in the Base Camp,
24 | so you may not need to run aCLImatise directly.
25 |
26 | aCLImatise is now published in the journal *Bioinformatics*. You can read the application note here: https://doi.org/10.1093/bioinformatics/btaa1033.
27 | To cite aCLImatise, please use the citation generator provided by the journal.
28 |
29 | Example
30 | -------
31 |
32 | Lets say you want to create a CWL workflow containing the common Unix ``wc`` (word count)
33 | utility. Running ``wc --help`` returns:
34 |
35 | .. code-block::
36 |
37 | Usage: wc [OPTION]... [FILE]...
38 | or: wc [OPTION]... --files0-from=F
39 | Print newline, word, and byte counts for each FILE, and a total line if
40 | more than one FILE is specified. A word is a non-zero-length sequence of
41 | characters delimited by white space.
42 |
43 | With no FILE, or when FILE is -, read standard input.
44 |
45 | The options below may be used to select which counts are printed, always in
46 | the following order: newline, word, character, byte, maximum line length.
47 | -c, --bytes print the byte counts
48 | -m, --chars print the character counts
49 | -l, --lines print the newline counts
50 | --files0-from=F read input from the files specified by
51 | NUL-terminated names in file F;
52 | If F is - then read names from standard input
53 | -L, --max-line-length print the maximum display width
54 | -w, --words print the word counts
55 | --help display this help and exit
56 | --version output version information and exit
57 |
58 | GNU coreutils online help:
59 | Full documentation at:
60 | or available locally via: info '(coreutils) wc invocation'
61 |
62 | If you run ``aclimatise explore wc``, which means "parse the wc command and all subcommands",
63 | you'll end up with the following files in your current directory:
64 |
65 | * ``wc.cwl``
66 | * ``wc.wdl``
67 | * ``wc.yml``
68 |
69 | These are representations of the command ``wc`` in 3 different formats. If you look at ``wc.wdl``, you'll see that it
70 | contains a WDL-compatible tool definition for ``wc``:
71 |
72 | .. code-block:: text
73 |
74 | version 1.0
75 | task Wc {
76 | input {
77 | Boolean bytes
78 | Boolean chars
79 | Boolean lines
80 | String files__from
81 | Boolean max_line_length
82 | Boolean words
83 | }
84 | command <<<
85 | wc \
86 | ~{true="--bytes" false="" bytes} \
87 | ~{true="--chars" false="" chars} \
88 | ~{true="--lines" false="" lines} \
89 | ~{if defined(files__from) then ("--files0-from " + '"' + files__from + '"') else ""} \
90 | ~{true="--max-line-length" false="" max_line_length} \
91 | ~{true="--words" false="" words}
92 | >>>
93 | }
94 |
--------------------------------------------------------------------------------
/aclimatise/__init__.py:
--------------------------------------------------------------------------------
1 | import typing
2 |
3 | from aclimatise.converter import WrapperGenerator
4 | from aclimatise.converter.cwl import CwlGenerator
5 | from aclimatise.converter.janis import JanisGenerator
6 | from aclimatise.converter.wdl import WdlGenerator
7 | from aclimatise.converter.yml import YmlGenerator
8 | from aclimatise.execution import Executor
9 | from aclimatise.execution.docker import DockerExecutor
10 | from aclimatise.execution.local import LocalExecutor
11 | from aclimatise.execution.man import ManPageExecutor
12 | from aclimatise.integration import parse_help
13 | from aclimatise.model import Command, Flag
14 | from deprecated import deprecated
15 |
16 | default_executor = LocalExecutor()
17 |
18 |
19 | @deprecated(
20 | reason="Please use the explore method on the executors directly. e.g. `LocalExecutor().explore()`"
21 | )
22 | def explore_command(
23 | cmd: typing.List[str],
24 | flags: typing.Iterable[str] = (["--help"], ["-h"], [], ["--usage"]),
25 | parent: typing.Optional[Command] = None,
26 | max_depth: int = 2,
27 | try_subcommand_flags=True,
28 | executor: Executor = default_executor,
29 | ) -> typing.Optional[Command]:
30 | """
31 | Given a command to start with, builds a model of this command and all its subcommands (if they exist).
32 | Use this if you know the command you want to parse, you don't know which flags it responds to with help text, and
33 | you want to include subcommands.
34 | """
35 | return executor.explore(cmd, max_depth=max_depth, parent=parent)
36 |
37 |
38 | # __all__ entries must be strings, otherwise `from aclimatise import *` raises TypeError
39 | __all__ = [
40 | "CwlGenerator",
41 | "WdlGenerator",
42 | "YmlGenerator",
43 | "JanisGenerator",
44 | "LocalExecutor",
45 | "DockerExecutor",
46 | "ManPageExecutor",
47 | "explore_command",
48 | "parse_help",
49 | ]
49 |
--------------------------------------------------------------------------------
/aclimatise/cli.py:
--------------------------------------------------------------------------------
1 | """
2 | Code relating to the command line interface to aCLImatise
3 | """
4 | import sys
5 | from pathlib import Path
6 | from typing import Iterable, Tuple
7 |
8 | import click
9 |
10 | from aclimatise import WrapperGenerator, explore_command, parse_help
11 | from aclimatise.execution.local import LocalExecutor
12 | from aclimatise.execution.man import ManPageExecutor
13 | from aclimatise.flag_parser.parser import CliParser
14 |
15 | # Some common options
16 | opt_generate_names = click.option(
17 | "--generate-names",
18 | "-g",
19 | is_flag=True,
20 | help=(
21 | "Rather than using the long flag to generate the argument name, generate them automatically using the "
22 | "flag description. Generally helpful if there are no long flags, only short flags."
23 | ),
24 | )
25 | opt_case = click.option(
26 | "--case",
27 | "-c",
28 | type=click.Choice(WrapperGenerator.cases),
29 | help=(
30 | "Which case to use for variable names. If not set, defaults to the language defaults: snake_case for CWL"
31 | " and snake_case for WDL"
32 | ),
33 | default="snake",
34 | )
35 | opt_cmd = click.argument("cmd", nargs=-1, required=True)
36 |
37 |
38 | @click.group()
39 | def main():
40 | pass
41 |
42 |
43 | @main.command(help="Run an executable and explore all subcommands")
44 | @opt_cmd
45 | @opt_case
46 | @opt_generate_names
47 | @click.option(
48 | "--man",
49 | "-m",
50 | is_flag=True,
51 | help="Parse the help using its man page, rather than by executing the command. This will fail if the man page doesn't exist",
52 | )
53 | @click.option(
54 | "--depth",
55 | "-d",
56 | type=int,
57 | default=1,
58 | help="How many levels of subcommands we should look for. Depth 2 means commands can be 3 levels deep, such as "
59 | "``git submodule foreach``",
60 | )
61 | @click.option(
62 | "--format",
63 | "-f",
64 | "formats",
65 | type=click.Choice(["wdl", "cwl", "yml"]),
66 | multiple=True,
67 | default=("yml", "wdl", "cwl"),
68 | help="The language in which to output the CLI wrapper",
69 | )
70 | @click.option(
71 | "--out-dir",
72 | "-o",
73 | type=Path,
74 | help="Directory in which to put the output files",
75 | default=Path(),
76 | )
77 | @click.option(
78 | "--help-flag",
79 | "-l",
80 | type=str,
81 | help="Flag to append to the end of the command to make it output help text",
82 | )
83 | @click.option(
84 | "--subcommands/--no-subcommands", default=True, help="Look for subcommands"
85 | )
86 | def explore(
87 | cmd: Iterable[str],
88 | out_dir: Path,
89 | formats: Tuple[str],
90 | subcommands: bool,
91 | case: str,
92 | generate_names: bool,
93 | man: bool,
94 | help_flag: str,
95 | depth: int = None,
96 | ):
97 | # We only support these two executors via CLI because the docker executor would require some additional config
98 | if man:
99 | exec = ManPageExecutor()
100 | else:
101 | kwargs = {}
102 | if help_flag is not None:
103 | kwargs["flags"] = [[help_flag]]
104 | exec = LocalExecutor(**kwargs)
105 |
106 | if subcommands:
107 | command = exec.explore(list(cmd), max_depth=depth)
108 | else:
109 | command = exec.convert(list(cmd))
110 |
111 | for format in formats:
112 | converter_cls = WrapperGenerator.choose_converter(format)
113 | converter = converter_cls(
114 | generate_names=generate_names,
115 | case=case,
116 | )
117 | list(converter.generate_tree(command, out_dir))
118 |
119 |
120 | @main.command(
121 | help="Read a command help from stdin and output a tool definition to stdout"
122 | )
123 | @opt_cmd
124 | @opt_generate_names
125 | @opt_case
126 | @click.option(
127 | "--format",
128 | "-f",
129 | type=click.Choice(["wdl", "cwl", "yml", "janis"]),
130 | default="cwl",
131 | help="The language in which to output the CLI wrapper",
132 | )
133 | def pipe(cmd, generate_names, case, format):
134 | stdin = "".join(sys.stdin.readlines())
135 | command = parse_help(cmd, stdin)
136 |
137 | converter_cls = WrapperGenerator.choose_converter(format)
138 | converter = converter_cls(
139 | generate_names=generate_names,
140 | case=case,
141 | )
142 | output = converter.save_to_string(command)
143 | print(output)
144 |
145 |
146 | @main.command(help="Output a representation of the internal grammar")
147 | def railroad():
148 | try:
149 | from pyparsing.diagram import to_railroad, railroad_to_html
150 |
151 | parser = CliParser()
152 | railroad = to_railroad(parser.flags)
153 | sys.stdout.write(railroad_to_html(railroad))
154 | except ImportError:
155 | print(
156 | "You need PyParsing 3.0.0a2 or greater to use this feature", file=sys.stderr
157 | )
158 | sys.exit(1)
159 |
160 |
161 | if __name__ == "__main__":
162 | main()
163 |
--------------------------------------------------------------------------------
/aclimatise/cli_types.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains the objects that represent a "type" of data a flag argument might store
3 | """
4 | import typing
5 | from enum import Enum
6 |
7 | import attr
8 |
9 | from aclimatise.yaml import AttrYamlMixin
10 |
11 |
12 | @attr.s(auto_attribs=True, frozen=True)
13 | class CliType(AttrYamlMixin):
14 | """
15 | A data type used in the command-line
16 | """
17 |
18 | @staticmethod
19 | def lowest_common_type(types: typing.Iterable["CliType"]) -> "CliType":
20 | types = list(types)  # materialize first: the set build below would exhaust a one-shot iterator
21 | type_set: typing.Set[typing.Type[CliType]] = {type(t) for t in types}
22 | if len(type_set) == 1:
23 | # If there is only one type, use it
24 | return next(iter(types))
25 |
26 | if len(type_set) == 2 and CliInteger in type_set and CliFloat in type_set:
27 | # If they're all numeric, they can be represented as floats
28 | return CliFloat()
29 |
30 | if {
31 | CliDir,
32 | CliDict,
33 | CliFile,
34 | CliTuple,
35 | CliList,
36 | } & type_set:
37 | # These complex types cannot be represented in a simpler way
38 | raise Exception(
39 | "There is no common type between {}".format(
40 | ", ".join([str(typ) for typ in type_set])
41 | )
42 | )
43 |
44 | else:
45 | # Most of the time, strings can be used to represent primitive types
46 | return CliString()
47 |
48 | @property
49 | def representable(self) -> set:
50 | """
51 | Returns a set of types that this type could alternatively be represented as.
52 | Adds the class's own type to the _representable set
53 | """
54 | return self._representable.union({type(self)})
55 |
56 | # The list of types that this specific type could be representable as
57 | _representable = set()
58 |
59 |
60 | @attr.s(auto_attribs=True, frozen=True)
61 | class CliEnum(CliType):
62 | """
63 | One of a list of possible options
64 | """
65 |
66 | enum: Enum
67 | """
68 | The possible options as a Python Enum
69 | """
70 |
71 |
72 | @attr.s(auto_attribs=True, frozen=True)
73 | class CliFloat(CliType):
74 | """
75 | Takes a floating-point value
76 | """
77 |
78 | pass
79 |
80 |
81 | @attr.s(auto_attribs=True, frozen=True)
82 | class CliInteger(CliType):
83 | """
84 | Takes an integer value
85 | """
86 |
87 | _representable = {CliFloat}
88 |
89 |
90 | @attr.s(auto_attribs=True, frozen=True)
91 | class CliString(CliType):
92 | """
93 | Takes a string value
94 | """
95 |
96 | pass
97 |
98 |
99 | @attr.s(auto_attribs=True, frozen=True)
100 | class CliBoolean(CliType):
101 | """
102 | Takes a boolean value
103 | """
104 |
105 | pass
106 |
107 |
108 | @attr.s(auto_attribs=True, frozen=True)
109 | class CliFileSystemType(CliType):
110 | """
111 | Takes a directory / file path
112 | """
113 |
114 | output: bool = False
115 | """
116 | Indicator if it is input or output
117 | """
118 |
119 |
120 | @attr.s(auto_attribs=True, frozen=True)
121 | class CliDir(CliFileSystemType):
122 | """
123 | Takes a directory path
124 | """
125 |
126 | pass
127 |
128 |
129 | @attr.s(auto_attribs=True, frozen=True)
130 | class CliFile(CliFileSystemType):
131 | """
132 | Takes a file path
133 | """
134 |
135 | pass
136 |
137 |
138 | @attr.s(auto_attribs=True, frozen=True)
139 | class CliDict(CliType):
140 | """
141 | Takes a dictionary value
142 | """
143 |
144 | key: CliType
145 | """
146 | Data type of the keys to this dictionary
147 | """
148 |
149 | value: CliType
150 | """
151 | Data type of the values to this dictionary
152 | """
153 |
154 |
155 | @attr.s(auto_attribs=True, frozen=True)
156 | class CliList(CliType):
157 | """
158 | Takes a list value
159 | """
160 |
161 | value: CliType
162 | """
163 | Data type of the values in this list
164 | """
165 |
166 |
167 | @attr.s(auto_attribs=True, frozen=True)
168 | class CliTuple(CliType):
169 | """
170 | Takes a list of values with a fixed length, possibly each with different types
171 | """
172 |
173 | values: typing.List[CliType]
174 | """
175 | List of types, in order, held within the tuple
176 | """
177 |
178 | @property
179 | def homogenous(self):
180 | """
181 | A tuple is homogenous if all types in the tuple are the same, aka the set of all types has length 1
182 | """
183 | return len(set([type(x) for x in self.values])) == 1
184 |
--------------------------------------------------------------------------------
/aclimatise/converter/__init__.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | from itertools import groupby, zip_longest
3 | from os import PathLike
4 | from pathlib import Path
5 | from typing import Generator, Iterable, List, Set, TextIO, Tuple, Type
6 |
7 | import attr
8 |
9 | from aclimatise.model import CliArgument, Command, Flag
10 | from aclimatise.name_generation import (
11 | NameGenerationError,
12 | choose_unique_name,
13 | generate_name,
14 | generate_names_nlp,
15 | generate_names_segment,
16 | name_to_camel,
17 | name_to_snake,
18 | )
19 | from aclimatise.yaml import AttrYamlMixin
20 |
21 |
22 | @attr.s(
23 | auto_attribs=True,
24 | )
25 | class NamedArgument(AttrYamlMixin):
26 | arg: CliArgument
27 | name: str
28 |
29 |
30 | @attr.s(
31 | auto_attribs=True,
32 | )
33 | class WrapperGenerator(AttrYamlMixin):
34 | """
35 | Abstract base class for a class that converts a Command object into a string that defines a tool
36 | wrapper in a certain workflow language
37 | """
38 |
39 | cases = ["snake", "camel"]
40 |
41 | @classmethod
42 | def get_subclasses(cls) -> List[Type["WrapperGenerator"]]:
43 | return cls.__subclasses__()
44 |
45 | @classmethod
46 | def choose_converter(cls, typ) -> Type["WrapperGenerator"]:
47 | """
48 | Returns a converter subclass, given a converter type name
49 | :param type: The type of converter, e.g. 'cwl' or 'wdl'
50 | """
51 | for subclass in cls.__subclasses__():
52 | if subclass.format() == typ:
53 | return subclass
54 |
55 | raise Exception("Unknown format type")
56 |
57 | @classmethod
58 | @abstractmethod
59 | def format(cls) -> str:
60 | """
61 | Returns the output format that this generator produces as a string, e.g. "cwl"
62 | """
63 | pass
64 |
65 | @abstractmethod
66 | def save_to_string(self, cmd: Command) -> str:
67 | """
68 | Convert the command into a single string, ignoring subcommands
69 | """
70 | pass
71 |
72 | def save_to_file(self, cmd: Command, path: Path) -> None:
73 | """
74 | Write the command into a file
75 | """
76 | # By default we just write the string out, but subclasses can have different behaviour
77 | path.write_text(self.save_to_string(cmd))
78 |
79 | def generate_tree(
80 | self, cmd: Command, out_dir: PathLike
81 | ) -> Generator[Tuple[Path, Command], None, None]:
82 | out_dir = Path(out_dir)
83 | for cmd in cmd.command_tree():
84 | path = out_dir / (cmd.as_filename + self.suffix)
85 | try:
86 | self.save_to_file(cmd, path)
87 | except NameGenerationError as e:
88 | raise NameGenerationError(
89 | 'Name generation error for command "{}". {}'.format(
90 | " ".join(cmd.command), e.message
91 | )
92 | )
93 | yield path, cmd
94 |
95 | @property
96 | def reserved(self) -> Set[Tuple[str, ...]]:
97 | """
98 | A list of reserved keywords for this language
99 | """
100 | return set()
101 |
102 | @property
103 | @abstractmethod
104 | def suffix(self) -> str:
105 | """
106 | Returns a suffix for files generated using this converter
107 | """
108 |
109 | def words_to_name(self, words: Iterable[str]):
110 | """
111 | Converts a list of tokens, such as ["a", "variable", "name"] to a language-appropriate name, such as
112 | "aVariableName"
113 | """
114 | if self.case == "snake":
115 | return name_to_snake(words)
116 | elif self.case == "camel":
117 | return name_to_camel(words)
118 |
119 | def choose_variable_names(
120 | self, flags: List[CliArgument], length: int = 3
121 | ) -> List[NamedArgument]:
122 | """
123 | Choose names for a list of flags. This needs to be done in one go because there is a risk of duplicate
124 | variable names otherwise
125 | :param length: See :py:func:`from aclimatise.name_generation.generate_name`
126 | """
127 | options = list(
128 | zip_longest(
129 | generate_names_segment([flag.full_name() for flag in flags]),
130 | generate_names_nlp(
131 | [flag.description for flag in flags], reserved=self.reserved
132 | ),
133 | [flag.argument_name() for flag in flags if isinstance(flag, Flag)],
134 | fillvalue=[],
135 | )
136 | )
137 |
138 | return [
139 | NamedArgument(
140 | arg=flag,
141 | name=self.words_to_name(
142 | choose_unique_name(flag_options, reserved=self.reserved, number=i)
143 | ),
144 | )
145 | for i, (flag, flag_options) in enumerate(zip(flags, options))
146 | ]
147 |
148 | case: str = "snake"
149 | """
150 | Which case to use for variable names
151 | """
152 |
153 | generate_names: bool = True
154 | """
155 | Rather than using the long flag to generate the argument name, generate them automatically using the
156 | flag description. Generally helpful if there are no long flags, only short flags.
157 | """
158 |
159 | ignore_positionals: bool = False
160 | """
161 | Don't include positional arguments, for example because the help formatting has some
162 | misleading sections that look like positional arguments
163 | """
164 |
--------------------------------------------------------------------------------
/aclimatise/converter/cwl.py:
--------------------------------------------------------------------------------
1 | from io import StringIO
2 | from pathlib import Path
3 | from typing import List
4 |
5 | import attr
6 | from cwl_utils.parser_v1_1 import (
7 | CommandInputParameter,
8 | CommandLineBinding,
9 | CommandLineTool,
10 | CommandOutputBinding,
11 | CommandOutputParameter,
12 | DockerRequirement,
13 | )
14 |
15 | from aclimatise import cli_types
16 | from aclimatise.cli_types import CliType
17 | from aclimatise.converter import NamedArgument, WrapperGenerator
18 | from aclimatise.model import CliArgument, Command, Flag, Positional
19 | from aclimatise.yaml import yaml
20 |
21 |
@attr.s(auto_attribs=True)
class CwlGenerator(WrapperGenerator):
    """
    Generates CWL v1.1 CommandLineTool wrappers from the aclimatise Command model.
    """

    # Generated variable names use snake_case
    case = "snake"

    @classmethod
    def format(cls) -> str:
        """Identifier for this output format, as used on the CLI"""
        return "cwl"

    @staticmethod
    def snake_case(words: list):
        """Join a list of words into one lowercase snake_case identifier"""
        return "_".join([word.lower() for word in words])

    @staticmethod
    def type_to_cwl_type(typ: cli_types.CliType) -> str:
        """
        Calculate the CWL type for a CLI type

        :param typ: The CLI type to convert
        :raises Exception: If typ is not a recognised CliType subclass
        """
        if isinstance(typ, cli_types.CliFile):
            return "File"
        elif isinstance(typ, cli_types.CliDir):
            return "Directory"
        elif isinstance(typ, cli_types.CliString):
            return "string"
        elif isinstance(typ, cli_types.CliFloat):
            return "double"
        elif isinstance(typ, cli_types.CliInteger):
            return "long"
        elif isinstance(typ, cli_types.CliBoolean):
            return "boolean"
        elif isinstance(typ, cli_types.CliEnum):
            # Enums are represented as plain strings rather than CWL enum types
            return "string"
        elif isinstance(typ, cli_types.CliList):
            return CwlGenerator.type_to_cwl_type(typ.value) + "[]"
        elif isinstance(typ, cli_types.CliTuple):
            # Tuples collapse to the lowest common type of their members
            return CwlGenerator.type_to_cwl_type(CliType.lowest_common_type(typ.values))
        else:
            raise Exception(f"Invalid type {typ}!")

    @staticmethod
    def arg_to_cwl_type(arg: CliArgument) -> str:
        """
        Calculate the CWL type for an entire argument, appending "?" for
        optional non-array arguments
        """
        typ = arg.get_type()
        cwl_type = CwlGenerator.type_to_cwl_type(typ)

        # Array types never receive the "?" suffix here
        if arg.optional and not cwl_type.endswith("[]"):
            return cwl_type + "?"
        else:
            return cwl_type

    def get_inputs(self, names: List[NamedArgument]) -> List[CommandInputParameter]:
        """
        Build one CommandInputParameter per named argument. Positionals get a
        position binding; flags get a prefix binding.
        """
        ret = []
        for arg in names:
            assert arg.name != "", arg
            ret.append(
                CommandInputParameter(
                    id="in_" + arg.name,
                    type=self.arg_to_cwl_type(arg.arg),
                    inputBinding=CommandLineBinding(
                        position=arg.arg.position
                        if isinstance(arg.arg, Positional)
                        else None,
                        prefix=arg.arg.longest_synonym
                        if isinstance(arg.arg, Flag)
                        else None,
                    ),
                    doc=arg.arg.description,
                )
            )

        return ret

    def get_outputs(self, names: List[NamedArgument]) -> List[CommandOutputParameter]:
        """
        Build the output parameters: stdout is always captured, plus one
        output per argument whose type is a filesystem output type
        """
        ret = [
            # We default to always capturing stdout
            CommandOutputParameter(
                id="out_stdout",
                type="stdout",
                doc="Standard output stream",
            )
        ]

        for arg in names:
            typ = arg.arg.get_type()
            if isinstance(typ, cli_types.CliFileSystemType) and typ.output:
                ret.append(
                    CommandOutputParameter(
                        id="out_" + arg.name,
                        type=self.arg_to_cwl_type(arg.arg),
                        doc=arg.arg.description,
                        # Glob for whatever path the corresponding input was given
                        outputBinding=CommandOutputBinding(
                            glob="$(inputs.in_{})".format(arg.name)
                        ),
                    )
                )
        return ret

    def command_to_tool(self, cmd: Command) -> CommandLineTool:
        """
        Convert a Command into a cwl_utils CommandLineTool object
        """
        inputs: List[CliArgument] = [*cmd.named] + (
            [] if self.ignore_positionals else [*cmd.positional]
        )
        names = self.choose_variable_names(inputs)

        hints = []
        if cmd.docker_image is not None:
            hints.append(DockerRequirement(dockerPull=cmd.docker_image))

        tool = CommandLineTool(
            id=cmd.as_filename + ".cwl",
            baseCommand=list(cmd.command),
            cwlVersion="v1.1",
            inputs=self.get_inputs(names),
            outputs=self.get_outputs(names),
            hints=hints,
        )

        return tool

    @property
    def suffix(self) -> str:
        """File extension for generated wrappers"""
        return ".cwl"

    def save_to_string(self, cmd: Command) -> str:
        """Serialise the tool to a YAML string"""
        io = StringIO()
        yaml.dump(self.command_to_tool(cmd).save(), io)
        return io.getvalue()

    def save_to_file(self, cmd: Command, path: Path) -> None:
        """Serialise the tool as YAML into the given file path"""
        # Renamed from `map`, which shadowed the builtin of the same name
        tool_dict = self.command_to_tool(cmd).save()
        with path.open("w") as fp:
            yaml.dump(tool_dict, fp)
157 |
--------------------------------------------------------------------------------
/aclimatise/converter/janis.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import janis_core as janis
4 | from aclimatise import cli_types
5 | from aclimatise.cli_types import CliType
6 | from aclimatise.converter import NamedArgument, WrapperGenerator
7 | from aclimatise.model import CliArgument, Command, Flag, Positional
8 |
9 |
class JanisGenerator(WrapperGenerator):
    """Generates Janis CommandToolBuilder definitions from the Command model."""

    @classmethod
    def format(cls) -> str:
        return "janis"

    def save_to_string(self, cmd: Command) -> str:
        # Build the tool and render it as janis Python source
        tool = self.command_to_tool(cmd)
        return tool.translate("janis", to_console=False)

    def command_to_tool(self, cmd: Command) -> janis.CommandToolBuilder:
        # Gather flags, and positionals unless they're being suppressed
        all_args: List[CliArgument] = list(cmd.named)
        if not self.ignore_positionals:
            all_args.extend(cmd.positional)
        named = self.choose_variable_names(all_args)

        return janis.CommandToolBuilder(
            tool=cmd.as_filename,
            base_command=list(cmd.command),
            inputs=self.get_inputs(named),
            outputs=self.get_outputs(named),
            version="v0.1.0",
            container=cmd.docker_image,
        )

    def type_to_janis_type(
        self, typ: cli_types.CliType, optional: bool
    ) -> janis.DataType:
        # Scalar CLI types map one-to-one onto janis data types; the order of
        # this table preserves the original isinstance-check order
        scalar_mapping = (
            (cli_types.CliFile, janis.File),
            (cli_types.CliDir, janis.Directory),
            (cli_types.CliString, janis.String),
            (cli_types.CliFloat, janis.Float),
            (cli_types.CliInteger, janis.Int),
            (cli_types.CliBoolean, janis.Boolean),
            # Enums are represented as plain strings
            (cli_types.CliEnum, janis.String),
        )
        for cli_cls, janis_cls in scalar_mapping:
            if isinstance(typ, cli_cls):
                return janis_cls(optional=optional)

        if isinstance(typ, cli_types.CliList):
            # TODO: how is Array represented?
            element = self.type_to_janis_type(typ.value, optional=False)
            return janis.Array(element, optional=optional)

        if isinstance(typ, cli_types.CliTuple):
            # Collapse the tuple into its members' lowest common type
            return self.type_to_janis_type(
                CliType.lowest_common_type(typ.values), optional=False
            )

        raise Exception(f"Invalid type {typ}!")

    def arg_to_janis_type(self, arg: CliArgument) -> janis.DataType:
        return self.type_to_janis_type(arg.get_type(), arg.optional)

    def get_inputs(self, names: List[NamedArgument]) -> List[janis.ToolInput]:
        inputs = []
        for named_arg in names:
            assert named_arg.name != "", named_arg
            underlying = named_arg.arg
            inputs.append(
                janis.ToolInput(
                    tag="in_" + named_arg.name,
                    input_type=self.arg_to_janis_type(underlying),
                    # Positionals bind by position; flags bind by prefix
                    position=underlying.position
                    if isinstance(underlying, Positional)
                    else None,
                    prefix=underlying.longest_synonym
                    if isinstance(underlying, Flag)
                    else None,
                    doc=underlying.description,
                )
            )
        return inputs

    def get_outputs(self, names: List[NamedArgument]) -> List[janis.ToolOutput]:
        outputs = []
        for named_arg in names:
            arg_type = named_arg.arg.get_type()
            # Only filesystem types explicitly marked as outputs become tool outputs
            if not (isinstance(arg_type, cli_types.CliFileSystemType) and arg_type.output):
                continue
            outputs.append(
                janis.ToolOutput(
                    tag="out_" + named_arg.name,
                    output_type=self.arg_to_janis_type(named_arg.arg),
                    doc=named_arg.arg.description,
                    selector=janis.InputSelector("in_" + named_arg.name),
                )
            )
        return outputs

    @property
    def suffix(self) -> str:
        return ".py"
108 |
--------------------------------------------------------------------------------
/aclimatise/converter/wdl.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for generating WDL from the CLI data model
3 | """
4 | import re
5 | from typing import Iterable, List, Set, Tuple
6 |
7 | from inflection import camelize
8 | from WDL._grammar import keywords
9 | from wdlgen import (
10 | ArrayType,
11 | File,
12 | Input,
13 | Output,
14 | ParameterMeta,
15 | PrimitiveType,
16 | Task,
17 | WdlType,
18 | )
19 |
20 | from aclimatise import cli_types, model
21 | from aclimatise.converter import NamedArgument, WrapperGenerator
22 | from aclimatise.model import CliArgument, Command, Flag, Positional
23 | from aclimatise.nlp import wordsegment
24 |
#: A regex, borrowed from MiniWDL, that matches a valid WDL identifier
26 | WDL_IDENT = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*")
27 | #: Matches all characters we should remove from a WDL identifier
28 | WDL_STRIP = re.compile(r"(^[^a-zA-Z])|([^a-zA-Z0-9_])")
29 |
30 |
def escape_wdl_str(text: str):
    """
    Escape a Python string so it can be embedded in a double-quoted WDL string
    literal. Backslashes are escaped first, so that the escapes we add for
    quotes and newlines are not themselves re-escaped; previously a literal
    backslash in the text would pass through unescaped and corrupt the WDL.
    """
    return text.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
36 |
37 |
def flag_to_command_input(
    named_flag: NamedArgument, converter: WrapperGenerator
) -> Task.Command.CommandInput:
    """
    Convert a named argument into a wdlgen CommandInput.

    :param named_flag: Wrapper pairing the underlying argument with its generated name
    :param converter: The generator requesting the conversion (currently unused)
    """
    args = dict(name=named_flag.name)

    if isinstance(named_flag.arg, model.Flag):
        args.update(dict(optional=named_flag.arg.optional))
        if isinstance(named_flag.arg.args, model.EmptyFlagArg):
            # Boolean flag: emit the flag itself when true, nothing when false
            args.update(dict(true=named_flag.arg.longest_synonym, false=""))
        else:
            args.update(
                dict(
                    prefix=named_flag.arg.longest_synonym,
                )
            )
    # BUG FIX: this previously tested isinstance(named_flag, model.Positional),
    # but named_flag is always a NamedArgument wrapper, so the branch was
    # unreachable; the underlying argument lives in named_flag.arg (and the
    # position likewise comes from named_flag.arg.position)
    elif isinstance(named_flag.arg, model.Positional):
        args.update(dict(optional=False, position=named_flag.arg.position))

    return Task.Command.CommandInput.from_fields(**args)
57 |
58 |
class WdlGenerator(WrapperGenerator):
    """
    Generates WDL 1.0 task definitions from the Command model, using wdlgen.
    """

    @property
    def suffix(self) -> str:
        """File extension for generated wrappers"""
        return ".wdl"

    # Generated variable names use snake_case
    case = "snake"

    @property
    def reserved(self) -> Set[Tuple[str, ...]]:
        # Steal the keywords list from miniWDL; each keyword is segmented into
        # a tuple of words so it can be compared against generated names
        return {tuple(wordsegment.segment(key)) for key in keywords["1.0"]}

    @classmethod
    def format(cls) -> str:
        """Identifier for this output format, as used on the CLI"""
        return "wdl"

    @classmethod
    def type_to_wdl(cls, typ: cli_types.CliType, optional: bool = False) -> WdlType:
        """
        Map a CLI type onto a WdlType. Unrecognised types fall back to String.

        :param typ: The CLI type to convert
        :param optional: Whether the resulting WDL type should be optional;
            for array types this instead controls the non-empty requirement
        """
        if isinstance(typ, cli_types.CliString):
            return WdlType(PrimitiveType(PrimitiveType.kString), optional=optional)
        elif isinstance(typ, cli_types.CliFloat):
            return WdlType(PrimitiveType(PrimitiveType.kFloat), optional=optional)
        elif isinstance(typ, cli_types.CliBoolean):
            return WdlType(PrimitiveType(PrimitiveType.kBoolean), optional=optional)
        elif isinstance(typ, cli_types.CliInteger):
            return WdlType(PrimitiveType(PrimitiveType.kInt), optional=optional)
        elif isinstance(typ, cli_types.CliFile):
            return WdlType(PrimitiveType(PrimitiveType.kFile), optional=optional)
        elif isinstance(typ, cli_types.CliDir):
            return WdlType(PrimitiveType(PrimitiveType.kDirectory), optional=optional)
        elif isinstance(typ, cli_types.CliTuple):
            if typ.homogenous:
                # All members share a type, so use the first member's type
                return WdlType(
                    ArrayType(
                        cls.type_to_wdl(typ.values[0]), requires_multiple=not optional
                    )
                )
            else:
                # Mixed members: collapse to the lowest common type
                return WdlType(
                    ArrayType(
                        cls.type_to_wdl(
                            cli_types.CliType.lowest_common_type(typ.values)
                        ),
                        requires_multiple=not optional,
                    )
                )
        elif isinstance(typ, cli_types.CliList):
            return WdlType(
                ArrayType(cls.type_to_wdl(typ.value), requires_multiple=not optional)
            )
        elif isinstance(typ, cli_types.CliEnum):
            # Enums are represented as plain strings
            return WdlType(PrimitiveType(PrimitiveType.kString), optional=optional)
        else:
            # Fallback for unknown types
            return WdlType(PrimitiveType(PrimitiveType.kString), optional=optional)

    def make_inputs(self, named: Iterable[NamedArgument]) -> List[Input]:
        """Build one WDL input declaration per named argument"""
        return [
            Input(
                data_type=self.type_to_wdl(
                    named_arg.arg.get_type(), optional=named_arg.arg.optional
                ),
                name=named_arg.name,
            )
            for named_arg in named
        ]

    def make_command(self, cmd: Command, inputs: List[NamedArgument]) -> Task.Command:
        """
        Build the task's command section: positionals become command inputs,
        flags become command arguments. Characters that are not valid in a WDL
        identifier are replaced with underscores in the base command.
        """
        return Task.Command(
            command=" ".join([WDL_STRIP.sub("_", tok) for tok in cmd.command]),
            inputs=[
                flag_to_command_input(input, self)
                for input in inputs
                if isinstance(input.arg, Positional)
            ],
            arguments=[
                flag_to_command_input(input, self)
                for input in inputs
                if isinstance(input.arg, Flag)
            ],
        )

    def make_parameter_meta(self, named: Iterable[NamedArgument]) -> ParameterMeta:
        """Build the parameter_meta section from each argument's description"""
        params = {}
        for named_arg in named:
            params[named_arg.name] = escape_wdl_str(named_arg.arg.description)

        return ParameterMeta(**params)

    def make_task_name(self, cmd: Command) -> str:
        """
        Derive a CamelCase task name from the command tokens, stripping
        characters not allowed in WDL identifiers
        """
        return camelize(
            "_".join([WDL_STRIP.sub("", token) for token in cmd.command]).replace(
                "-", "_"
            )
        )

    def make_outputs(self, names: List[NamedArgument]) -> List[Output]:
        """
        Build the outputs: stdout is always captured, plus one output per
        argument whose type is a filesystem output type
        """
        ret = [
            # We default to always capturing stdout
            # NOTE(review): File here is the wdlgen class itself, not an
            # instance — confirm wdlgen accepts a bare class as data_type
            Output(data_type=File, name="out_stdout", expression="stdout()")
        ]
        for arg in names:
            typ = arg.arg.get_type()
            if isinstance(typ, cli_types.CliFileSystemType) and typ.output:
                ret.append(
                    Output(
                        data_type=self.type_to_wdl(typ),
                        name="out_" + arg.name,
                        # The output path is whatever value the matching input received
                        expression='"${{in_{}}}"'.format(arg.name),
                    )
                )

        return ret

    def save_to_string(self, cmd: Command) -> str:
        """Render the Command as WDL 1.0 task source text"""
        inputs: List[CliArgument] = [*cmd.named] + (
            [] if self.ignore_positionals else [*cmd.positional]
        )
        names = self.choose_variable_names(inputs)
        runtime = Task.Runtime()
        runtime.add_docker(cmd.docker_image)

        tool = Task(
            name=self.make_task_name(cmd),
            command=self.make_command(cmd, names),
            version="1.0",
            inputs=self.make_inputs(names),
            outputs=self.make_outputs(names),
            parameter_meta=self.make_parameter_meta(names),
            runtime=runtime,
        )

        return tool.get_string()
191 |
--------------------------------------------------------------------------------
/aclimatise/converter/yml.py:
--------------------------------------------------------------------------------
1 | from io import StringIO
2 | from os import PathLike
3 | from pathlib import Path
4 | from typing import Generator, List
5 |
6 | import attr
7 |
8 | from aclimatise.converter import WrapperGenerator
9 | from aclimatise.model import Command
10 | from aclimatise.yaml import yaml
11 |
12 |
@attr.s(auto_attribs=True)
class YmlGenerator(WrapperGenerator):
    """
    Internal YML format
    """

    @property
    def suffix(self) -> str:
        return ".yml"

    def save_to_file(self, cmd: Command, path: Path) -> None:
        # Serialise the command straight into the target file
        with path.open("w") as handle:
            yaml.dump(cmd, handle)

    def save_to_string(self, cmd: Command) -> str:
        # Dump into an in-memory buffer and return its contents
        sink = StringIO()
        yaml.dump(cmd, sink)
        return sink.getvalue()

    @classmethod
    def format(cls) -> str:
        return "yml"
35 |
--------------------------------------------------------------------------------
/aclimatise/execution/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This module is concerned with running the actual commands so that we can parse their output
3 | """
4 | import abc
5 | from typing import List, Optional
6 |
7 | from aclimatise.model import Command
8 |
9 |
class Executor(abc.ABC):
    """
    An executor is anything that can take a command such as ["bwa"] or
    ["samtools", "sort"] and return the help output
    """

    def __init__(
        self, timeout: int = 10, raise_on_timout=False, max_length: Optional[int] = 1000
    ):
        """
        :param timeout: Amount of inactivity before the execution will be killed
        :param raise_on_timout: If true, execute will raise a TimeoutError if it
            times out. NOTE(review): the parameter name contains a typo
            ("timout"); it is kept as-is for backward compatibility with
            existing keyword callers
        :param max_length: Maximum help-text length; stored for subclasses to
            pass through to the parser
        """
        # Here we initialise all shared parameters that are used by all executors
        self.timeout = timeout
        # Note: stored under the correctly-spelled attribute name
        self.raise_on_timeout = raise_on_timout
        self.max_length = max_length

    def handle_timeout(self, e: Exception) -> str:
        """
        Subclasses can call this when a timeout has occurred. Either raises
        TimeoutError (if raise_on_timeout is set) or returns an empty string
        so the caller can treat the output as "no help text".
        :param e: The timeout exception that caused the timeout
        """
        if self.raise_on_timeout:
            raise TimeoutError()
        else:
            return ""

    def explore(
        self,
        command: List[str],
        max_depth: int = 2,
        parent: Optional[Command] = None,
    ) -> Command:
        """
        Given a command to start with, builds a model of this command and all its subcommands (if they exist)

        :param command: The base command tokens, e.g. ["samtools"]
        :param max_depth: How many levels of subcommands to descend (unused in
            this base implementation)
        :param parent: The parent Command, if this is a subcommand (unused here)
        """
        # If the executor doesn't implement a specific exploration technique, we just execute and ignore subcommands
        return self.convert(command)

    @abc.abstractmethod
    def convert(self, command: List[str]) -> Command:
        """
        Convert a single executable to a Command object, without considering subcommands
        """
        pass
57 |
--------------------------------------------------------------------------------
/aclimatise/execution/docker.py:
--------------------------------------------------------------------------------
1 | import select
2 | import socket
3 | import time
4 | from select import select as original_select
5 | from typing import List, Tuple
6 | from unittest.mock import patch
7 |
8 | from docker.utils.socket import consume_socket_output, demux_adaptor, frames_iter
9 |
10 | from aclimatise.execution.help import CliHelpExecutor
11 | from aclimatise.model import Command
12 |
13 |
def read_socket(sock, timeout: int = None) -> Tuple[bytes, bytes]:
    """
    Reads from a docker socket, and returns everything
    :param sock: Docker socket to read from
    :param timeout: Number of seconds after which we return all data collected
        so far. If None, read until the frame stream is exhausted; previously
        passing None raised a TypeError when the deadline was computed
    :return: A tuple of stdout, stderr
    """
    start_time = time.time()
    out = [b"", b""]
    for frame in frames_iter(sock, tty=False):
        frame = demux_adaptor(*frame)

        # If we hit the timeout, return anyway with whatever we collected.
        # Guard against timeout=None so "no deadline" doesn't crash here.
        if timeout is not None and time.time() >= start_time + timeout:
            return tuple(out)

        # demux_adaptor yields (stdout, stderr); at least one side must be set
        assert frame != (None, None)

        if frame[0] is not None:
            out[0] += frame[0]
        else:
            out[1] += frame[1]
    return tuple(out)
37 |
38 |
class DockerExecutor(CliHelpExecutor):
    """
    An executor that runs the commands on an already-running docker Container (not an Image!)
    """

    def __init__(
        self, container: "docker.models.containers.Container", save_image=True, **kwargs
    ):
        """
        :param container: The object from the Docker API that represents the running container to run inside
        :param save_image: If true (default), save the image name on the command, meaning that the resulting tool
        definitions also use this Docker image
        """
        super().__init__(**kwargs)
        self.container = container
        self.save_image = save_image

    def convert(
        self,
        cmd: List[str],
    ) -> Command:
        """
        Convert a single executable to a Command, additionally recording the
        container's first image tag on it when save_image is set
        """
        # Use the existing function, but patch in the docker image
        cmd = super().convert(cmd)
        if self.save_image:
            # NOTE(review): assumes the container's image has at least one tag
            cmd.docker_image = self.container.image.tags[0]
        return cmd

    def execute(self, command: List[str]) -> str:
        """
        Run the command inside the container via exec, and return its decoded
        output (stdout if any, otherwise stderr). On timeout, defers to
        handle_timeout, which either raises or returns ""
        """
        _, sock = self.container.exec_run(
            command, stdout=True, stderr=True, demux=True, socket=True
        )
        try:
            # These are timeouts that define how long to wait while nothing is being output
            sock._sock.settimeout(self.timeout)
            # Patch select.select so the blocking reads inside the docker
            # frame iterator also observe our timeout instead of hanging
            with patch.object(
                select,
                "select",
                new=lambda rlist, wlist, xlist: original_select(
                    rlist, wlist, xlist, self.timeout
                ),
            ):
                stdout, stderr = read_socket(sock, timeout=self.timeout)
        except socket.timeout as e:
            return self.handle_timeout(e)

        return (stdout or stderr or b"").decode()
85 |
--------------------------------------------------------------------------------
/aclimatise/execution/help.py:
--------------------------------------------------------------------------------
1 | import abc
2 | import copy
3 | import logging
4 | from typing import Iterable, List, Optional
5 |
6 | from pyparsing import ParseBaseException
7 |
8 | from aclimatise.execution import Executor
9 | from aclimatise.integration import parse_help
10 | from aclimatise.model import Command
11 |
12 | logger = logging.getLogger()
13 |
14 |
class CliHelpExecutor(Executor):
    """
    This is an abstract class for any executor that works with command-line help conventions like using help flags in
    order to obtain the help text.
    """

    def __init__(
        self,
        flags: Iterable[str] = (["--help"], ["-h"], [], ["--usage"]),
        try_subcommand_flags=True,
        **kwargs
    ):
        """
        :param flags: Help-flag argument lists to try, in order of preference;
            the empty list (running the command with no flag) is included by default
        :param try_subcommand_flags: If true, retry every help flag on each
            subcommand; otherwise reuse the flag that worked for the parent
        """
        super().__init__(**kwargs)
        self.flags = flags
        self.try_subcommand_flags = try_subcommand_flags

    def explore(
        self,
        command: List[str],
        max_depth: int = 2,
        parent: Optional[Command] = None,
    ) -> Optional[Command]:
        """
        Recursively build a Command for this command line and its subcommands,
        up to max_depth levels deep. Returns None when the command does not
        look like a valid subcommand of its parent.
        """
        logger.info("Exploring {}".format(" ".join(command)))
        best = self.convert(command)
        best.parent = parent

        # Check if this is a valid subcommand
        if parent:
            if best.valid_subcommand():
                logger.info(
                    "{} seems to be a valid subcommand".format(" ".join(command))
                )
            else:
                logger.info(
                    "{} does not seem to be a valid subcommand".format(
                        " ".join(command)
                    )
                )
                return None

        # Recursively call this function on positionals, but only do this if we aren't at max depth
        if best.depth < max_depth:
            # By default we use the best parent help-flag
            # (copy so we don't mutate self.flags for other callers)
            child_executor = copy.copy(self)
            child_executor.flags = (
                self.flags if self.try_subcommand_flags else [best.generated_using]
            )

            # Try each *unique* positional
            for positional in {positional.name for positional in best.positional}:
                subcommand = child_executor.explore(
                    command=command + [positional],
                    parent=best,
                    max_depth=max_depth,
                )
                if subcommand is not None:
                    best.subcommands.append(subcommand)
            # If we had any subcommands then we probably don't have any positionals, or at least don't care about them
            best.positional = []

        return best

    @abc.abstractmethod
    def execute(self, cmd: List[str]) -> str:
        """
        Executes the provided command and returns a string containing the output
        """
        pass

    def convert(
        self,
        cmd: List[str],
    ) -> Command:
        """
        Determine the best Command instance for a given command line tool, by trying each of
        this executor's help flags (self.flags), such as --help and -h, then return the Command.
        Use this if you know the command you want to parse, but you don't know which flags it
        responds to with help text. Unlike :py:func:`aclimatise.explore_command`,
        this doesn't even attempt to parse subcommands.

        :param cmd: The command to analyse, e.g. ['wc'] or ['bwa', 'mem']
        """
        # For each help flag, run the command and then try to parse it
        logger.info("Trying flags for {}".format(" ".join(cmd)))
        commands = []
        for flag in self.flags:
            help_cmd = cmd + flag
            logger.info("Trying {}".format(" ".join(help_cmd)))
            try:
                final = self.execute(help_cmd)
                result = parse_help(cmd, final, max_length=self.max_length)
                # Record which flag produced this parse, for subcommand reuse
                result.generated_using = flag
                commands.append(result)
            except (ParseBaseException, UnicodeDecodeError) as e:
                # If parsing fails, this wasn't the right flag to use
                continue

        # Sort by flags primarily, and if they're equal, return the command with the longest help text, and if they're equal
        # return the command with the most help flags. This helps ensure we get ["bedtools", "--help"] instead of
        # ["bedtools"]
        # NOTE(review): if every flag fails to parse, commands is empty here —
        # confirm Command.best handles an empty list
        best = Command.best(commands)
        logger.info(
            "The best help flag seems to be {}".format(
                " ".join(best.command + best.generated_using)
            )
        )
        return best
125 |
--------------------------------------------------------------------------------
/aclimatise/execution/local.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions that relate to executing the programs of interest, in order to extract their help text
3 | """
4 | import os
5 | import pty
6 | import signal
7 | import subprocess
8 | import sys
9 | from typing import List
10 |
11 | import psutil
12 |
13 | from aclimatise.execution.help import CliHelpExecutor
14 |
15 |
def kill_proc_tree(pid, sig=signal.SIGTERM, include_parent=True):
    """
    Kill a process tree (including grandchildren) with signal "sig".

    Adapted from https://psutil.readthedocs.io/en/latest/#kill-process-tree
    (the original recipe also returned a (gone, still_alive) tuple and took an
    "on_terminate" callback; this simplified version just sends the signal)

    :param pid: PID of the root process; must not be the current process
    :param sig: The signal to send to every process in the tree
    :param include_parent: If true, also signal the root process itself
    """
    assert pid != os.getpid(), "won't kill myself"
    parent = psutil.Process(pid)
    children = parent.children(recursive=True)
    if include_parent:
        children.append(parent)
    for p in children:
        p.send_signal(sig)
30 |
31 |
class LocalExecutor(CliHelpExecutor):
    """
    Executes commands as local subprocesses, with stdin attached to a
    pseudo-terminal.
    """

    def __init__(self, popen_args: dict = {}, **kwargs):
        """
        :param popen_args: Extra keyword arguments merged into the
            subprocess.Popen call, overriding the defaults
        """
        super().__init__(**kwargs)
        # Copy defensively: the {} default object is shared across all calls
        # to __init__, so storing it by reference would alias every instance
        # created with the default to one mutable dict
        self.popen_args = dict(popen_args)

    def execute(self, command: List[str]) -> str:
        """
        Run the command and return its stdout, or stderr if stdout is empty.
        On timeout the whole process tree is killed and handle_timeout decides
        whether to raise or return ""
        """
        master, slave = pty.openpty()
        popen_kwargs = dict(
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stdin=slave,
            encoding="utf-8",
        )
        popen_kwargs.update(self.popen_args)

        # This works a lot like subprocess.run, but we need access to the pid in order to kill the process tree, so use Popen
        with subprocess.Popen(command, **popen_kwargs) as process:
            try:
                stdout, stderr = process.communicate(timeout=self.timeout)
            except subprocess.TimeoutExpired as e:
                # Kill the entire process tree, because sometimes killing the parent isn't enough
                kill_proc_tree(
                    process.pid,
                    include_parent=True,
                    sig=signal.SIGKILL if sys.platform == "linux" else None,
                )
                # Reap the killed process to avoid a zombie
                process.communicate()
                return self.handle_timeout(e)
            finally:
                # Always release both ends of the pty
                os.close(master)
                os.close(slave)

        return stdout or stderr
65 |
--------------------------------------------------------------------------------
/aclimatise/execution/man.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | from typing import Collection, List, Optional
4 |
5 | from aclimatise.execution import Executor
6 | from aclimatise.integration import parse_help
7 | from aclimatise.model import Command
8 |
9 |
class ManPageExecutor(Executor):
    """
    An executor that obtains help text from installed man pages rather than by
    running the target program itself.
    """

    def __init__(
        self,
        man_paths: List[str] = [],
        subcommand_sep: Collection[str] = ("-", "_"),
        man_flags: Collection[str] = ["--no-subpages"],
        **kwargs
    ):
        """
        :param man_paths: Additional paths within which to look for man pages
        :param subcommand_sep: A list of separators to use to generate man paths from subcommands. For example
        ``git branch`` has an associated man page at ``git-branch``, using the hyphen as a separator.
        :param man_flags: Additional flags to pass to the ``man`` command
        """
        super().__init__(**kwargs)
        # Copy the list defaults: the [] and ["--no-subpages"] default objects
        # are shared between every call to __init__, so storing them by
        # reference would let one instance's mutations leak into all others
        self.man_paths = list(man_paths)
        self.subcommand_sep = subcommand_sep
        self.man_flags = list(man_flags)

    def execute_with_sep(self, command: List[str], separator: str = "-") -> str:
        """
        Returns the man page text for the provided command, using the provided subcommand separator, or an empty string
        if this man page doesn't exist
        """
        env = {**os.environ.copy(), "MANPAGER": "cat"}  # Don't use a pager
        if len(self.man_paths) > 0:
            env.update({"MANPATH": ":".join(self.man_paths)})

        # e.g. ["git", "branch"] with "-" becomes the page name "git-branch"
        sub_man = separator.join(command)
        result = subprocess.run(
            ["man", *self.man_flags, sub_man],
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if result.returncode == 0:
            return result.stdout.decode()

        # A non-zero exit code means the page wasn't found
        return ""

    def convert(self, command: List[str]) -> Command:
        """
        Parse the man page for this command into a Command. For subcommands,
        every configured separator is tried and the best parse wins.
        """
        if len(command) == 1:
            return parse_help(
                command, self.execute_with_sep(command), max_length=self.max_length
            )
        else:
            commands = []
            for sep in self.subcommand_sep:
                man_text = self.execute_with_sep(command, sep)
                commands.append(
                    parse_help(command, man_text, max_length=self.max_length)
                )
            return Command.best(commands)
63 |
--------------------------------------------------------------------------------
/aclimatise/flag_parser/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/aclimatise/flag_parser/__init__.py
--------------------------------------------------------------------------------
/aclimatise/flag_parser/elements.py:
--------------------------------------------------------------------------------
1 | """
2 | Re-usable parser elements that aren't tied to the parser object
3 | """
4 | from typing import List
5 |
6 | from pyparsing import *
7 |
8 | from aclimatise.model import *
9 |
#: Characters that delimit flag synonyms
synonym_delim_chars = ",|/"
#: Characters that can start a CLI element, e.g. "-@"
element_start_chars = alphanums + "@"
#: Characters that can be in the middle of a CLI element, e.g. "-some-arg"
element_body_chars = element_start_chars + "-_."
#: Characters that can only be used in arguments for flags e.g. ""
argument_body_chars = element_body_chars + "|"
#: Characters that can be in the middle of an argument that has brackets around it, e.g. "-arg "
delimited_body_chars = argument_body_chars + r" \/"

#: One or more line endings; horizontal whitespace is ignored, and the match is suppressed
NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()).setName("Newline")


#: A bare CLI identifier, e.g. the "max-count" part of "--max-count"
cli_id = Word(initChars=element_start_chars, bodyChars=element_body_chars)

# min=2: presumably so single characters aren't treated as positional names — confirm
positional_name = Word(
    initChars=element_start_chars, bodyChars=element_body_chars, min=2
)

# short_flag = originalTextFor(Literal('-') + Word(alphanums + '@', max=1))
# """A short flag has only a single dash and single character, e.g. `-m`"""
# long_flag = originalTextFor(Literal('--') + cli_id)
# """A long flag has two dashes and any amount of characters, e.g. `--max-count`"""
any_flag = (
    originalTextFor("-" + Optional("-") + cli_id).leaveWhitespace().setName("Flag")
)
"""The flag is the part with the dashes, e.g. `-m` or `--max-count`"""

flag_arg_sep = (
    Or([Literal("="), Literal(" ")]).leaveWhitespace().setName("FlagArgSeparator")
)
"""The term that separates the flag from the arguments, e.g. in `--file=FILE` it's `=`"""

arg = Word(initChars=element_start_chars, bodyChars=argument_body_chars)
"""A single argument name, e.g. `FILE`"""

# Forward-declared so the optional-args grammar below can be recursive
optional_args = Forward().setName("OptionalArg")
49 |
def visit_optional_args(s, lok, toks):
    """
    Parse action that folds "ARG[,ARG[,...]]"-style token lists into a single
    OptionalFlagArg model instance.
    """
    # Base case: a lone argument name with no optional tail
    if len(toks) == 1:
        return OptionalFlagArg(names=[toks[0]])

    first, _, sep, second, _ = toks
    if isinstance(second, OptionalFlagArg):
        # Nested case: prepend our name onto the inner node already built
        return OptionalFlagArg(names=[first] + second.names, separator=sep)
    if isinstance(second, str):
        # Two plain names joined by a separator
        return OptionalFlagArg(names=[first, second], separator=sep)
59 |
60 |
# Recursive definition: an argument optionally followed by "[, <more args>]"
optional_args <<= (
    (arg + "[" + "," + (optional_args ^ arg) + "]")
    .setParseAction(visit_optional_args)
    .setName("OptionalArgs")
)
"""
When the flag has multiple arguments, some of which are optional, e.g.
-I FLOAT[,FLOAT[,INT[,INT]]]
"""

# simple_arg = arg.copy().setParseAction(
#     lambda s, loc, toks: SimpleFlagArg(toks[0]))
simple_arg = (
    (
        Or(
            [
                Word(initChars=element_start_chars, bodyChars=element_body_chars),
                # Allow spaces in the argument name, but only if it's enclosed in angle brackets
                (
                    Literal("<").suppress()
                    + Word(
                        initChars=element_start_chars, bodyChars=delimited_body_chars
                    )
                    + Literal(">").suppress()
                ).setName("angle_delimited_arg"),
            ]
        )
    )
    .leaveWhitespace()
    .setParseAction(lambda s, loc, toks: SimpleFlagArg(toks[0]))
).setName("SimpleArg")

# Matches the trailing ".." / "..." that marks a repeated argument
repeated_segment = (
    (ZeroOrMore(arg) + Literal(".")[2, 3].suppress() + Optional(arg))
    .setParseAction(lambda s, loc, toks: RepeatFlagArg(toks[-1] or toks[0]))
    .setName("RepeatedSegment")
) # Either ".." or "..."

list_type_arg = (
    (
        (arg + repeated_segment)
        ^ (arg + Literal("[").suppress() + repeated_segment + Literal("]").suppress())
    )
    .setParseAction(lambda s, loc, toks: toks[1])
    .setName("repeated_arg")
)
"""
When the argument is an array of values, e.g. when the help says `--samout SAMOUTS [SAMOUTS ...]` or
`-i FILE1 FILE2 .. FILEn`

"""

choice_type_arg = (
    nestedExpr(opener="{", closer="}", content=delimitedList(cli_id, delim=","))
    .setParseAction(lambda s, loc, toks: ChoiceFlagArg(set(toks[0])))
    .setName("ChoiceArg")
)
"""When the argument is one from a list of values, e.g. when the help says `--format {sam,bam}`"""
119 |
120 |
def noop(s, loc, toks):
    """Identity parse action: return the matched tokens unchanged."""
    return toks
123 |
124 |
# Separator (suppressed) followed by exactly one argument form. The `|`
# alternatives are tried in order, so the more specific forms (lists, choices,
# optional groups) are attempted before the catch-all simple_arg
arg_expression = (
    (
        flag_arg_sep.suppress()
        + (list_type_arg | choice_type_arg | optional_args | simple_arg)
    )
    # .leaveWhitespace()
    .setParseAction(lambda s, loc, toks: toks[0])
)
# Disable whitespace skipping so the separator must touch the argument
arg_expression.skipWhitespace = False
"""An argument with separator, e.g. `=FILE`"""
135 |
# A flag name plus an optional argument, collapsed into one FlagSynonym.
# When no argument is matched, the synonym gets an EmptyFlagArg sentinel
flag_with_arg = (
    (any_flag + Optional(arg_expression))
    .setParseAction(
        lambda s, loc, toks: (
            FlagSynonym(
                name=toks[0], argtype=toks[1] if len(toks) > 1 else EmptyFlagArg()
            )
        )
    )
    .setName("FlagWithArg")
)
flag_with_arg.skipWhitespace = True
"""e.g. `--max-count=NUM`"""
149 |
# Either bare whitespace, or a delimiter character optionally padded by
# whitespace on either side
synonym_delim = (
    White() ^ (Optional(White()) + Char(synonym_delim_chars) + Optional(White()))
).leaveWhitespace()
"""
The character used to separate synonyms of a flag. Depending on the help text this might be a comma, pipe or space
"""

description_sep = White(min=1).suppress()
"""
The section that separates a flag from its description. This needs to be broad enough that it will match all different
formats of help outputs but not so broad that every single word starting with a dash will be matched as a flag
"""

# block_element_prefix = LineStart().leaveWhitespace()
block_element_prefix = (
    ((LineStart().leaveWhitespace() ^ Literal(":")) + White(min=1))
    .setName("block_element_prefix")
    .leaveWhitespace()
    .suppress()
)
"""
Each element (e.g. flag) in a list of flags must either start with a colon or nothing

e.g. in this example "index" is prefixed by a colon and "mem" is prefixed by a LineStart

Command: index         index sequences in the FASTA format
mem           BWA-MEM algorithm
"""

flag_synonyms = delimitedList(flag_with_arg, delim=synonym_delim).setName(
    "FlagSynonyms"
)
"""
When the help lists multiple synonyms for a flag, e.g:
-n, --lines=NUM
"""
186 |
187 |
188 | # The description of the flag
189 | # e.g. for grep's `-o, --only-matching`, this is:
190 | # "Print only the matched (non-empty) parts of a matching line, with each such part on a separate output line."
191 | # desc_line = originalTextFor(SkipTo(LineEnd())).setName(
192 | # "DescriptionLine"
193 | # ) # .setParseAction(success))
194 | # desc_line = originalTextFor(
195 | # delimitedList(Regex("[^\s]+"), delim=" ", combine=True)
196 | # ).leaveWhitespace()
197 |
198 |
def visit_description_line(s, loc, toks):
    """
    Parse action: reduce a matched description line to its text, with
    surrounding whitespace (including the captured line ending) removed.
    """
    line = toks[0]
    return line.strip()
201 |
202 |
# Consumes the rest of the physical line (including the newline, which the
# parse action strips). Whitespace chars exclude "\n" so the element cannot
# silently skip across line boundaries
description_line = (
    SkipTo(LineEnd(), include=True)
    .setParseAction(visit_description_line)
    .setWhitespaceChars(" \t")
).setName("DescriptionLine")
208 |
--------------------------------------------------------------------------------
/aclimatise/integration.py:
--------------------------------------------------------------------------------
1 | import typing
2 |
3 | import attr
4 |
5 | from aclimatise.flag_parser.parser import CliParser
6 | from aclimatise.model import Command, Flag
7 | from aclimatise.usage_parser.parser import UsageParser
8 |
9 |
def parse_help(cmd: typing.Collection[str], text: str, max_length=1000) -> Command:
    """
    Parse a string of help text into a Command. Use this if you already have run the executable and extracted the
    help text yourself

    :param cmd: List of arguments used to generate this help text, e.g. ['bwa', 'mem']
    :param text: The help text to parse
    :param max_length: If the input text has more than this many lines, no attempt will be made to parse the file (as
        it's too large, will likely take a long time, and there's probably an underlying problem if this has happened).
        In this case, an empty Command will be returned
    :return: A Command merging the results of the flag parser and the usage parser
    """
    if len(text.splitlines()) > max_length:
        # Input too large to parse safely; bail out with an empty Command.
        # Keyword argument used for consistency with the Command(command=...)
        # construction used elsewhere in this codebase
        return Command(command=list(cmd))

    # Run both parsers over the same text
    help_command = CliParser().parse_command(name=cmd, cmd=text)
    usage_command = UsageParser().parse_usage(list(cmd), text)

    # Combine the two commands by picking from the help_command where possible, otherwise falling back on the usage
    fields = dict(
        help_text=text,
        # Use the help command's positionals preferentially, but fall back to usage
        positional=help_command.positional or usage_command.positional,
        # Combine the flags from both help and usage
        named=list(Flag.combine([help_command.named, usage_command.named])),
    )
    # For every remaining Command field, prefer a value already chosen above,
    # then the help parser's value, then the usage parser's value
    for field in attr.fields(Command):
        fields[field.name] = (
            fields.get(field.name)
            or getattr(help_command, field.name)
            or getattr(usage_command, field.name)
        )

    return Command(**fields)
43 |
--------------------------------------------------------------------------------
/aclimatise/nlp.py:
--------------------------------------------------------------------------------
1 | import spacy
2 | import wordsegment
3 |
4 |
@spacy.language.Language.component("prevent_sentence_boundary")
def prevent_sentence_boundary_detection(doc):
    """
    spaCy pipeline component that marks every token as not starting a
    sentence, so the pipeline treats the whole document as a single sentence.
    """
    for token in doc:
        token.is_sent_start = False
    return doc
10 |
11 |
try:
    # Two copies of the model: one with default sentence segmentation, and one
    # with the prevent_sentence_boundary component inserted before the parser
    # so that it never splits the text into multiple sentences
    nlp = spacy.load("en_core_web_sm")
    no_sentences = spacy.load("en_core_web_sm")
    no_sentences.add_pipe(
        "prevent_sentence_boundary", name="prevent-sbd", before="parser"
    )
except IOError as err:
    # Chain the original exception so the real cause (missing model package,
    # broken installation, etc.) is preserved in the traceback. The install
    # hint names the actual model package loaded above; the bare `en` shortcut
    # is not supported by the spaCy 3 API used in this module.
    raise Exception(
        "Spacy model doesn't exist! Install it with `python -m spacy download en_core_web_sm`"
    ) from err

# We load the spacy and the wordsegment models here as a kind of singleton pattern, to avoid multiple functions loading
# redundant copies

if len(wordsegment.WORDS) == 0:
    wordsegment.load()
28 |
29 |
def is_sentence(text: str, threshold: float = 0.8) -> bool:
    """
    Returns a bool that indicates if this text is likely a sentence. This should probably be replaced by a machine
    learning classifier in the future

    :param text: The text to classify
    :param threshold: If the ratio of non-word tokens over word tokens is higher than this, then return False
    """
    doc = no_sentences(text)
    sentences = list(doc.sents)
    if not sentences:
        return False

    # Count word-like vs non-word-like tokens in the first (only) sentence
    words = 0
    non_words = 0
    for token in sentences[0]:
        tag = token.pos_
        if tag == "SPACE":
            # Ignore whitespace entirely
            continue
        if tag in {"X", "SYM", "PUNCT", "NUM"}:
            non_words += 1
        words += 1

    if words == 0:
        # No countable tokens at all: treated as a sentence (matches the
        # original short-circuit behaviour)
        return True
    return non_words / words < threshold
58 |
--------------------------------------------------------------------------------
/aclimatise/parser.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from pyparsing import *
4 |
5 |
class IndentCheckpoint(ParseElementEnhance):
    """
    This is a wrapper element that simply rolls back changes in the indent stack whenever the contained element
    fails to match. This ensures the stack remains accurate
    """

    def __init__(self, expr: ParserElement, indent_stack: List[int]):
        super().__init__(expr)
        # The shared indentation stack to protect
        self.stack = indent_stack

    def parseImpl(self, instring, loc, doActions=True):
        # Snapshot the stack so it can be restored if the inner parse fails
        snapshot = self.stack[:]
        try:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        except ParseException:
            # Restore the stack in place (callers hold a reference to the same
            # list), then let the failure propagate unchanged
            self.stack[:] = snapshot
            raise

    def __str__(self):
        if hasattr(self, "name"):
            return self.name
        # Cache the computed representation, following pyparsing convention
        if self.strRepr is None:
            self.strRepr = "Indented[" + str(self.expr) + "]"
        return self.strRepr
35 |
36 |
class IndentParserMixin:
    """
    A mixin that maintains an indent stack, and utility methods for them
    """

    def __init__(self):
        # pyparsing columns are 1-indexed, so column 1 is the un-indented baseline
        self.stack = [1]

    def pop_indent(self):
        """
        Returns a zero-width parser element that, when reached, discards the
        most recent indentation level from the stack
        """

        def check_indent(s, l, t):
            self.stack.pop()

        # NOTE(review): `Empty() + Empty()` builds an And of two zero-width
        # elements rather than a single Empty; presumably this controls where
        # pyparsing skips whitespace before the action fires — confirm before
        # simplifying
        return (Empty() + Empty()).setParseAction(check_indent).setName("Pop")

    def push_indent(self):
        """
        Returns a zero-width parser element that records the column of the
        current parse location as a new indentation level
        """

        def check_indent(s, l, t):
            curCol = col(l, s)
            self.stack.append(curCol)

        return (Empty() + Empty()).setParseAction(check_indent).setName("Push")

    def peer_indent(self, allow_greater=False):
        """
        Returns a zero-width parser element that succeeds only if the current
        column matches the indentation level on top of the stack

        :param allow_greater: Allow greater indent than the previous indentation, but don't add it to the stack
        """

        def check_peer_indent(s, l, t):
            if l >= len(s):
                # At end of input there is no column to check
                return
            curCol = col(l, s)
            if allow_greater and curCol >= self.stack[-1]:
                return
            elif curCol == self.stack[-1]:
                return
            else:
                if curCol > self.stack[-1]:
                    raise ParseException(s, l, "illegal nesting")
                raise ParseException(s, l, "not a peer entry")

        return Empty().setParseAction(check_peer_indent).setName("Peer")

    def indent(self, update=True):
        """
        Returns a zero-width parser element that succeeds on an increase in
        indentation relative to the top of the stack

        :param update: If true, update the stack, otherwise simply check for an indent
        """

        def check_sub_indent(s, l, t):
            curCol = col(l, s)
            if curCol > self.stack[-1]:
                if update:
                    self.stack.append(curCol)
            else:
                raise ParseException(s, l, "not a subentry")

        return (Empty() + Empty().setParseAction(check_sub_indent)).setName("Indent")

    def dedent(self, precise=True):
        """
        Returns a zero-width parser element that succeeds on a decrease in
        indentation, popping at most one level off the stack per match

        :param precise: If true, the new column must be one of the indentation
            levels already recorded on the stack
        """

        def check_dedent(s, l, t):
            if l >= len(s):
                return
            curCol = col(l, s)
            if precise and self.stack and curCol not in self.stack:
                raise ParseException(s, l, "not an unindent")
            if curCol < self.stack[-1]:
                self.stack.pop()

        return Empty().setParseAction(check_dedent).setName("Unindent")
104 |
105 |
106 | __all__ = [IndentCheckpoint, IndentParserMixin]
107 |
--------------------------------------------------------------------------------
/aclimatise/usage_parser/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/aclimatise/usage_parser/__init__.py
--------------------------------------------------------------------------------
/aclimatise/usage_parser/elements.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from pyparsing import *
4 |
5 | # from aclimatise.flag_parser.elements import cli_id, any_flag, long_flag, short_flag, flag_with
6 | from aclimatise.flag_parser.elements import (
7 | arg,
8 | argument_body_chars,
9 | delimited_body_chars,
10 | element_body_chars,
11 | element_start_chars,
12 | flag_with_arg,
13 | repeated_segment,
14 | )
15 | from aclimatise.model import (
16 | Command,
17 | EmptyFlagArg,
18 | Flag,
19 | FlagSynonym,
20 | Positional,
21 | SimpleFlagArg,
22 | )
23 | from aclimatise.usage_parser.model import UsageElement
24 |
25 |
def delimited_item(open, el, close):
    """
    Build a parser element matching `el` surrounded by the `open` and `close`
    delimiters, whose parse action drops the first and last tokens (the
    delimiters themselves).
    """
    # NOTE(review): `open` shadows the builtin of the same name; renaming it
    # would change the keyword-argument interface, so it is left as-is

    def action(s, loc, toks):
        return toks[1:-1]

    return (open + el + close).setParseAction(action)
31 |
32 |
# Forward declaration: the full alternation is assigned with <<= further down
usage_element = Forward()
element_char = arg.copy()  # Word(initChars=element_start_chars, bodyChars=)

mandatory_element = (
    element_char.copy()
    .setParseAction(
        lambda s, loc, toks: UsageElement(
            text=toks[0],
        )
    )
    .setName("MandatoryElement")
)
"""
A mandatory element in the command-line invocation. Might be a variable or a constant
"""

# NOTE: this .setParseAction call replaces the delimiter-stripping action that
# delimited_item installed, so the tokens here still include "<" at index 0 —
# hence the inner word is toks[1]
variable_element = (
    delimited_item(
        "<", Word(initChars=element_start_chars, bodyChars=delimited_body_chars), ">"
    )
    .setParseAction(lambda s, loc, toks: UsageElement(text=toks[1], variable=True))
    .setName("VariableElement")
)
"""
Any element inside angle brackets is a variable, meaning you are supposed to provide your own value for it.
However, some usage formats show variables without the angle brackets
"""
61 |
def visit_optional_section(s, loc, toks):
    """
    Parse action for `optional_section`: drop the surrounding bracket tokens
    and flag every enclosed element as optional.
    """
    contents = toks[1:-1]
    for element in contents:
        element.optional = True
    return contents
67 |
68 |
69 | optional_section = (
70 | delimited_item("[", OneOrMore(usage_element), "]")
71 | .setParseAction(visit_optional_section)
72 | .setName("OptionalSection")
73 | )
74 | """
75 | Anything can be nested within square brackets, which indicates that everything there is optional
76 | """
77 |
78 | # flag_arg = Or([
79 | # variable_element,
80 | # element_char
81 | # ])
82 | """
83 | The argument after a flag, e.g. in "-b " this would be everything after "-b"
84 | """
85 |
86 | # short_flag_name = Char(alphas)
87 | """
88 | The single character for a short flag, e.g. "n" for a "-n" flag
89 | """
90 |
91 | # short_flag = (
92 | # '-' + short_flag_name + White() + Optional(flag_arg)
93 | # ).setParseAction(
94 | # lambda s, loc, toks:
95 | # Flag.from_synonyms([FlagSynonym(
96 | # name=toks[0] + toks[1],
97 | # argtype=SimpleFlagArg(toks[3]) if toks[3] else EmptyFlagArg()
98 | # )], description=None)
99 | # )
100 | """
101 | The usage can contain a flag with its argument
102 | """
103 |
104 | # long_flag = (
105 | # '--' + element_char + White() + Optional(flag_arg)
106 | # ).setParseAction(lambda s, loc, toks: Flag.from_synonyms([FlagSynonym(
107 | # name=toks[1],
108 | # argtype=SimpleFlagArg(toks[3]) if toks[3] else EmptyFlagArg()
109 | # )]))
110 | """
111 | The usage can contain a flag with its argument
112 | """
113 |
114 |
def visit_short_flag_list(s, loc, toks):
    """
    Parse action for a combined short-flag group (e.g. `-nurlf`): emits one
    argument-less Flag per character after the leading dash token.
    """
    flags = []
    for letter in toks[1:]:
        synonym = FlagSynonym(name="-" + letter, argtype=EmptyFlagArg())
        flags.append(Flag.from_synonyms([synonym], description=None))
    return flags
122 |
123 |
124 | # short_flag_list = ('-' + short_flag_name + OneOrMore(short_flag_name)).setParseAction(
125 | # visit_short_flag_list).leaveWhitespace()
126 | """
127 | Used to illustrate where a list of short flags could be used, e.g. -nurlf indicates -n or -u etc
128 | """
129 |
130 |
def visit_list_element(s, loc, toks):
    """
    Parse action for `list_element`: mark each matched element/flag as
    repeatable and return only the last one, whose name is usually the most
    descriptive (e.g. 'inN.bam' rather than 'in2.bam').
    """
    elements = [tok for tok in toks if isinstance(tok, (UsageElement, Flag))]
    for element in elements:
        element.repeatable = True
    # NOTE(review): assumes at least one non-suppressed element was matched;
    # a match consisting only of suppressed "options" placeholders would raise
    # IndexError here — confirm the grammar prevents this
    return elements[-1]
138 |
139 |
# The literal word "option"/"options" (any case) is matched but suppressed, so
# it never becomes a positional element
options_placeholder = (
    Regex("options?", flags=re.IGNORECASE).suppress().setName("OptionsPlaceholder")
)

# One or more elements, the ".."/"..." repetition marker, then optionally a
# final element (e.g. `fileN`)
list_element = (
    (
        OneOrMore(options_placeholder ^ mandatory_element ^ variable_element)
        + Literal(".")[2, 3]
        + Optional(options_placeholder ^ mandatory_element ^ variable_element)
    )
    .setParseAction(visit_list_element)
    .setName("list_element")
)
"""
When one or more arguments are allowed, e.g. " ... "
"""

# Wrapped in And([...]) so the parse action sees the FlagSynonym as a token
# list suitable for Flag.from_synonyms
usage_flag = (
    And([flag_with_arg])
    .setParseAction(lambda s, loc, toks: Flag.from_synonyms(toks, description=""))
    .setName("usage_flag")
)


# Resolve the forward declaration: alternatives are ordered so that composite
# forms (optional sections, lists, flags) win over the bare element forms
usage_element <<= Or(
    [
        optional_section,
        list_element,
        # short_flag_list,
        usage_flag,
        variable_element,
        options_placeholder,
        mandatory_element,
    ]
).setName("usage_element")
175 |
# Module-level indent stack; column 1 is the baseline entry that must survive
stack = [1]


def visit_usage(s, loc, toks):
    """
    Parse action for a usage block: reset the module-level indent stack to its
    initial single-entry state (discarding any leftover levels from a failed
    parse), then return the first element of the first token group.
    """
    # Fix up stack inconsistencies: drop everything above the baseline
    del stack[1:]
    return toks[0][0]
185 |
186 |
usage_example = OneOrMore(usage_element, stopOn=LineEnd())
"""
Each usage example is a single line of text, e.g.

shell [options] -e string
"""

# NOTE(review): this module-level `usage` element appears superseded by
# UsageParser.usage in usage_parser/parser.py (which is what parse_usage runs);
# confirm before removing
usage = (
    LineStart()
    + Regex("usage:", flags=re.IGNORECASE).suppress()
    + OneOrMore(usage_example)
)  # .setParseAction(visit_usage).setDebug()
"""
Each usage block can have one or more lines of different usage. e.g.

Usage:
shell [options] -e string
execute string in V8
shell [options] file1 file2 ... filek
run JavaScript scripts in file1, file2, ..., filek
"""
208 |
209 |
210 | # usage = Regex('usage:', flags=re.IGNORECASE).suppress() + delimitedList(usage_element, delim=Or([' ', '\n']))
211 | # indentedBlock(
212 | # delimitedList(usage_element, delim=' '),
213 | # indentStack=stack,
214 | # indent=True
215 | # )
216 |
--------------------------------------------------------------------------------
/aclimatise/usage_parser/model.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 |
3 | import attr
4 |
5 | from aclimatise import model
6 | from aclimatise.yaml import AttrYamlMixin
7 |
8 |
@attr.s(auto_attribs=True)
class UsageElement(AttrYamlMixin):
    """
    A single element parsed from a usage example line, e.g. one word of
    `bwa mem <reference>`.
    """

    text: str
    """
    The name of this element, as defined in the usage section
    """

    optional: bool = False
    """
    Whether or not this element is required
    """

    variable: bool = False
    """
    True if this is a variable, ie you are supposed to replace this text with your own, False if this is a constant
    that you shouldn't change, e.g. the name of the application
    """

    # flag: bool = False
    """
    True if this is a flag (starts with dashes) and not a regular element
    """

    repeatable: bool = False
    """
    If this flag/argument can be used multiple times
    """
36 |
37 |
@attr.s(auto_attribs=True)
class UsageInstance(AttrYamlMixin):
    """
    One complete usage example: a sequence of elements/flags describing a
    valid invocation, plus an optional free-text description.
    """

    items: List[UsageElement]
    """
    The string of elements that make up a valid command invocation
    """

    description: Optional[str] = None
    """
    Description of this invocation
    """

    @property
    def positionals(self) -> List["model.Positional"]:
        """
        Return all the positional arguments that could be derived from this instance
        """
        return [
            model.Positional(
                description="", position=i, name=el.text, optional=el.optional
            )
            for i, el in enumerate(self.items)
            if isinstance(el, UsageElement)
        ]

    @property
    def flags(self) -> List["model.Flag"]:
        """
        Return all the flags that could be derived from this instance
        """
        return [el for el in self.items if isinstance(el, model.Flag)]
69 |
--------------------------------------------------------------------------------
/aclimatise/usage_parser/parser.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import List
3 |
4 | from pyparsing import *
5 |
6 | from aclimatise.flag_parser.elements import description_line
7 | from aclimatise.parser import IndentCheckpoint, IndentParserMixin
8 | from aclimatise.usage_parser.elements import usage_example
9 | from aclimatise.usage_parser.model import UsageElement, UsageInstance
10 |
11 | from .elements import *
12 |
13 |
def normalise_cline(tokens):
    """
    Normalise a command line string, such as ["dotnet", "Pisces.dll"], converting it to ["dotnet", "pisces"]

    :param tokens: Command-line segments to normalise
    :return: Each segment lowercased, with its final file extension removed
    """
    normalised = []
    for token in tokens:
        normalised.append(Path(token.lower()).stem)
    return normalised
21 |
22 |
class UsageParser(IndentParserMixin):
    """
    Parses the "Usage:" section of a help text into a Command, using the
    indent stack from IndentParserMixin to recognise description lines that
    are indented underneath each usage example.
    """

    def __init__(self):
        """
        Builds the pyparsing grammar for usage blocks. The elements are
        constructed here (rather than at module level) because they close over
        this instance's indent stack.
        """
        super().__init__()

        def visit_description_block(s, loc, toks):
            # Join the individual description lines back into one string
            return "\n".join(toks)

        # One or more lines indented further than the usage example itself.
        # IndentCheckpoint rolls back the indent stack if the match fails
        self.description_block = IndentCheckpoint(
            self.indent()
            + (self.peer_indent(allow_greater=True) + description_line)[1, ...]
            + self.dedent(precise=False),
            indent_stack=self.stack,
        ).setParseAction(visit_description_block)

        def visit_single_usage(s, loc, toks):
            # A lone usage line becomes one UsageInstance with no description
            return [UsageInstance(items=list(toks), description=None)]

        self.single_usage = usage_example.copy().setParseAction(visit_single_usage)

        def visit_described_usage(s, loc, toks):
            # The description block, if matched, is the final (string) token
            if len(toks) > 0 and isinstance(toks[-1], str):
                description = toks[-1]
            else:
                description = None

            # NOTE(review): when no description matched, toks[:-1] still drops
            # the last usage element — confirm this is intended
            return UsageInstance(items=list(toks[:-1]), description=description)

        self.described_usage = (
            usage_example + Optional(self.description_block)
        ).setParseAction(visit_described_usage)

        def visit_multi_usage(s, loc, toks):
            return list(toks)

        # One or more indented usage examples following the "usage:" line
        self.multi_usage = (
            LineEnd().suppress()
            + (
                IndentCheckpoint(
                    # This indent ensures that every usage example is somewhat indented (more than column 1, at least),
                    # and also sets the baseline from which the description block is measured
                    self.indent() + self.described_usage
                    # The pop here doesn't check that we have dedented, but rather it just resets the indentation so that
                    # a new usage block can have a different indentation
                    + self.pop_indent(),
                    indent_stack=self.stack,
                )
            )[1, ...]
        ).setParseAction(visit_multi_usage)

        # Top-level entry point: "usage:" (any case) followed by either an
        # indented multi-example block or a single inline example
        self.usage = (
            LineStart()
            + Regex("usage:", flags=re.IGNORECASE).suppress()
            + Optional(self.multi_usage | self.single_usage)
        ).setWhitespaceChars(
            "\t "
        )  # .setParseAction(visit_usage).setDebug()

    def parse_usage(self, cmd: List[str], usage: str, debug: bool = False) -> Command:
        """
        Scan `usage` for usage blocks and convert them into a Command.

        :param cmd: The command line that produced the help text, e.g. ['bwa', 'mem']
        :param usage: The raw help text to scan
        :param debug: If true, enable pyparsing debug output for the grammar
        :return: A Command with deduplicated positionals and flags
        """
        # return self.usage.searchString(usage)
        usage_blocks = self.usage.setDebug(debug).searchString(usage)
        if not usage_blocks:
            # If we had no results, return an empty command
            return Command(command=cmd)

        instances = []
        all_positionals = []
        all_flags = []
        for block in usage_blocks:
            for instance in block:

                # Split the instance's items into positional elements and flags
                positional = [
                    tok for tok in instance.items if isinstance(tok, UsageElement)
                ]
                flags = [tok for tok in instance.items if isinstance(tok, Flag)]

                # Remove an "options" argument which is just a proxy for other flags
                # positional = [pos for pos in positional if pos.text.lower() != "options"]
                # The usage often starts with a re-iteration of the command name itself. Remove this if present
                # NOTE(review): `positional` shrinks inside this loop while the
                # range is computed from its original length; later iterations
                # see empty slices and simply fail the comparison — confirm
                # this is the intended behaviour
                for i in range(len(positional)):
                    # For each positional argument, if the entire cmd string is present, slice away this and everything before it
                    end = i + len(cmd)
                    if end <= len(positional) and normalise_cline(
                        [pos.text for pos in positional[i:end]]
                    ) == normalise_cline(cmd):
                        positional = positional[end:]

                if not any([tok for tok in positional if tok.variable]):
                    # If the usage didn't explicitly mark anything as a variable using < > brackets, we have to assume that
                    # everything other than flags are positional elements
                    for element in positional:
                        element.variable = True

                instances.append(instance)
                # Convert these UsageElements into Positionals
                all_positionals += [
                    Positional(
                        description="", position=i, name=el.text, optional=el.optional
                    )
                    for i, el in enumerate(positional)
                ]
                all_flags += flags

        return Command(
            command=cmd,
            positional=Positional.deduplicate(all_positionals),
            named=Flag.deduplicate(all_flags),
        )
130 |
--------------------------------------------------------------------------------
/aclimatise/yaml.py:
--------------------------------------------------------------------------------
1 | from ruamel.yaml import YAML, yaml_object
2 | from ruamel.yaml.comments import CommentedMap
3 |
4 | yaml = YAML()
5 |
6 |
class AttrYamlMixin:
    """
    Mixin that lets attrs-based classes be constructed from a ruamel.yaml
    mapping node via the `from_yaml` hook.
    """

    @classmethod
    def from_yaml(cls, constructor, node):
        # Build a plain mapping from the YAML node, then instantiate the class
        # by passing the mapping entries as keyword arguments
        state = CommentedMap()
        constructor.construct_mapping(node, state)
        return cls(**state)
13 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | ../README.md
--------------------------------------------------------------------------------
/docs/_static/railroad.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/docs/_static/railroad.html
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | Python API
2 | ==========
3 | .. automodule:: aclimatise
4 | :members: parse_help, best_cmd, explore_command, execute_cmd
5 | :undoc-members:
6 | :show-inheritance:
7 |
8 |
--------------------------------------------------------------------------------
/docs/cli.rst:
--------------------------------------------------------------------------------
1 | Command Line Interface
2 | ======================
3 |
4 | .. click:: aclimatise.cli:main
5 | :prog: aclimatise
6 | :show-nested:
7 | :commands: explore, pipe
8 |
9 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 |
17 |
18 | # -- Project information -----------------------------------------------------
19 |
20 | project = "aCLImatise"
21 | copyright = "2020, Michael Milton"
22 | author = "Michael Milton"
23 |
24 | # The full version, including alpha/beta/rc tags
25 | release = "0.0.16"
26 |
27 |
28 | # -- General configuration ---------------------------------------------------
29 |
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = [
34 | "sphinx.ext.autodoc",
35 | "sphinx_click.ext",
36 | ]
37 |
38 | # Add any paths that contain templates here, relative to this directory.
39 | templates_path = ["_templates"]
40 |
41 | # List of patterns, relative to source directory, that match files and
42 | # directories to ignore when looking for source files.
43 | # This pattern also affects html_static_path and html_extra_path.
44 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
45 |
46 |
47 | # -- Options for HTML output -------------------------------------------------
48 |
49 | # The theme to use for HTML and HTML Help pages. See the documentation for
50 | # a list of builtin themes.
51 | #
52 | html_theme = "alabaster"
53 |
54 | # Add any paths that contain custom static files (such as style sheets) here,
55 | # relative to this directory. They are copied after the builtin static files,
56 | # so a file named "default.css" will overwrite the builtin "default.css".
57 | html_static_path = ["_static"]
58 |
--------------------------------------------------------------------------------
/docs/grammar.rst:
--------------------------------------------------------------------------------
1 | Grammar
2 | =======
3 |
Internally, aCLImatise uses a `Parsing Expression Grammar <https://en.wikipedia.org/wiki/Parsing_expression_grammar>`_,
which is a class of recursive grammar used to parse programming languages. This grammar is expressed and parsed using
the `PyParsing <https://github.com/pyparsing/pyparsing>`_ Python library. To help visualise the grammar used to parse
command-line help, here is a `Railroad Diagram <https://en.wikipedia.org/wiki/Syntax_diagram>`_ generated using
PyParsing.
9 |
10 | The "terminal" nodes (circular) are either:
11 |
12 | * In quotes, e.g. ``':'``, which indicates a literal string
13 | * In the form ``W:(start, body)``, e.g. ``W:(0-9@-Za-z, \--9@-Z\\_a-z|)``, which indicates a word where the first character comes from the ``start`` list of characters, and the remaining characters come from the ``body`` characters
14 | * In the form ``Re: pattern``, which indicates a regular expression pattern used to match this terminal
15 | * Whitespace nodes, e.g. ````, which list the types of whitespace being parsed by that terminal
16 | * Certain other special nodes like ``Empty``, and ``LineStart`` which match based on custom code. Where possible, these are annotated with what they are designed to match, for example ``UnIndent`` matches an unindent in the input file.
17 |
18 | The "non-terminal" nodes (square) refer to subsections of the diagram, which are spelled-out under the subheading with
19 | the same name.
20 |
21 | To read the diagram, start with ``FlagList``, the start node, and from there follow the lines along any branch of the path that goes forward (although some paths end up turning backwards to indicate loops). Any string that matches the sequence of tokens you encounter along that path will be parsed by the grammar.
22 |
23 | .. raw:: html
24 | :file: _static/railroad.html
25 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. aCLImatise documentation master file, created by
2 | sphinx-quickstart on Mon May 11 16:46:56 2020.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to aCLImatise's documentation!
7 | ======================================
8 |
9 | .. toctree::
10 | :maxdepth: 1
11 | :caption: Contents:
12 |
13 | installation
14 | api
15 | cli
16 | model
17 | grammar
18 | changes
19 | aCLImatise Base Camp
20 |
21 | .. include:: ../README.rst
22 |
23 |
--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | To install ``aCLImatise``, run:
5 |
6 | .. code-block:: bash
7 |
8 | pip install aclimatise
    python -m spacy download en_core_web_sm # Install an internal language model
10 |
11 | Now you can use either the :doc:`Python API ` or the :doc:`CLI `.
12 |
13 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/model.rst:
--------------------------------------------------------------------------------
1 | Data Model
2 | ==========
3 |
4 | Command
5 | -------
6 | .. autoclass:: aclimatise.model.Command
7 | :members:
8 |
9 | Command Inputs
10 | --------------
11 | .. autoclass:: aclimatise.model.CliArgument
12 | :members:
13 | .. autoclass:: aclimatise.model.Positional
14 | :members:
15 | .. autoclass:: aclimatise.model.Flag
16 | :members:
17 | .. autoclass:: aclimatise.model.FlagSynonym
18 | :members:
19 |
20 | Flag Arguments
21 | --------------
22 | .. autoclass:: aclimatise.model.FlagArg
23 | :members:
24 | .. autoclass:: aclimatise.model.EmptyFlagArg
25 | :members:
26 | .. autoclass:: aclimatise.model.OptionalFlagArg
27 | :members:
28 | .. autoclass:: aclimatise.model.SimpleFlagArg
29 | :members:
30 | .. autoclass:: aclimatise.model.RepeatFlagArg
31 | :members:
32 | .. autoclass:: aclimatise.model.ChoiceFlagArg
33 | :members:
34 |
35 | Argument Types
36 | --------------
37 | .. automodule:: aclimatise.cli_types
38 | :members:
39 | :undoc-members:
40 | :show-inheritance:
41 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: aclimatise-test
2 | channels:
3 | - conda-forge
4 | - bioconda
5 | dependencies:
6 | - python>=3.7.5
7 | - bwa==0.7.17
- samtools==1.9
9 | - bedtools==2.26.0
10 | - htseq==0.12.4
11 | - dinosaur==1.1.3
12 | - pisces==5.2.9.122
13 | - genomethreader==1.7.1
14 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = aclimatise
3 | version = 3.0.1
4 | description = aCLImatise is a Python library and command-line utility for parsing the help output of a command-line tool and then outputting a description of the tool in a more structured format
5 | long_description = file: README.rst
long_description_content_type = text/x-rst
7 | license = GPLv3
8 | classifiers =
9 | License :: OSI Approved :: GNU General Public License v3 (GPLv3)
10 | Programming Language :: Python :: 3
11 | Programming Language :: Python :: 3.7
12 | Programming Language :: Python :: 3.8
13 | Intended Audience :: Developers
14 | Natural Language :: English
15 |
16 | [tool:pytest]
17 | log_level = INFO
18 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""Package build script; most static metadata lives in setup.cfg."""
from setuptools import find_packages, setup

setup(
    # find_packages() expects an *iterable* of glob patterns. The original
    # passed the bare string "test", which setuptools iterates character by
    # character ("t", "e", "s", "t"), so the test package was never actually
    # excluded from the distribution.
    packages=find_packages(exclude=["test", "test.*"]),
    install_requires=[
        "pyparsing",
        "jinja2",
        "spacy~=3.0",
        "miniwdl",
        "wordsegment",
        "inflection",
        "illusional.wdlgen==0.3.0",
        "ruamel.yaml==0.16.5",
        "click",
        "cwltool",
        "cwl-utils>=0.4",
        "regex",
        "num2words",
        "word2number",
        "psutil",
        "deprecated",
        "attrs",
        "janis-pipelines.core>=0.11.2",
    ],
    # NOTE(review): classifiers in setup.cfg advertise 3.7/3.8 only, and
    # environment.yml pins python>=3.7.5 — confirm whether 3.6 is still
    # genuinely supported before tightening this
    python_requires=">=3.6",
    entry_points={"console_scripts": ["aclimatise = aclimatise.cli:main"]},
    extras_require={
        "dev": [
            "pytest",
            "pre-commit",
            "Sphinx",
            "sphinx-click",
            "pytest-timeout",
            "docker",
        ],
    },
)
39 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/test/__init__.py
--------------------------------------------------------------------------------
/test/conftest.py:
--------------------------------------------------------------------------------
1 | from textwrap import dedent
2 |
3 | import pytest
4 | from pkg_resources import resource_filename
5 |
6 | from aclimatise.converter.yml import YmlGenerator
7 | from aclimatise.execution.local import LocalExecutor
8 | from aclimatise.flag_parser.parser import CliParser
9 | from aclimatise.usage_parser.parser import UsageParser
10 | from aclimatise.yaml import yaml
11 |
12 |
13 | @pytest.fixture()
14 | def usage_parser():
15 | return UsageParser()
16 |
17 |
18 | @pytest.fixture()
19 | def local_executor():
20 | return LocalExecutor()
21 |
22 |
23 | @pytest.fixture()
24 | def yaml_converter():
25 | return YmlGenerator()
26 |
27 |
28 | @pytest.fixture()
29 | def bedtools_cmd():
30 | with open(resource_filename(__name__, "test_data/bedtools/bedtools.yml")) as fp:
31 | return yaml.load(fp)
32 |
33 |
34 | @pytest.fixture()
35 | def samtools_cmd():
36 | with open(resource_filename(__name__, "test_data/samtools/samtools.yml")) as fp:
37 | return yaml.load(fp)
38 |
39 |
40 | @pytest.fixture
41 | def samtools_help():
42 | with open(resource_filename(__name__, "test_data/samtools.txt")) as fp:
43 | return fp.read()
44 |
45 |
46 | @pytest.fixture
47 | def htseq_help():
48 | with open(resource_filename(__name__, "test_data/htseq_count.txt")) as fp:
49 | return fp.read()
50 |
51 |
52 | @pytest.fixture
53 | def bwamem_help():
54 | with open(resource_filename(__name__, "test_data/bwa_mem.txt")) as fp:
55 | return fp.read()
56 |
57 |
58 | @pytest.fixture
59 | def pisces_help():
60 | with open(resource_filename(__name__, "test_data/pisces.txt")) as fp:
61 | return fp.read()
62 |
63 |
64 | @pytest.fixture
65 | def bwa_help():
66 | with open(resource_filename(__name__, "test_data/bwa.txt")) as fp:
67 | return fp.read()
68 |
69 |
70 | @pytest.fixture
71 | def bwa_bwt2sa_help():
72 | with open(resource_filename(__name__, "test_data/bwa_bwt2sa.txt")) as fp:
73 | return fp.read()
74 |
75 |
76 | @pytest.fixture
77 | def bedtools_help():
78 | with open(resource_filename(__name__, "test_data/bedtools.txt")) as fp:
79 | return fp.read()
80 |
81 |
82 | @pytest.fixture
83 | def bedtools_coverage_help():
84 | with open(resource_filename(__name__, "test_data/bedtools_coverage.txt")) as fp:
85 | return fp.read()
86 |
87 |
88 | @pytest.fixture
89 | def podchecker_help():
90 | with open(resource_filename(__name__, "test_data/podchecker.txt")) as fp:
91 | return fp.read()
92 |
93 |
@pytest.fixture()
def process():
    """
    Fixture providing a helper that normalises a help-text snippet for
    testing: surrounding newlines are stripped, then the common leading
    indentation is removed with textwrap.dedent.
    """

    def process_help_section(help):
        # Strip only newline characters (not spaces) so dedent can still
        # detect the common indentation of the remaining lines
        return dedent(help.strip("\n"))

    return process_help_section
104 |
--------------------------------------------------------------------------------
/test/executors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/test/executors/__init__.py
--------------------------------------------------------------------------------
/test/executors/test_docker.py:
--------------------------------------------------------------------------------
1 | import docker
2 | import pytest
3 |
4 | from aclimatise.execution.docker import DockerExecutor
5 |
6 |
@pytest.mark.timeout(360)
def test_docker_image_saved(bwamem_help):
    """
    Converting a command through a DockerExecutor should record the source
    docker image tag on the resulting Command.
    """
    client = docker.from_env()
    container = client.containers.run(
        "biocontainers/bwa:v0.7.17_cv1",
        entrypoint=["sleep", "999999999"],
        detach=True,
    )
    try:
        exec = DockerExecutor(container)
        cmd = exec.convert(["bwa", "mem"])
        assert cmd.docker_image == "biocontainers/bwa:v0.7.17_cv1"
    finally:
        # The original never killed this container, leaking a detached
        # "sleep 999999999" container on every test run
        container.kill()
19 |
20 |
def test_docker(bwamem_help):
    """
    Executing "bwa mem" inside the container should reproduce the recorded
    help-text fixture exactly.
    """
    client = docker.from_env()
    container = client.containers.run(
        "biocontainers/bwa:v0.7.17_cv1",
        entrypoint=["sleep", "999999999"],
        detach=True,
    )
    try:
        exec = DockerExecutor(container)
        output = exec.execute(["bwa", "mem"])
    finally:
        # Kill in a finally block: the original killed only after the
        # assertion, so a failing assert (or an exception inside execute)
        # leaked the detached container
        container.kill()
    assert output == bwamem_help
33 |
34 |
@pytest.mark.timeout(360)
def test_docker_kill():
    """
    Test that the DockerExecutor can kill the command if it times out.

    A command that produces no output before the executor's timeout should
    come back as an empty string rather than hanging.
    """
    client = docker.from_env(timeout=99999)
    container = client.containers.run(
        "ubuntu:latest",
        entrypoint=["sleep", "999999999"],
        detach=True,
    )
    try:
        exec = DockerExecutor(container)
        output = exec.execute(["sleep", "999999"])
    finally:
        # Ensure the detached container is cleaned up even if execute() raises
        container.kill()
    assert output == ""
51 |
52 |
def test_no_output():
    """
    Check that the executor doesn't crash when the tool produces no output:
    the result should be a string (possibly empty), never None.
    """
    client = docker.from_env()
    container = client.containers.run(
        "quay.io/biocontainers/gadem:1.3.1--h516909a_2",
        entrypoint=["sleep", "9999999"],
        detach=True,
    )
    try:
        exec = DockerExecutor(container)
        output = exec.execute(["gadem"])
    finally:
        # Clean up the detached container even when execute() raises
        container.kill()
    assert output is not None
66 |
67 |
@pytest.mark.timeout(360)
def test_infinite_output():
    """
    Test that the DockerExecutor can kill the command if it's constantly
    producing output ("yes" prints forever); the captured prefix should
    still start with the expected text.
    """
    client = docker.from_env(timeout=99999)
    container = client.containers.run(
        "ubuntu:latest",
        entrypoint=["sleep", "999999999"],
        detach=True,
    )
    try:
        exec = DockerExecutor(container)
        output = exec.execute(["yes"])
    finally:
        # Clean up the detached container even when execute() raises
        container.kill()
    assert output.startswith("y")
84 |
--------------------------------------------------------------------------------
/test/executors/test_local.py:
--------------------------------------------------------------------------------
1 | from aclimatise.execution.local import LocalExecutor
2 |
3 | from ..util import skip_not_installed
4 |
5 |
@skip_not_installed("bwa")
def test_local(bwamem_help):
    # Running "bwa mem" via the LocalExecutor should reproduce the recorded
    # help-text fixture byte-for-byte
    exec = LocalExecutor()
    output = exec.execute(["bwa", "mem"])
    assert output == bwamem_help
11 |
--------------------------------------------------------------------------------
/test/executors/test_man.py:
--------------------------------------------------------------------------------
1 | from test.util import skip_not_installed
2 |
3 | from aclimatise.execution.man import ManPageExecutor
4 |
5 |
6 | @skip_not_installed("git")
7 | @skip_not_installed("man")
8 | def test_git():
9 | cmd = ManPageExecutor(max_length=99999).explore(
10 | ["git"],
11 | )
12 | assert len(cmd.positional) > 20
13 |
14 |
# The original skipped this test when "git" was missing — a copy/paste from
# test_git above. This test exercises the ls man page, so it needs "man"
# (for ManPageExecutor) and "ls", not git.
@skip_not_installed("man")
@skip_not_installed("ls")
def test_ls():
    """
    Exploring "ls" through its man page should surface the well-known flags.
    """
    cmd = ManPageExecutor().explore(
        ["ls"],
    )
    assert {"-A", "--almost-all", "-1", "--context"} <= cmd.all_synonyms
22 |
--------------------------------------------------------------------------------
/test/flags/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from aclimatise.flag_parser.parser import CliParser
4 |
5 |
6 | @pytest.fixture
7 | def parser():
8 | return CliParser()
9 |
--------------------------------------------------------------------------------
/test/flags/test_bedtools.py:
--------------------------------------------------------------------------------
1 | def test_bedtools_block(parser, process):
2 | txt = """
3 | [ Multi-way file comparisons ]
4 | multiinter Identifies common intervals among multiple interval files.
5 | unionbedg Combines coverage intervals from multiple BEDGRAPH files.
6 |
7 | [ Paired-end manipulation ]
8 | """
9 | blocks = parser.flags.searchString(txt)
10 | assert len(blocks) == 1, "This comprises only one block of flags"
11 | assert len(blocks[0]) == 2, "The single block contains 2 positional arguments"
12 |
13 |
14 | def test_bedtools_root(parser, bedtools_help):
15 | command = parser.parse_command(bedtools_help, ["bedtools"])
16 | assert len(command.named) == 1
17 | assert len(command.positional) == 43
18 |
--------------------------------------------------------------------------------
/test/flags/test_bwa.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | from textwrap import dedent
3 |
4 | import pytest
5 |
6 | from aclimatise.flag_parser import elements
7 | from aclimatise.integration import parse_help
8 | from aclimatise.model import Flag, FlagSynonym, OptionalFlagArg
9 |
10 |
11 | def test_flag_arg(parser):
12 | result = elements.flag_with_arg.parseString("-A INT")[0]
13 | assert isinstance(result, FlagSynonym)
14 | assert result.argtype.name == "INT"
15 | assert result.name == "-A"
16 |
17 |
18 | def test_flag(parser):
19 | result = parser.flag.parseString(
20 | "-A INT score for a sequence match, which scales options -TdBOELU unless overridden [1]"
21 | )[0]
22 | assert isinstance(result, Flag)
23 | assert result.synonyms[0] == "-A"
24 | assert result.args.name == "INT"
25 |
26 |
27 | def test_flag_b(parser):
28 | result = parser.flag.parseString("-B INT penalty for a mismatch [4]")
29 | print(result)
30 |
31 |
32 | def test_multiarg_flag(parser):
33 | result = parser.flag.parseString(
34 | "-O INT[,INT] gap open penalties for deletions and insertions [6,6]"
35 | )[0]
36 | assert isinstance(result, Flag)
37 |
38 |
39 | def test_flags(parser):
40 | result = parser.flags.parseString(
41 | """
42 | -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1]
43 | -B INT penalty for a mismatch [4]
44 | """,
45 | parseAll=True,
46 | )
47 |
48 |
49 | def test_bwa_segmented_options(parser):
50 | result = parser.flag_block.parseString(
51 | """
52 | -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1]
53 | -B INT penalty for a mismatch [4]
54 | -O INT[,INT] gap open penalties for deletions and insertions [6,6]
55 | -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [1,1]
56 | -L INT[,INT] penalty for 5'- and 3'-end clipping [5,5]
57 | -U INT penalty for an unpaired read pair [17]
58 |
59 | -x STR read type. Setting -x changes multiple parameters unless overriden [null]
60 | pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref)
61 | ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref)
62 | intractg: -B9 -O16 -L5 (intra-species contigs to ref)
63 | """,
64 | parseAll=True,
65 | )
66 | assert len(result) == 7
67 |
68 |
69 | def test_bwa_help_part(parser):
70 | results = list(
71 | parser.flags.scanString(
72 | """
73 | Algorithm options:
74 |
75 | -t INT number of threads [1]
76 | -k INT minimum seed length [19]
77 | -w INT band width for banded alignment [100]
78 | -d INT off-diagonal X-dropoff [100]
79 | -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [1.5]
80 | -y INT seed occurrence for the 3rd round seeding [20]
81 | -c INT skip seeds with more than INT occurrences [500]
82 | -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50]
83 | -W INT discard a chain if seeded bases shorter than INT [0]
84 | -m INT perform at most INT rounds of mate rescues for each read [50]
85 | -S skip mate rescue
86 | -P skip pairing; mate rescue performed unless -S also in use
87 | """
88 | )
89 | )
90 | assert len(results) == 1
91 |
92 | for tokens, start, end in results:
93 | assert len(tokens) == 12
94 |
95 |
96 | def test_bwa_multisection(parser):
97 | s = """
98 | Scoring options:
99 |
100 | -x STR read type. Setting -x changes multiple parameters unless overriden [null]
101 | pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref)
102 | ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref)
103 | intractg: -B9 -O16 -L5 (intra-species contigs to ref)
104 |
105 | Input/output options:
106 |
107 | -p smart pairing (ignoring in2.fq)
108 | """
109 | result_lists = list(parser.flags.scanString(s))
110 | assert len(result_lists) == 2
111 | for result_list, b, c in result_lists:
112 | assert len(result_list) == 1
113 |
114 |
115 | def test_complex_optionals(parser):
116 | s = """
117 | -I FLOAT[,FLOAT[,INT[,INT]]]
118 | specify the mean, standard deviation (10% of the mean if absent), max
119 | (4 sigma from the mean if absent) and min of the insert size distribution.
120 | FR orientation only. [inferred]
121 | """
122 | results = list(parser.flag_block.parseString(s))[0]
123 | assert isinstance(results, Flag)
124 | assert isinstance(results.args, OptionalFlagArg)
125 | assert results.args.names == ["FLOAT", "FLOAT", "INT", "INT"]
126 |
127 |
128 | def test_bwa_skipping(parser):
129 | s = """
130 | Input/output options:
131 |
132 | -p smart pairing (ignoring in2.fq)
133 | -R STR read group header line such as '@RG\tID:foo\tSM:bar' [null]
134 | -H STR/FILE insert STR to header if it starts with @; or insert lines in FILE [null]
135 | -o FILE sam file to output results to [stdout]
136 | -j treat ALT contigs as part of the primary assembly (i.e. ignore .alt file)
137 | -5 for split alignment, take the alignment with the smallest coordinate as primary
138 | """
139 | cmd = parser.parse_command(cmd=s, name=["bwa", "mem"])
140 | assert len(cmd.named) == 6
141 |
142 |
143 | def test_bwa_root(bwa_help):
144 | command = parse_help(["bwa"], bwa_help)
145 | assert len(command.named) == 0
146 | assert len(command.positional) == 14
147 | assert command.positional[0].name == "index"
148 | assert command.positional[-1].name == "bwt2sa"
149 |
150 |
151 | def test_bwa(parser, bwamem_help):
152 | # Parse help
153 | command = parse_help(["bwa", "mem"], text=bwamem_help)
154 |
155 | assert len(command.named) == 36
156 | assert len(command.positional) == 3
157 |
--------------------------------------------------------------------------------
/test/flags/test_bwakit.py:
--------------------------------------------------------------------------------
1 | def test_single_flag(parser):
2 |
3 | txt = """
4 | --use_strict (enforce strict mode)
5 | type: bool default: false
6 | """
7 |
8 | result = parser.flag_block.parseString(txt)[0]
9 | assert "type: bool" in result.description
10 |
11 |
12 | def test_multiple_flags(parser):
13 |
14 | txt = """
15 | --use_strict (enforce strict mode)
16 | type: bool default: false
17 | --es5_readonly (activate correct semantics for inheriting readonliness)
18 | type: bool default: true
19 | """
20 |
21 | result = parser.flag_block.setDebug().parseString(txt)
22 | assert len(result) == 2
23 |
--------------------------------------------------------------------------------
/test/flags/test_gth.py:
--------------------------------------------------------------------------------
1 | from aclimatise.model import Flag
2 |
3 |
4 | def test_unindented_flags(parser):
5 | """
6 | Verify that we can parse blocks of flags that aren't intended (which is unusual)
7 | """
8 |
9 | text = """
10 | -genomic specify input files containing genomic sequences
11 | mandatory option
12 | -cdna specify input files containing cDNA/EST sequences
13 | -protein specify input files containing protein sequences
14 | """.strip()
15 | flags = parser.flags.parseString(text)
16 | assert len(flags) == 3
17 | for flag in flags:
18 | assert isinstance(flag, Flag)
19 |
--------------------------------------------------------------------------------
/test/flags/test_htseq.py:
--------------------------------------------------------------------------------
1 | """
2 | Uses htseq-count, which is used as an example of a Python argparse CLI
3 | """
4 | import shutil
5 | from textwrap import dedent
6 |
7 | import pytest
8 |
9 | from aclimatise.flag_parser import elements
10 | from aclimatise.model import EmptyFlagArg, FlagSynonym, RepeatFlagArg
11 |
12 |
13 | def test_short(parser):
14 | flag = elements.flag_with_arg.parseString(
15 | dedent(
16 | """
17 | -i IDATTR
18 | """
19 | )
20 | )[0]
21 | assert isinstance(flag, FlagSynonym)
22 |
23 |
24 | def test_long_short_synonyms(parser):
25 | flag = elements.flag_synonyms.parseString(
26 | dedent(
27 | """
28 | -i IDATTR, --idattr IDATTR
29 | """
30 | )
31 | )[0]
32 | print(flag)
33 |
34 |
35 | def test_long_short_desc(parser):
36 | flag = parser.flag_block.parseString(
37 | """
38 | -i IDATTR, --idattr IDATTR
39 | GFF attribute to be used as feature ID (default,
40 | suitable for Ensembl GTF files: gene_id)
41 | """
42 | )[0]
43 | print(flag)
44 |
45 |
46 | def test_long_short_choices(parser):
47 | flag = parser.flag_block.parseString(
48 | """
49 | -m {union,intersection-strict,intersection-nonempty}, --mode {union,intersection-strict,intersection-nonempty}
50 | mode to handle reads overlapping more than one feature
51 | (choices: union, intersection-strict, intersection-
52 | nonempty; default: union)
53 | """
54 | )
55 |
56 |
57 | def test_help_section_preamble(parser):
58 | flags = list(
59 | parser.flags.searchString(
60 | dedent(
61 | """
62 | optional arguments:
63 | -h, --help show this help message and exit
64 | -f {sam,bam}, --format {sam,bam}
65 | type of data, either 'sam' or 'bam'
66 | (default: sam)
67 | """
68 | )
69 | )
70 | )[0]
71 | assert len(flags) == 2
72 |
73 |
74 | def test_repeat_type(parser):
75 | flag = elements.flag_synonyms.parseString(
76 | "--additional-attr ADDITIONAL_ATTR [ADDITIONAL_ATTR ...]"
77 | )[0]
78 | assert flag.name == "--additional-attr"
79 | assert isinstance(flag.argtype, RepeatFlagArg)
80 | assert flag.argtype.name == "ADDITIONAL_ATTR"
81 |
82 |
83 | def test_full_flags(parser):
84 | results = parser.flags.parseString(
85 | """
86 | -h, --help show this help message and exit
87 | -f {sam,bam}, --format {sam,bam}
88 | type of data, either 'sam' or 'bam'
89 | (default: sam)
90 | -r {pos,name}, --order {pos,name}
91 | 'pos' or 'name'. Sorting order of
92 | (default: name). Paired-end sequencing data must be
93 | sorted either by position or by read name, and the
94 | sorting order must be specified. Ignored for single-
95 | end data.
96 | --max-reads-in-buffer MAX_BUFFER_SIZE
97 | When is paired end sorted by
98 | position, allow only so many reads to stay in memory
99 | until the mates are found (raising this number will
100 | use more memory). Has no effect for single end or
101 | paired end sorted by name
102 | -s {yes,no,reverse}, --stranded {yes,no,reverse}
103 | whether the data is from a strand-specific assay.
104 | Specify 'yes', 'no', or 'reverse' (default: yes).
105 | 'reverse' means 'yes' with reversed strand
106 | interpretation
107 | -a MINAQUAL, --minaqual MINAQUAL
108 | skip all reads with alignment quality lower than the
109 | given minimum value (default: 10)
110 | -t FEATURETYPE, --type FEATURETYPE
111 | feature type (3rd column in GFF file) to be used, all
112 | features of other type are ignored (default, suitable
113 | for Ensembl GTF files: exon)
114 | -i IDATTR, --idattr IDATTR
115 | GFF attribute to be used as feature ID (default,
116 | suitable for Ensembl GTF files: gene_id)
117 | --additional-attr ADDITIONAL_ATTR
118 | Additional feature attributes (default: none, suitable
119 | for Ensembl GTF files: gene_name). Use multiple times
120 | for each different attribute
121 | -m {union,intersection-strict,intersection-nonempty}, --mode {union,intersection-strict,intersection-nonempty}
122 | mode to handle reads overlapping more than one feature
123 | (choices: union, intersection-strict, intersection-
124 | nonempty; default: union)
125 | --nonunique {none,all}
126 | Whether to score reads that are not uniquely aligned
127 | or ambiguously assigned to features
128 | --secondary-alignments {score,ignore}
129 | Whether to score secondary alignments (0x100 flag)
130 | --supplementary-alignments {score,ignore}
131 | Whether to score supplementary alignments (0x800 flag)
132 | -o SAMOUTS, --samout SAMOUTS
133 | write out all SAM alignment records into SAM files
134 | (one per input file needed), annotating each line with
135 | its feature assignment (as an optional field with tag
136 | 'XF')
137 | -q, --quiet suppress progress report
138 | """
139 | )
140 | assert len(list(results)) == 15
141 |
142 |
143 | def test_choice(parser):
144 | flag = elements.flag_with_arg.parseString("--format {sam,bam}")[0]
145 | assert flag.name == "--format"
146 |
147 | # Both sets should be the same
148 | assert len(flag.argtype.choices & {"sam", "bam"}) == 2
149 |
150 |
151 | def test_noarg(parser):
152 | flag = parser.flag.parseString("-q, --quiet suppress progress report")[0]
153 | assert flag.longest_synonym == "--quiet"
154 | assert len(flag.synonyms) == 2
155 | assert isinstance(flag.args, EmptyFlagArg)
156 |
157 |
158 | @pytest.mark.skipif(
159 | not shutil.which("htseq-count"), reason="htseq-count is not installed"
160 | )
161 | def test_full(parser, local_executor):
162 | # Parse help
163 | help_text = local_executor.execute(["htseq-count", "--help"])
164 | flag_sections = parser.flags.searchString(help_text)
165 | # There is one section for positional arguments and one for named arguments
166 | assert len(flag_sections) == 2
167 | # There are two positional arguments
168 | assert len(flag_sections[0]) == 2
169 | # There are at least 15 flags
170 | assert len(flag_sections[1]) >= 15
171 |
--------------------------------------------------------------------------------
/test/flags/test_pisces.py:
--------------------------------------------------------------------------------
1 | from textwrap import dedent
2 |
3 | from pkg_resources import resource_filename
4 |
5 | from aclimatise.flag_parser import elements
6 | from aclimatise.flag_parser.parser import CliParser
7 | from aclimatise.model import SimpleFlagArg
8 |
9 |
10 | def test_pisces_flag(parser):
11 | cmd = """
12 | --targetlodfrequency, --targetvf
13 | """
14 | flag_synonyms = elements.flag_synonyms.parseString(cmd)
15 | # There is one section for positional arguments and one for named arguments
16 | assert len(flag_synonyms) == 2
17 | assert isinstance(flag_synonyms[1].argtype, SimpleFlagArg)
18 | assert flag_synonyms[1].argtype.name == "FLOAT"
19 |
20 |
21 | def test_pisces_arg(parser):
22 | cmd = """
23 | --targetlodfrequency, --targetvf
24 | FLOAT Target Frequency to call a variant. Ie, to
25 | target a 5% allele frequency, we must call down
26 | to 2.6%, to capture that 5% allele 95% of the
27 | time. This parameter is used by the Somatic
28 | Genotyping Model
29 | """
30 | flag = parser.flag_block.parseString(cmd)[0]
31 |
32 | assert len(flag.synonyms) == 2
33 | assert flag.description.startswith("FLOAT Target Frequency")
34 | assert flag.args.name == "FLOAT"
35 |
36 |
37 | def test_pisces_arg_2(parser):
38 | cmd = """
39 | --vqfilter, --variantqualityfilter
40 | INT FilteredVariantQScore to report variant as
41 | filtered
42 | """
43 | flag = parser.flag_block.parseString(cmd)[0]
44 |
45 | assert len(flag.synonyms) == 2
46 | assert flag.description.startswith("INT FilteredVariantQScore ")
47 | assert flag.args.name == "INT"
48 |
49 |
50 | def test_pisces_indent_dedent(parser):
51 | cmd = """
52 | -i, --intervalpaths
53 | PATHS IntervalPath(s), single value or comma
54 | delimited list corresponding to BAMPath(s). At
55 | most one value should be provided if BAM folder
56 | is specified
57 | --coveragemethod
58 | STRING'approximate' or 'exact'. Exact is more
59 | precise but requires more memory (minimum 8 GB).
60 | Default approximate
61 | --baselogname STRING
62 | -d, --debug BOOL
63 | --usestitchedxd BOOL Set to true to make use of the consensus
64 | read-direction information (the XD tag) from
65 | stitched reads. This is on by default when using
66 | Stitcher output bam, but must be deliberately
67 | set for Gemini output.
68 | """
69 | flags = parser.flag_block.parseString(cmd)
70 |
71 | assert len(flags) == 5
72 |
73 | assert isinstance(flags[0].args, SimpleFlagArg)
74 | assert flags[0].synonyms == ["-i", "--intervalpaths"]
75 |
76 | assert isinstance(flags[3].args, SimpleFlagArg)
77 | assert flags[3].synonyms == ["-d", "--debug"]
78 | assert flags[3].description == "BOOL"
79 |
80 |
81 | def test_pisces_triple_long_flag_synonyms(parser):
82 | cmd = "--minvf, --minimumvariantfrequency, --minimumfrequency "
83 | synonyms = elements.flag_synonyms.parseString(cmd)
84 |
85 | assert len(synonyms) == 3
86 |
87 |
88 | def test_pisces_triple_long_flag(parser):
89 | cmd = """
90 | --minvf, --minimumvariantfrequency, --minimumfrequency
91 | FLOAT MinimumFrequency to call a variant
92 | """
93 | flag = parser.flag_block.parseString(cmd)[0]
94 |
95 | assert len(flag.synonyms) == 3
96 | assert flag.description.startswith("FLOAT MinimumFrequency")
97 |
98 |
99 | def test_pisces_quad_flag_synonyms(parser):
100 | cmd = "-c, --mindp, --mindepth, --mincoverage "
101 | synonyms = elements.flag_synonyms.parseString(cmd)
102 |
103 | assert len(synonyms) == 4
104 |
105 |
106 | def test_pisces_quad_flag(parser):
107 | cmd = """
108 | -c, --mindp, --mindepth, --mincoverage
109 | INT Minimum depth to call a variant
110 | """
111 | flag = parser.flag_block.parseString(cmd)[0]
112 |
113 | assert len(flag.synonyms) == 4
114 | assert flag.description.startswith("INT Minimum")
115 |
116 |
117 | def test_pisces_multi_indent(parser):
118 | cmd = """
119 | --minvq, --minvariantqscore
120 | INT MinimumVariantQScore to report variant
121 | -c, --mindp, --mindepth, --mincoverage
122 | INT Minimum depth to call a variant
123 | --minvf, --minimumvariantfrequency, --minimumfrequency
124 | FLOAT MinimumFrequency to call a variant
125 | --targetlodfrequency, --targetvf
126 | FLOAT Target Frequency to call a variant. Ie, to
127 | target a 5% allele frequency, we must call down
128 | to 2.6%, to capture that 5% allele 95% of the
129 | time. This parameter is used by the Somatic
130 | Genotyping Model
131 | --vqfilter, --variantqualityfilter
132 | INT FilteredVariantQScore to report variant as
133 | filtered
134 |
135 | """
136 | flags = parser.flags.parseString(cmd)
137 |
138 | assert len(flags) == 5
139 |
140 |
def test_pisces(parser, pisces_help):
    # End-to-end check of the flag parser over the full Pisces help text
    flag_sections = parser.flags.searchString(pisces_help)
    # The Pisces help text contains five separate blocks of flags
    # (the original comment here was a stale copy/paste claiming two sections)
    assert len(flag_sections) == 5

    # There are 2 arguments in the first block
    assert len(flag_sections[0]) == 2

    # There are 24 arguments in the second block
    assert len(flag_sections[1]) == 24

    # There are 4 arguments in the third block
    assert len(flag_sections[2]) == 4

    # There are 23 arguments in the fourth block
    assert len(flag_sections[3]) == 23

    # There are 6 arguments in the fifth block
    assert len(flag_sections[4]) == 6

    # The very first argument has 3 synonyms
    assert len(flag_sections[0][0].synonyms) == 3
164 |
--------------------------------------------------------------------------------
/test/flags/test_podchecker.py:
--------------------------------------------------------------------------------
1 | from aclimatise.model import Flag
2 |
3 |
def test_podchecker_flags(parser):
    """
    A flag block whose first line carries two space-separated synonyms
    (-warnings -nowarnings) should parse into a single Flag with both synonyms.
    """
    cmd = """
-warnings -nowarnings
    Turn on/off printing of warnings. Repeating -warnings increases
    the warning level, i.e. more warnings are printed. Currently
    increasing to level two causes flagging of unescaped "<,>"
    characters.
"""
    # parseString returns a ParseResults; element 0 is the parsed Flag
    flag = parser.flag_block.parseString(cmd)
    assert isinstance(flag[0], Flag)
    assert len(flag[0].synonyms) == 2
15 |
16 |
def test_podchecker(podchecker_help, parser):
    """
    Parse a podchecker-style "Options and Arguments" section and check that all
    four entries (-help, -man, -warnings/-nowarnings, file) are detected.
    """
    # NOTE(review): the podchecker_help fixture is requested but never used —
    # the test parses this inline excerpt instead; confirm whether the
    # parameter can be dropped.
    cmd = """
Options and Arguments:
    -help   Print a brief help message and exit.

    -man    Print the manual page and exit.

    -warnings -nowarnings
        Turn on/off printing of warnings. Repeating -warnings increases
        the warning level, i.e. more warnings are printed. Currently
        increasing to level two causes flagging of unescaped "<,>"
        characters.

    file    The pathname of a POD file to syntax-check (defaults to standard
            input).
"""
    # searchString yields one ParseResults per matched section; take the first
    flags = parser.flags.searchString(cmd)[0]
    assert len(flags) == 4
35 |
--------------------------------------------------------------------------------
/test/flags/test_samtools.py:
--------------------------------------------------------------------------------
1 | import shutil
2 |
3 | import pytest
4 |
5 | from aclimatise.model import Flag
6 |
7 |
def test_samtools_bedcov_j(parser):
    """A short, argument-less flag (-j) parses into a Flag whose first synonym is "-j"."""
    text = """
    -j do not include deletions (D) and ref skips (N) in bedcov computation
"""
    flag = parser.flag_block.parseString(text)[0]
    assert isinstance(flag, Flag)
    assert flag.synonyms[0] == "-j"
15 |
16 |
def test_samtools_bedcov_qjfmt(parser):
    """
    A block mixing short flags (-Q, -j) and a long flag whose description wraps
    onto following lines (--input-fmt-option) should yield exactly three flags.
    """
    text = """
    -Q mapping quality threshold [0]
    -j do not include deletions (D) and ref skips (N) in bedcov computation
    --input-fmt-option OPT[=VAL]
        Specify a single input file format option in the form
        of OPTION or OPTION=VALUE
"""
    # A leftover .setDebug() call was removed here: it made pyparsing dump
    # match/fail traces to stdout on every test run, cluttering output.
    flags = list(parser.flags.searchString(text)[0])
    assert len(flags) == 3
27 |
28 |
def test_samtools(parser, samtools_help):
    """The root samtools help defines only positional subcommands, no named flags."""
    command = parser.parse_command(name=["samtools"], cmd=samtools_help)
    assert len(command.named) == 0
    assert len(command.positional) > 25
34 |
35 |
@pytest.mark.skipif(not shutil.which("samtools"), reason="samtools is not installed")
def test_samtools_index(parser, local_executor):
    """`samtools index` help parses into a single section containing four flags."""
    help_text = local_executor.execute(["samtools", "index"])
    sections = parser.flags.searchString(help_text)
    # All flags land in one section...
    assert len(sections) == 1
    # ...and that section holds four arguments
    assert len(sections[0]) == 4
45 |
--------------------------------------------------------------------------------
/test/flags/test_singularity.py:
--------------------------------------------------------------------------------
1 | import shutil
2 |
3 | import pytest
4 |
5 | from aclimatise.flag_parser.parser import CliParser
6 |
7 |
def test_singularity_style_flags(parser):
    """Pipe-separated synonyms (-n|--name) are split into two separate synonyms."""
    line = " -n|--name Specify a custom container name (first priority)"
    parsed = parser.flag_block.parseString(line)[0]
    assert parsed.synonyms == ["-n", "--name"]
    assert len(parsed.synonyms) == 2
14 |
15 |
@pytest.mark.skipif(
    not shutil.which("singularity"), reason="singularity is not installed"
)
def test_singularity_pull(local_executor):
    """`singularity pull --help` parses into one section with at least five flags."""
    cli_parser = CliParser(parse_positionals=False)
    help_text = local_executor.execute(["singularity", "pull", "--help"])
    sections = cli_parser.flags.searchString(help_text)
    # Positional parsing is disabled, so everything lands in one named-flag section
    assert len(sections) == 1
    assert len(sections[0]) >= 5
29 |
--------------------------------------------------------------------------------
/test/name_generation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/test/name_generation/__init__.py
--------------------------------------------------------------------------------
/test/name_generation/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from aclimatise.converter import WrapperGenerator
4 |
5 |
@pytest.fixture()
def snake_gen():
    """A WrapperGenerator that produces snake_case variable names."""
    generator = WrapperGenerator(case="snake", generate_names=True)
    return generator
9 |
10 |
@pytest.fixture()
def camel_gen():
    """A WrapperGenerator that produces camelCase variable names."""
    generator = WrapperGenerator(case="camel", generate_names=True)
    return generator
14 |
15 |
@pytest.fixture()
def gen():
    """A WrapperGenerator with all-default settings."""
    generator = WrapperGenerator()
    return generator
19 |
--------------------------------------------------------------------------------
/test/name_generation/test_batch.py:
--------------------------------------------------------------------------------
1 | """
2 | Test all the test data files
3 | """
4 | import pytest
5 | from pkg_resources import resource_filename
6 |
7 | from aclimatise import WrapperGenerator, parse_help
8 |
9 | from ..util import HelpText, all_tests, convert_validate, validate_cwl, validate_wdl
10 |
11 |
@pytest.mark.parametrize("test", all_tests)
def test_all(test: HelpText):
    """
    Tests that generate_names can work on real-life Commands without exceeding
    reasonable system resources
    """
    help_path = resource_filename("test", test.path)
    with open(help_path) as handle:
        cmd = parse_help(test.cmd, handle.read())

    # Name every argument, positional and named alike
    all_args = list(cmd.positional) + list(cmd.named)
    WrapperGenerator().choose_variable_names(all_args)
23 |
--------------------------------------------------------------------------------
/test/name_generation/test_case.py:
--------------------------------------------------------------------------------
1 | """
2 | Test the casing (snake_case vs camelCase) used by the converters
3 | """
4 | import pytest
5 |
6 | from aclimatise.converter import WrapperGenerator
7 | from aclimatise.model import EmptyFlagArg, Flag
8 |
9 |
def test_camel_short(camel_gen):
    """A one-letter flag gets a camelCase name derived from its description."""
    flag = Flag(
        synonyms=["-t"], description="number of threads [1]", args=EmptyFlagArg()
    )
    chosen = camel_gen.choose_variable_names([flag], length=3)
    assert chosen[0].name == "numberOfThreads"
16 |
17 |
def test_snake_short(snake_gen):
    """A one-letter flag gets a snake_case name derived from its description."""
    flag = Flag(
        synonyms=["-t"], description="number of threads [1]", args=EmptyFlagArg()
    )
    chosen = snake_gen.choose_variable_names([flag], length=2)[0].name
    for expected_word in ("number", "threads"):
        assert expected_word in chosen
25 |
26 |
def test_camel_long(camel_gen):
    """With a long synonym available, the camelCase name comes from the flag itself."""
    flag = Flag(
        synonyms=["-g", "--genomepaths", "--genomefolders"],
        description="number of threads [1]",
        args=EmptyFlagArg(),
    )
    chosen = camel_gen.choose_variable_names([flag], length=2)
    assert chosen[0].name == "genomeFolders"
35 |
36 |
def test_snake_long(snake_gen):
    """With a long synonym available, the snake_case name comes from the flag itself."""
    flag = Flag(
        synonyms=["-g", "--genomepaths", "--genomefolders"],
        description="number of threads [1]",
        args=EmptyFlagArg(),
    )
    chosen = snake_gen.choose_variable_names([flag], length=2)
    assert chosen[0].name == "genome_folders"
45 |
--------------------------------------------------------------------------------
/test/name_generation/test_description.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests the generate_name function, which converts a paragraph of text into a variable name
3 | """
4 | from aclimatise.name_generation import generate_name, preprocess
5 |
6 |
def test_bwa_mem_t():
    """bwa mem -t: a short description yields a short name containing its key words."""
    words = next(generate_name(preprocess("number of threads [1]")))
    assert len(words) < 5
    for expected in ("number", "threads"):
        assert expected in words
12 |
13 |
def test_bwa_mem_p():
    """bwa mem -p: the generated name stays short and keeps the leading words."""
    words = next(generate_name(preprocess("smart pairing (ignoring in2.fq)")))
    assert len(words) <= 3
    for expected in ("smart", "pairing"):
        assert expected in words
19 |
20 |
def test_bwa_mem_r():
    """bwa mem -R: a description with embedded tab escapes still yields a short name."""
    description = "read group header line such as '@RG\tID:foo\tSM:bar' [null]"
    words = next(generate_name(preprocess(description)))
    assert len(words) < 5
    assert "read" in words
    # assert 'header' in name
30 |
31 |
def test_bwa_mem_i():
    """bwa mem -I: name generation copes with a very long, clause-heavy description."""
    description = (
        "specify the mean, standard deviation (10% of the mean if absent), max "
        "(4 sigma from the mean if absent) and min of the insert size "
        "distribution. FR orientation only. [inferred]"
    )
    words = next(generate_name(preprocess(description)))
    assert len(words) < 5
    assert "specify" in words

    # Ideally this would return "mean" first, but the POS engine thinks that
    # "mean" describes "deviation"
    # assert "mean" in name
    assert "deviation" in words
46 |
47 |
def test_bedtools_coverage_d():
    """bedtools coverage -d: a multi-sentence description still yields a short name."""
    description = (
        "Report the depth at each position in each A feature. Positions "
        "reported are one based. Each position and depth follow the complete "
        "A feature."
    )
    words = next(generate_name(preprocess(description)))
    assert len(words) < 5
    for expected in ("report", "depth"):
        assert expected in words
59 |
60 |
def test_bedtools_coverage_s():
    """bedtools coverage -s: the imperative opening words dominate the generated name."""
    description = (
        "Require same strandedness. That is, only report hits in B that "
        "overlap A on the _same_ strand. By default, overlaps are reported "
        "without respect to strand"
    )
    words = next(generate_name(preprocess(description)))
    assert len(words) < 5
    for expected in ("require", "strandedness"):
        assert expected in words
72 |
73 |
def test_bedtools_coverage_g():
    """bedtools coverage -g: the generated name keeps the verb and the noun."""
    description = (
        "Provide a genome file to enforce consistent chromosome sort order "
        "across input files. Only applies when used with -sorted option."
    )
    words = next(generate_name(preprocess(description)))
    assert len(words) < 5
    for expected in ("provide", "file"):
        assert expected in words
85 |
86 |
def test_symbol():
    """
    Check that symbols are correctly removed from the output
    """
    words = next(generate_name(preprocess("/genome@ #file$")))
    assert len(words) < 5
    for expected in ("genome", "file"):
        assert expected in words
95 |
96 |
def test_hyphens():
    """Punctuation such as hyphens, brackets and commas never survives into the name."""
    words = next(generate_name(preprocess("penalty for 5'- and 3'-end clipping [5,5]")))
    assert len(words) < 5
    assert "penalty" in words

    for word in words:
        for banned in ("-", "[", ","):
            assert banned not in word
106 |
--------------------------------------------------------------------------------
/test/name_generation/test_group.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests certain groups of flags used together
3 | """
4 | from aclimatise.converter import WrapperGenerator
5 | from aclimatise.model import EmptyFlagArg, Flag, SimpleFlagArg
6 |
7 |
def test_bedtools_window_sm():
    """
    These two flags have almost the same name, and almost the same description
    """
    # NOTE(review): the first two flags share the identical synonym "-sm" even
    # though the docstring says "almost the same name"; bedtools itself uses
    # -sm/-Sm, so the second may have been meant to be "-Sm". Kept verbatim to
    # preserve existing behavior — confirm upstream.
    flags = [
        Flag(
            synonyms=["-sm"],
            description="Only report hits in B that overlap A on the _same_ strand.",
            args=EmptyFlagArg(),
        ),
        Flag(
            synonyms=["-sm"],
            description="Only report hits in B that overlap A on the _opposite_ strand.",
            args=EmptyFlagArg(),
        ),
        Flag(
            synonyms=["-c"],
            description="For each entry in A, report the number of overlaps with B.",
            args=EmptyFlagArg(),
        ),
    ]
    named = WrapperGenerator().choose_variable_names(flags)
    # All three must receive distinct names despite the near-collisions
    unique_names = {arg.name for arg in named}
    assert len(unique_names) == 3
31 |
32 |
def test_same_description():
    """
    Normally we ignore one-character flag names and derive a name from the
    description instead. When two flags share an identical description, the
    generator has to fall back to the flag letters themselves.
    """
    shared_description = "Makes the program do a certain thing"
    flags = [
        Flag(synonyms=["-a"], description=shared_description, args=EmptyFlagArg()),
        Flag(synonyms=["-b"], description=shared_description, args=EmptyFlagArg()),
    ]
    chosen = WrapperGenerator().choose_variable_names(flags)
    assert [entry.name for entry in chosen] == ["a", "b"]
53 |
54 |
def test_same_arg():
    """
    Two one-character flags with empty descriptions and identical argument
    names must still get distinct names, falling back to the flag letters.
    """
    flags = [
        Flag(synonyms=["-a"], description="", args=SimpleFlagArg("SomeThing")),
        Flag(synonyms=["-b"], description="", args=SimpleFlagArg("SomeThing")),
    ]
    chosen = WrapperGenerator().choose_variable_names(flags)
    assert [entry.name for entry in chosen] == ["a", "b"]
67 |
--------------------------------------------------------------------------------
/test/name_generation/test_single_flag.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for the name generation of single flags
3 | """
4 | from aclimatise.converter.wdl import WdlGenerator
5 | from aclimatise.model import EmptyFlagArg, Flag, Positional, SimpleFlagArg
6 |
7 |
def test_samtools_dict_output():
    """The WDL generator must avoid the reserved word "output" and the lazy var_output."""
    generator = WdlGenerator()
    flag = Flag(
        synonyms=["-o", "--output"],
        description="file to write out dict file [stdout]",
        args=SimpleFlagArg(name="str"),
    )
    chosen = generator.choose_variable_names([flag])[0].name
    # "output" is a WDL keyword, so it can't be used verbatim...
    assert chosen != "output"
    # ...and with a description available, the generator shouldn't need the
    # lazy var_output fallback either
    assert chosen != "var_output"
21 |
22 |
def test_samtools_quickcheck_output():
    """A positional literally named "input" must be renamed by the WDL generator."""
    generator = WdlGenerator()
    positional = Positional(description="", position=0, name="input")
    chosen = generator.choose_variable_names([positional])[0].name
    # "input" is reserved in WDL, so the generator must pick something else
    assert chosen != "input"
29 |
30 |
def test_bwt2sa_i(gen):
    """When the flag's argument name isn't a valid identifier, fall back to the letter."""
    flag = Flag(synonyms=["-i"], description="", args=SimpleFlagArg(name="32"))
    chosen = gen.choose_variable_names([flag])[0].name
    # "32" can't be a variable name, so the letter "i" is the only option
    assert chosen == "i"
37 |
38 |
def test_name_to_words_symbol(gen):
    """
    Check that we can get an argument name even if the argument's flag is a symbol
    """
    flag = Flag(
        synonyms=["-@"],
        description="Number of additional threads to use",
        args=EmptyFlagArg(),
    )
    chosen = gen.choose_variable_names([flag])[0].name
    assert chosen == "at"
51 |
52 |
def test_name_to_words(gen):
    """A concatenated long flag (--genomepaths) is segmented into real words."""
    flag = Flag(
        synonyms=["--genomepaths"],
        description="",
        args=EmptyFlagArg(),
    )
    chosen = gen.choose_variable_names([flag])[0].name
    for expected in ("genome", "paths"):
        assert expected in chosen
    # assert list(arg._name_from_name()) == ["genome", "paths"]
67 |
68 |
def test_bwa_mem_infq(gen):
    """A filename-style positional (in1.fq) yields a name keeping each component."""
    positional = Positional(name="in1.fq", description="", position=0)
    chosen = gen.choose_variable_names([positional])[0].name
    assert "1" in chosen or "one" in chosen
    assert "in" in chosen
    assert "fq" in chosen
76 |
--------------------------------------------------------------------------------
/test/test_cli.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import tempfile
3 | import traceback
4 | from pathlib import Path
5 |
6 | import pyparsing
7 | import pytest
8 | from click.testing import CliRunner
9 | from packaging import version
10 |
11 | from aclimatise.cli import main
12 | from aclimatise.yaml import yaml
13 |
14 | from .util import skip_not_installed, validate_cwl, validate_janis, validate_wdl
15 |
16 |
@pytest.fixture()
def runner():
    """Provide a fresh click CliRunner for invoking the CLI in-process."""
    cli_runner = CliRunner()
    return cli_runner
20 |
21 |
def cli_worked(result):
    """
    Assert that a click invocation succeeded; on failure, print the captured
    traceback first so the real cause shows up in the test output.
    """
    if result.exit_code != 0:
        traceback.print_exception(*result.exc_info)
        assert result.exit_code == 0
    return True
28 |
29 |
def test_pipe_wdl(runner, htseq_help):
    """Piping htseq-count help through `pipe --format wdl` yields valid WDL."""
    args = ["pipe", "htseq-count", "--format", "wdl"]
    result = runner.invoke(main, args, input=htseq_help)
    cli_worked(result)
    validate_wdl(result.output)
36 |
37 |
def test_pipe_cwl(runner, htseq_help):
    """Piping htseq-count help through `pipe --format cwl` yields valid CWL."""
    args = ["pipe", "htseq-count", "--format", "cwl"]
    result = runner.invoke(main, args, input=htseq_help)
    cli_worked(result)
    validate_cwl(result.output)
44 |
45 |
def test_pipe_janis(runner, htseq_help):
    """Piping htseq-count help through `pipe --format janis` yields valid Janis."""
    args = ["pipe", "htseq-count", "--format", "janis"]
    result = runner.invoke(main, args, input=htseq_help)
    cli_worked(result)
    validate_janis(result.output)
52 |
53 |
@skip_not_installed("htseq-count")
def test_explore_htseq(runner, caplog):
    """`explore htseq-count` writes three files into the output directory."""
    caplog.set_level(100000)  # effectively silence log capture
    with tempfile.TemporaryDirectory() as tempdir:
        result = runner.invoke(main, ["explore", "htseq-count", "--out-dir", tempdir])
        cli_worked(result)
        outputs = list(Path(tempdir).iterdir())
        assert len(outputs) == 3
61 |
62 |
@skip_not_installed("ls")
@skip_not_installed("man")
def test_explore_ls_man(runner, caplog):
    """With --man, the captured help text comes from the man page (starts with LS(1))."""
    caplog.set_level(100000)  # effectively silence log capture
    with tempfile.TemporaryDirectory() as tempdir:
        result = runner.invoke(main, ["explore", "ls", "--man", "--out-dir", tempdir])
        cli_worked(result)
        yml_path = Path(tempdir) / "ls.yml"
        with yml_path.open() as fp:
            parsed = yaml.load(fp)
        assert parsed.help_text.startswith("LS(1)")
73 |
74 |
@skip_not_installed("samtools")
def test_explore_samtools(runner, caplog):
    """Exploring samtools recurses into subcommands, producing many output files."""
    caplog.set_level(100000)  # effectively silence log capture
    with tempfile.TemporaryDirectory() as tempdir:
        args = ["explore", "samtools", "--help-flag", "--help", "--out-dir", tempdir]
        result = runner.invoke(main, args)
        cli_worked(result)
        assert len(list(Path(tempdir).iterdir())) > 20
84 |
85 |
@skip_not_installed("samtools")
def test_explore_samtools_no_subcommands(runner, caplog):
    """With --no-subcommands, only the root command is converted."""
    caplog.set_level(100000)  # effectively silence log capture
    with tempfile.TemporaryDirectory() as tempdir:
        args = ["explore", "samtools", "--no-subcommands", "--out-dir", tempdir]
        result = runner.invoke(main, args)
        cli_worked(result)
        # Since we aren't looking at subcommands, there should be one file for
        # each output format
        assert len(list(Path(tempdir).iterdir())) >= 3
103 |
104 |
@pytest.mark.skipif(
    version.parse(pyparsing.__version__) < version.parse("3.0.0a2"),
    reason="PyParsing 3.0.0+ is not installed",
)
def test_grammar(runner):
    """The `grammar` subcommand exits cleanly and prints non-trivial output."""
    result = runner.invoke(main, ["grammar"])
    assert result.exit_code == 0
    grammar_text = result.output
    assert len(grammar_text) > 20
113 |
--------------------------------------------------------------------------------
/test/test_convert.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import tempfile
3 | from pathlib import Path
4 |
5 | import pytest
6 | from WDL import parse_document
7 |
8 | from aclimatise import explore_command
9 | from aclimatise.converter.cwl import CwlGenerator
10 | from aclimatise.converter.wdl import WdlGenerator
11 | from aclimatise.model import CliArgument, Flag, SimpleFlagArg
12 | from aclimatise.yaml import yaml
13 |
14 | from .util import convert_validate, skip_not_installed
15 |
16 | # Note: the parse and explore tests run conversion tests already. These tests are for specific edge cases
17 |
18 |
def test_premade_samtools(samtools_cmd):
    """
    Use a command tree that was generated beforehand, to quickly detect issues relating to the conversion of command
    trees
    """
    # convert_validate (from test.util) converts the tree and validates the
    # result; explore=True presumably recurses into subcommands — confirm in
    # test/util.py
    convert_validate(samtools_cmd, explore=True)
25 |
26 |
def test_premade_bedtools(bedtools_cmd):
    """
    Use a command tree that was generated beforehand, to quickly detect issues relating to the conversion of command
    trees
    """
    # convert_validate (from test.util) converts the tree and validates the
    # result; explore=True presumably recurses into subcommands — confirm in
    # test/util.py
    convert_validate(bedtools_cmd, explore=True)
33 |
34 |
@skip_not_installed("samtools")
@skip_not_installed("samtools.pl")
def test_explore_samtools_pl(yaml_converter):
    """
    Tests that commands with a non-standard file extension include their extension in the final output, and don't
    override another command with the same stem
    """
    samtools = explore_command(["samtools"], max_depth=0)
    samtools_pl = explore_command(["samtools.pl"], max_depth=0)
    with tempfile.TemporaryDirectory() as temp_dir:
        # The original code assigned `path = Path(temp_dir)` here and then
        # immediately shadowed it with the loop variable; the dead assignment
        # has been removed and the loop variable renamed for clarity.
        filenames = set()
        for out_path, command in itertools.chain(
            yaml_converter.generate_tree(samtools, temp_dir),
            yaml_converter.generate_tree(samtools_pl, temp_dir),
        ):
            filenames.add(out_path.name)

        # samtools.pl must not clobber samtools.yml
        assert filenames == {"samtools.yml", "samtools.pl.yml"}
54 |
55 |
def test_docker_conversion(bedtools_cmd):
    """A Command's docker_image must appear as a CWL DockerRequirement and a WDL runtime.docker."""
    container = "quay.io/biocontainers/bedtools:2.29.2--hc088bd4_0"
    intersect = bedtools_cmd["intersect"]
    intersect.docker_image = container

    # CWL: the generated tool must carry a DockerRequirement hint pulling the image
    with tempfile.NamedTemporaryFile() as cwl_file:
        CwlGenerator().save_to_file(intersect, path=Path(cwl_file.name))
        cwl_file.seek(0)
        parsed_cwl = yaml.load(cwl_file)
    assert any(
        hint["class"] == "DockerRequirement" and hint["dockerPull"] == container
        for hint in parsed_cwl["hints"]
    )

    # WDL: the generated task must set runtime.docker to the same image
    wdl = WdlGenerator().save_to_string(intersect)
    parsed_wdl = parse_document(wdl).tasks[0]
    assert parsed_wdl.runtime["docker"].literal.value == container
74 |
75 |
@pytest.mark.parametrize(
    "flag,cwltype,wdltype",
    [
        (
            Flag(
                synonyms=["--some-flag"],
                optional=True,
                args=SimpleFlagArg("string"),
                description="",
            ),
            "string?",
            "String?",
        ),
        (
            Flag(
                synonyms=["--some-flag"],
                optional=False,
                args=SimpleFlagArg("string"),
                description="",
            ),
            "string",
            "String",
        ),
    ],
)
def test_types_conversion(flag: CliArgument, cwltype: str, wdltype: str):
    """
    Test that types are being correctly translated from aCLImatise types to CWL and WDL
    """
    assert CwlGenerator.arg_to_cwl_type(flag) == cwltype
    wdl_type = WdlGenerator.type_to_wdl(flag.get_type(), optional=flag.optional)
    assert wdl_type.get_string() == wdltype
110 |
--------------------------------------------------------------------------------
/test/test_data/bedtools.txt:
--------------------------------------------------------------------------------
1 | bedtools: flexible tools for genome arithmetic and DNA sequence analysis.
2 | usage: bedtools [options]
3 |
4 | The bedtools sub-commands include:
5 |
6 | [ Genome arithmetic ]
7 | intersect Find overlapping intervals in various ways.
8 | window Find overlapping intervals within a window around an interval.
9 | closest Find the closest, potentially non-overlapping interval.
10 | coverage Compute the coverage over defined intervals.
11 | map Apply a function to a column for each overlapping interval.
12 | genomecov Compute the coverage over an entire genome.
13 | merge Combine overlapping/nearby intervals into a single interval.
14 | cluster Cluster (but don't merge) overlapping/nearby intervals.
15 | complement Extract intervals _not_ represented by an interval file.
16 | shift Adjust the position of intervals.
17 | subtract Remove intervals based on overlaps b/w two files.
18 | slop Adjust the size of intervals.
19 | flank Create new intervals from the flanks of existing intervals.
20 | sort Order the intervals in a file.
21 | random Generate random intervals in a genome.
22 | shuffle Randomly redistrubute intervals in a genome.
23 | sample Sample random records from file using reservoir sampling.
24 | spacing Report the gap lengths between intervals in a file.
25 | annotate Annotate coverage of features from multiple files.
26 |
27 | [ Multi-way file comparisons ]
28 | multiinter Identifies common intervals among multiple interval files.
29 | unionbedg Combines coverage intervals from multiple BEDGRAPH files.
30 |
31 | [ Paired-end manipulation ]
32 | pairtobed Find pairs that overlap intervals in various ways.
33 | pairtopair Find pairs that overlap other pairs in various ways.
34 |
35 | [ Format conversion ]
36 | bamtobed Convert BAM alignments to BED (& other) formats.
37 | bedtobam Convert intervals to BAM records.
38 | bamtofastq Convert BAM records to FASTQ records.
39 | bedpetobam Convert BEDPE intervals to BAM records.
40 | bed12tobed6 Breaks BED12 intervals into discrete BED6 intervals.
41 |
42 | [ Fasta manipulation ]
43 | getfasta Use intervals to extract sequences from a FASTA file.
44 | maskfasta Use intervals to mask sequences from a FASTA file.
45 | nuc Profile the nucleotide content of intervals in a FASTA file.
46 |
47 | [ BAM focused tools ]
48 | multicov Counts coverage from multiple BAMs at specific intervals.
49 | tag Tag BAM alignments based on overlaps with interval files.
50 |
51 | [ Statistical relationships ]
52 | jaccard Calculate the Jaccard statistic b/w two sets of intervals.
53 | reldist Calculate the distribution of relative distances b/w two files.
54 | fisher Calculate Fisher statistic b/w two feature files.
55 |
56 | [ Miscellaneous tools ]
57 | overlap Computes the amount of overlap from two intervals.
58 | igv Create an IGV snapshot batch script.
59 | links Create a HTML page of links to UCSC locations.
60 | makewindows Make interval "windows" across a genome.
61 | groupby Group by common cols. & summarize oth. cols. (~ SQL "groupBy")
62 | expand Replicate lines based on lists of values in columns.
63 | split Split a file into multiple files with equal records or base pairs.
64 |
65 | [ General help ]
66 | --help Print this help menu.
67 | --version What version of bedtools are you using?.
68 | --contact Feature requests, bugs, mailing lists, etc.
69 |
70 |
--------------------------------------------------------------------------------
/test/test_data/bedtools_closest.txt:
--------------------------------------------------------------------------------
1 |
2 | Tool: bedtools closest (aka closestBed)
3 | Version: v2.26.0
4 | Summary: For each feature in A, finds the closest
5 | feature (upstream or downstream) in B.
6 |
7 | Usage: bedtools closest [OPTIONS] -a -b
8 |
9 | Options:
10 | -d In addition to the closest feature in B,
11 | report its distance to A as an extra column.
12 | - The reported distance for overlapping features will be 0.
13 |
14 | -D Like -d, report the closest feature in B, and its distance to A
15 | as an extra column. Unlike -d, use negative distances to report
16 | upstream features.
17 | The options for defining which orientation is "upstream" are:
18 | - "ref" Report distance with respect to the reference genome.
19 | B features with a lower (start, stop) are upstream
20 | - "a" Report distance with respect to A.
21 | When A is on the - strand, "upstream" means B has a
22 | higher (start,stop).
23 | - "b" Report distance with respect to B.
24 | When B is on the - strand, "upstream" means A has a
25 | higher (start,stop).
26 |
27 | -io Ignore features in B that overlap A. That is, we want close,
28 | yet not touching features only.
29 |
30 | -iu Ignore features in B that are upstream of features in A.
31 | This option requires -D and follows its orientation
32 | rules for determining what is "upstream".
33 |
34 | -id Ignore features in B that are downstream of features in A.
35 | This option requires -D and follows its orientation
36 | rules for determining what is "downstream".
37 |
38 | -fu Choose first from features in B that are upstream of features in A.
39 | This option requires -D and follows its orientation
40 | rules for determining what is "upstream".
41 |
42 | -fd Choose first from features in B that are downstream of features in A.
43 | This option requires -D and follows its orientation
44 | rules for determining what is "downstream".
45 |
46 | -t How ties for closest feature are handled. This occurs when two
47 | features in B have exactly the same "closeness" with A.
48 | By default, all such features in B are reported.
49 | Here are all the options:
50 | - "all" Report all ties (default).
51 | - "first" Report the first tie that occurred in the B file.
52 | - "last" Report the last tie that occurred in the B file.
53 |
54 | -mdb How multiple databases are resolved.
55 | - "each" Report closest records for each database (default).
56 | - "all" Report closest records among all databases.
57 |
58 | -k Report the k closest hits. Default is 1. If tieMode = "all",
59 | - all ties will still be reported.
60 |
61 | -N Require that the query and the closest hit have different names.
62 | For BED, the 4th column is compared.
63 |
64 | -s Require same strandedness. That is, only report hits in B
65 | that overlap A on the _same_ strand.
66 | - By default, overlaps are reported without respect to strand.
67 |
68 | -S Require different strandedness. That is, only report hits in B
69 | that overlap A on the _opposite_ strand.
70 | - By default, overlaps are reported without respect to strand.
71 |
72 | -f Minimum overlap required as a fraction of A.
73 | - Default is 1E-9 (i.e., 1bp).
74 | - FLOAT (e.g. 0.50)
75 |
76 | -F Minimum overlap required as a fraction of B.
77 | - Default is 1E-9 (i.e., 1bp).
78 | - FLOAT (e.g. 0.50)
79 |
80 | -r Require that the fraction overlap be reciprocal for A AND B.
81 | - In other words, if -f is 0.90 and -r is used, this requires
82 | that B overlap 90% of A and A _also_ overlaps 90% of B.
83 |
84 | -e Require that the minimum fraction be satisfied for A OR B.
85 | - In other words, if -e is used with -f 0.90 and -F 0.10 this requires
86 | that either 90% of A is covered OR 10% of B is covered.
87 | Without -e, both fractions would have to be satisfied.
88 |
89 | -split Treat "split" BAM or BED12 entries as distinct BED intervals.
90 |
91 | -g Provide a genome file to enforce consistent chromosome sort order
92 | across input files. Only applies when used with -sorted option.
93 |
94 | -nonamecheck For sorted data, don't throw an error if the file has different naming conventions
95 | for the same chromosome. ex. "chr1" vs "chr01".
96 |
97 | -names When using multiple databases, provide an alias for each that
98 | will appear instead of a fileId when also printing the DB record.
99 |
100 | -filenames When using multiple databases, show each complete filename
101 | instead of a fileId when also printing the DB record.
102 |
103 | -sortout When using multiple databases, sort the output DB hits
104 | for each record.
105 |
106 | -bed If using BAM input, write output as BED.
107 |
108 | -header Print the header from the A file prior to results.
109 |
110 | -nobuf Disable buffered output. Using this option will cause each line
111 | of output to be printed as it is generated, rather than saved
112 | in a buffer. This will make printing large output files
113 | noticeably slower, but can be useful in conjunction with
114 | other software tools and scripts that need to process one
115 | line of bedtools output at a time.
116 |
117 | -iobuf Specify amount of memory to use for input buffer.
118 | Takes an integer argument. Optional suffixes K/M/G supported.
119 | Note: currently has no effect with compressed files.
120 |
121 | Notes:
122 | Reports "none" for chrom and "-1" for all other fields when a feature
123 | is not found in B on the same chromosome as the feature in A.
124 | E.g. none -1 -1
125 |
126 |
--------------------------------------------------------------------------------
/test/test_data/bedtools_coverage.txt:
--------------------------------------------------------------------------------
1 | Tool: bedtools coverage (aka coverageBed)
2 | Version: v2.26.0
3 | Summary: Returns the depth and breadth of coverage of features from B
4 | on the intervals in A.
5 |
6 | Usage: bedtools coverage [OPTIONS] -a -b
7 |
8 | Options:
9 | -hist Report a histogram of coverage for each feature in A
10 | as well as a summary histogram for _all_ features in A.
11 |
12 | Output (tab delimited) after each feature in A:
13 | 1) depth
14 | 2) # bases at depth
15 | 3) size of A
16 | 4) % of A at depth
17 |
18 | -d Report the depth at each position in each A feature.
19 | Positions reported are one based. Each position
20 | and depth follow the complete A feature.
21 |
22 | -counts Only report the count of overlaps, don't compute fraction, etc.
23 |
24 | -mean Report the mean depth of all positions in each A feature.
25 |
26 | -s Require same strandedness. That is, only report hits in B
27 | that overlap A on the _same_ strand.
28 | - By default, overlaps are reported without respect to strand.
29 |
30 | -S Require different strandedness. That is, only report hits in B
31 | that overlap A on the _opposite_ strand.
32 | - By default, overlaps are reported without respect to strand.
33 |
34 | -f Minimum overlap required as a fraction of A.
35 | - Default is 1E-9 (i.e., 1bp).
36 | - FLOAT (e.g. 0.50)
37 |
38 | -F Minimum overlap required as a fraction of B.
39 | - Default is 1E-9 (i.e., 1bp).
40 | - FLOAT (e.g. 0.50)
41 |
42 | -r Require that the fraction overlap be reciprocal for A AND B.
43 | - In other words, if -f is 0.90 and -r is used, this requires
44 | that B overlap 90% of A and A _also_ overlaps 90% of B.
45 |
46 | -e Require that the minimum fraction be satisfied for A OR B.
47 | - In other words, if -e is used with -f 0.90 and -F 0.10 this requires
48 | that either 90% of A is covered OR 10% of B is covered.
49 | Without -e, both fractions would have to be satisfied.
50 |
51 | -split Treat "split" BAM or BED12 entries as distinct BED intervals.
52 |
53 | -g Provide a genome file to enforce consistent chromosome sort order
54 | across input files. Only applies when used with -sorted option.
55 |
56 | -nonamecheck For sorted data, don't throw an error if the file has different naming conventions
57 | for the same chromosome. ex. "chr1" vs "chr01".
58 |
59 | -sorted Use the "chromsweep" algorithm for sorted (-k1,1 -k2,2n) input.
60 |
61 | -bed If using BAM input, write output as BED.
62 |
63 | -header Print the header from the A file prior to results.
64 |
65 | -nobuf Disable buffered output. Using this option will cause each line
66 | of output to be printed as it is generated, rather than saved
67 | in a buffer. This will make printing large output files
68 | noticeably slower, but can be useful in conjunction with
69 | other software tools and scripts that need to process one
70 | line of bedtools output at a time.
71 |
72 | -iobuf Specify amount of memory to use for input buffer.
73 | Takes an integer argument. Optional suffixes K/M/G supported.
74 | Note: currently has no effect with compressed files.
75 |
76 | Default Output:
77 | After each entry in A, reports:
78 | 1) The number of features in B that overlapped the A interval.
79 | 2) The number of bases in A that had non-zero coverage.
80 | 3) The length of the entry in A.
81 | 4) The fraction of bases in A that had non-zero coverage.
82 |
83 |
--------------------------------------------------------------------------------
/test/test_data/bedtools_multiinter.txt:
--------------------------------------------------------------------------------
1 |
2 | Tool: bedtools multiinter (aka multiIntersectBed)
3 | Version: v2.26.0
4 | Summary: Identifies common intervals among multiple
5 | BED/GFF/VCF files.
6 |
7 | Usage: bedtools multiinter [OPTIONS] -i FILE1 FILE2 .. FILEn
8 | Requires that each interval file is sorted by chrom/start.
9 |
10 | Options:
11 | -cluster Invoke Ryan Layers's clustering algorithm.
12 |
13 | -header Print a header line.
14 | (chrom/start/end + names of each file).
15 |
16 | -names A list of names (one/file) to describe each file in -i.
17 | These names will be printed in the header line.
18 |
19 | -g Use genome file to calculate empty regions.
20 | - STRING.
21 |
22 | -empty Report empty regions (i.e., start/end intervals w/o
23 | values in all files).
24 | - Requires the '-g FILE' parameter.
25 |
26 | -filler TEXT Use TEXT when representing intervals having no value.
27 | - Default is '0', but you can use 'N/A' or any text.
28 |
29 | -examples Show detailed usage examples.
30 |
31 |
--------------------------------------------------------------------------------
/test/test_data/bedtools_random.txt:
--------------------------------------------------------------------------------
1 |
2 | *****
3 | *****ERROR: Need a genome (-g) file.
4 | *****
5 |
6 | Tool: bedtools random (aka randomBed)
7 | Version: v2.26.0
8 | Summary: Generate random intervals among a genome.
9 |
10 | Usage: bedtools random [OPTIONS] -g
11 |
12 | Options:
13 | -l The length of the intervals to generate.
14 | - Default = 100.
15 | - (INTEGER)
16 |
17 | -n The number of intervals to generate.
18 | - Default = 1,000,000.
19 | - (INTEGER)
20 |
21 | -seed Supply an integer seed for the shuffling.
22 | - By default, the seed is chosen automatically.
23 | - (INTEGER)
24 |
25 | Notes:
26 | (1) The genome file should tab delimited and structured as follows:
27 |
28 |
29 | For example, Human (hg19):
30 | chr1 249250621
31 | chr2 243199373
32 | ...
33 | chr18_gl000207_random 4262
34 |
35 | Tips:
36 | One can use the UCSC Genome Browser's MySQL database to extract
37 | chromosome sizes. For example, H. sapiens:
38 |
39 | mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \
40 | "select chrom, size from hg19.chromInfo" > hg19.genome
41 |
42 |
--------------------------------------------------------------------------------
/test/test_data/bedtools_spacing.txt:
--------------------------------------------------------------------------------
1 |
2 | Tool: bedtools spacing
3 | Version: v2.26.0
4 | Summary: Report (last col.) the gap lengths between intervals in a file.
5 |
6 | Usage: bedtools spacing [OPTIONS] -i
7 |
8 | Notes:
9 | (1) Input must be sorted by chrom,start (sort -k1,1 -k2,2n for BED).
10 | (2) The 1st element for each chrom will have NULL distance. (".").
11 | (3) Distance for overlapping intervaks is -1 and bookended is 0.
12 |
13 | Example:
14 | $ cat test.bed
15 | chr1 0 10
16 | chr1 10 20
17 | chr1 21 30
18 | chr1 35 45
19 | chr1 100 200
20 |
21 | $ bedtools spacing -i test.bed
22 | chr1 0 10 .
23 | chr1 10 20 0
24 | chr1 21 30 1
25 | chr1 35 45 5
26 | chr1 100 200 55
27 |
28 | -bed If using BAM input, write output as BED.
29 |
30 | -header Print the header from the A file prior to results.
31 |
32 | -nobuf Disable buffered output. Using this option will cause each line
33 | of output to be printed as it is generated, rather than saved
34 | in a buffer. This will make printing large output files
35 | noticeably slower, but can be useful in conjunction with
36 | other software tools and scripts that need to process one
37 | line of bedtools output at a time.
38 |
39 | -iobuf Specify amount of memory to use for input buffer.
40 | Takes an integer argument. Optional suffixes K/M/G supported.
41 | Note: currently has no effect with compressed files.
42 |
43 |
--------------------------------------------------------------------------------
/test/test_data/bedtools_subtract.txt:
--------------------------------------------------------------------------------
1 |
2 | Tool: bedtools subtract (aka subtractBed)
3 | Version: v2.26.0
4 | Summary: Removes the portion(s) of an interval that is overlapped
5 | by another feature(s).
6 |
7 | Usage: bedtools subtract [OPTIONS] -a -b
8 |
9 | Options:
10 | -A Remove entire feature if any overlap. That is, by default,
11 | only subtract the portion of A that overlaps B. Here, if
12 | any overlap is found (or -f amount), the entire feature is removed.
13 |
14 | -N Same as -A except when used with -f, the amount is the sum
15 | of all features (not any single feature).
16 |
17 | -wb Write the original entry in B for each overlap.
18 | - Useful for knowing _what_ A overlaps. Restricted by -f and -r.
19 |
20 | -wo Write the original A and B entries plus the number of base
21 | pairs of overlap between the two features.
22 | - Overlaps restricted by -f and -r.
23 | Only A features with overlap are reported.
24 |
25 | -s Require same strandedness. That is, only report hits in B
26 | that overlap A on the _same_ strand.
27 | - By default, overlaps are reported without respect to strand.
28 |
29 | -S Require different strandedness. That is, only report hits in B
30 | that overlap A on the _opposite_ strand.
31 | - By default, overlaps are reported without respect to strand.
32 |
33 | -f Minimum overlap required as a fraction of A.
34 | - Default is 1E-9 (i.e., 1bp).
35 | - FLOAT (e.g. 0.50)
36 |
37 | -F Minimum overlap required as a fraction of B.
38 | - Default is 1E-9 (i.e., 1bp).
39 | - FLOAT (e.g. 0.50)
40 |
41 | -r Require that the fraction overlap be reciprocal for A AND B.
42 | - In other words, if -f is 0.90 and -r is used, this requires
43 | that B overlap 90% of A and A _also_ overlaps 90% of B.
44 |
45 | -e Require that the minimum fraction be satisfied for A OR B.
46 | - In other words, if -e is used with -f 0.90 and -F 0.10 this requires
47 | that either 90% of A is covered OR 10% of B is covered.
48 | Without -e, both fractions would have to be satisfied.
49 |
50 | -split Treat "split" BAM or BED12 entries as distinct BED intervals.
51 |
52 | -g Provide a genome file to enforce consistent chromosome sort order
53 | across input files. Only applies when used with -sorted option.
54 |
55 | -nonamecheck For sorted data, don't throw an error if the file has different naming conventions
56 | for the same chromosome. ex. "chr1" vs "chr01".
57 |
58 | -sorted Use the "chromsweep" algorithm for sorted (-k1,1 -k2,2n) input.
59 |
60 | -bed If using BAM input, write output as BED.
61 |
62 | -header Print the header from the A file prior to results.
63 |
64 | -nobuf Disable buffered output. Using this option will cause each line
65 | of output to be printed as it is generated, rather than saved
66 | in a buffer. This will make printing large output files
67 | noticeably slower, but can be useful in conjunction with
68 | other software tools and scripts that need to process one
69 | line of bedtools output at a time.
70 |
71 | -iobuf Specify amount of memory to use for input buffer.
72 | Takes an integer argument. Optional suffixes K/M/G supported.
73 | Note: currently has no effect with compressed files.
74 |
75 |
--------------------------------------------------------------------------------
/test/test_data/bedtools_window.txt:
--------------------------------------------------------------------------------
1 |
2 | Tool: bedtools window (aka windowBed)
3 | Version: v2.26.0
4 | Summary: Examines a "window" around each feature in A and
5 | reports all features in B that overlap the window. For each
6 | overlap the entire entry in A and B are reported.
7 |
8 | Usage: bedtools window [OPTIONS] -a -b
9 |
10 | Options:
11 | -abam The A input file is in BAM format. Output will be BAM as well. Replaces -a.
12 |
13 | -ubam Write uncompressed BAM output. Default writes compressed BAM.
14 |
15 | -bed When using BAM input (-abam), write output as BED. The default
16 | is to write output in BAM when using -abam.
17 |
18 | -w Base pairs added upstream and downstream of each entry
19 | in A when searching for overlaps in B.
20 | - Creates symterical "windows" around A.
21 | - Default is 1000 bp.
22 | - (INTEGER)
23 |
24 | -l Base pairs added upstream (left of) of each entry
25 | in A when searching for overlaps in B.
26 | - Allows one to define assymterical "windows".
27 | - Default is 1000 bp.
28 | - (INTEGER)
29 |
30 | -r Base pairs added downstream (right of) of each entry
31 | in A when searching for overlaps in B.
32 | - Allows one to define assymterical "windows".
33 | - Default is 1000 bp.
34 | - (INTEGER)
35 |
36 | -sw Define -l and -r based on strand. For example if used, -l 500
37 | for a negative-stranded feature will add 500 bp downstream.
38 | - Default = disabled.
39 |
40 | -sm Only report hits in B that overlap A on the _same_ strand.
41 | - By default, overlaps are reported without respect to strand.
42 |
43 | -Sm Only report hits in B that overlap A on the _opposite_ strand.
44 | - By default, overlaps are reported without respect to strand.
45 |
46 | -u Write the original A entry _once_ if _any_ overlaps found in B.
47 | - In other words, just report the fact >=1 hit was found.
48 |
49 | -c For each entry in A, report the number of overlaps with B.
50 | - Reports 0 for A entries that have no overlap with B.
51 | - Overlaps restricted by -f.
52 |
53 | -v Only report those entries in A that have _no overlaps_ with B.
54 | - Similar to "grep -v."
55 |
56 | -header Print the header from the A file prior to results.
57 |
58 |
--------------------------------------------------------------------------------
/test/test_data/bowtie2_build.txt:
--------------------------------------------------------------------------------
1 | Bowtie 2 version 2.3.5.1 by Ben Langmead (langmea@cs.jhu.edu, www.cs.jhu.edu/~langmea)
2 | Usage: bowtie2-build [options]*
3 | reference_in comma-separated list of files with ref sequences
4 | bt2_index_base write bt2 data to files with this dir/basename
5 | *** Bowtie 2 indexes work only with v2 (not v1). Likewise for v1 indexes. ***
6 | Options:
7 | -f reference files are Fasta (default)
8 | -c reference sequences given on cmd line (as
9 | )
10 | --large-index force generated index to be 'large', even if ref
11 | has fewer than 4 billion nucleotides
12 | --debug use the debug binary; slower, assertions enabled
13 | --sanitized use sanitized binary; slower, uses ASan and/or UBSan
14 | --verbose log the issued command
15 | -a/--noauto disable automatic -p/--bmax/--dcv memory-fitting
16 | -p/--packed use packed strings internally; slower, less memory
17 | --bmax max bucket sz for blockwise suffix-array builder
18 | --bmaxdivn max bucket sz as divisor of ref len (default: 4)
19 | --dcv diff-cover period for blockwise (default: 1024)
20 | --nodc disable diff-cover (algorithm becomes quadratic)
21 | -r/--noref don't build .3/.4 index files
22 | -3/--justref just build .3/.4 index files
23 | -o/--offrate SA is sampled every 2^ BWT chars (default: 5)
24 | -t/--ftabchars # of chars consumed in initial lookup (default: 10)
25 | --threads # of threads
26 | --seed seed for random number generator
27 | -q/--quiet verbose output (for debugging)
28 | -h/--help print detailed description of tool and its options
29 | --usage print this usage message
30 | --version print version information and quit
31 |
--------------------------------------------------------------------------------
/test/test_data/bwa.txt:
--------------------------------------------------------------------------------
1 |
2 | Program: bwa (alignment via Burrows-Wheeler transformation)
3 | Version: 0.7.17-r1188
4 | Contact: Heng Li
5 |
6 | Usage: bwa [options]
7 |
8 | Command: index index sequences in the FASTA format
9 | mem BWA-MEM algorithm
10 | fastmap identify super-maximal exact matches
11 | pemerge merge overlapping paired ends (EXPERIMENTAL)
12 | aln gapped/ungapped alignment
13 | samse generate alignment (single ended)
14 | sampe generate alignment (paired ended)
15 | bwasw BWA-SW for long queries
16 |
17 | shm manage indices in shared memory
18 | fa2pac convert FASTA to PAC format
19 | pac2bwt generate BWT from PAC
20 | pac2bwtgen alternative algorithm for generating BWT
21 | bwtupdate update .bwt to the new format
22 | bwt2sa generate SA from BWT and Occ
23 |
24 | Note: To use BWA, you need to first index the genome with `bwa index'.
25 | There are three alignment algorithms in BWA: `mem', `bwasw', and
26 | `aln/samse/sampe'. If you are not sure which to use, try `bwa mem'
27 | first. Please `man ./bwa.1' for the manual.
28 |
29 |
--------------------------------------------------------------------------------
/test/test_data/bwa_bwt2sa.txt:
--------------------------------------------------------------------------------
1 | Usage: bwa bwt2sa [-i 32]
2 |
--------------------------------------------------------------------------------
/test/test_data/bwa_bwtupdate.txt:
--------------------------------------------------------------------------------
1 | Usage: bwa bwtupdate
2 |
--------------------------------------------------------------------------------
/test/test_data/bwa_index.txt:
--------------------------------------------------------------------------------
1 |
2 | Usage: bwa index [options]
3 |
4 | Options: -a STR BWT construction algorithm: bwtsw, is or rb2 [auto]
5 | -p STR prefix of the index [same as fasta name]
6 | -b INT block size for the bwtsw algorithm (effective with -a bwtsw) [10000000]
7 | -6 index files named as .64.* instead of .*
8 |
9 | Warning: `-a bwtsw' does not work for short genomes, while `-a is' and
10 | `-a div' do not work not for long genomes.
11 |
12 |
--------------------------------------------------------------------------------
/test/test_data/bwa_mem.txt:
--------------------------------------------------------------------------------
1 |
2 | Usage: bwa mem [options] [in2.fq]
3 |
4 | Algorithm options:
5 |
6 | -t INT number of threads [1]
7 | -k INT minimum seed length [19]
8 | -w INT band width for banded alignment [100]
9 | -d INT off-diagonal X-dropoff [100]
10 | -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [1.5]
11 | -y INT seed occurrence for the 3rd round seeding [20]
12 | -c INT skip seeds with more than INT occurrences [500]
13 | -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50]
14 | -W INT discard a chain if seeded bases shorter than INT [0]
15 | -m INT perform at most INT rounds of mate rescues for each read [50]
16 | -S skip mate rescue
17 | -P skip pairing; mate rescue performed unless -S also in use
18 |
19 | Scoring options:
20 |
21 | -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1]
22 | -B INT penalty for a mismatch [4]
23 | -O INT[,INT] gap open penalties for deletions and insertions [6,6]
24 | -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [1,1]
25 | -L INT[,INT] penalty for 5'- and 3'-end clipping [5,5]
26 | -U INT penalty for an unpaired read pair [17]
27 |
28 | -x STR read type. Setting -x changes multiple parameters unless overridden [null]
29 | pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref)
30 | ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref)
31 | intractg: -B9 -O16 -L5 (intra-species contigs to ref)
32 |
33 | Input/output options:
34 |
35 | -p smart pairing (ignoring in2.fq)
36 | -R STR read group header line such as '@RG\tID:foo\tSM:bar' [null]
37 | -H STR/FILE insert STR to header if it starts with @; or insert lines in FILE [null]
38 | -o FILE sam file to output results to [stdout]
39 | -j treat ALT contigs as part of the primary assembly (i.e. ignore .alt file)
40 | -5 for split alignment, take the alignment with the smallest coordinate as primary
41 | -q don't modify mapQ of supplementary alignments
42 | -K INT process INT input bases in each batch regardless of nThreads (for reproducibility) []
43 |
44 | -v INT verbosity level: 1=error, 2=warning, 3=message, 4+=debugging [3]
45 | -T INT minimum score to output [30]
46 | -h INT[,INT] if there are 80% of the max score, output all in XA [5,200]
47 | -a output all alignments for SE or unpaired PE
48 | -C append FASTA/FASTQ comment to SAM output
49 | -V output the reference FASTA header in the XR tag
50 | -Y use soft clipping for supplementary alignments
51 | -M mark shorter split hits as secondary
52 |
53 | -I FLOAT[,FLOAT[,INT[,INT]]]
54 | specify the mean, standard deviation (10% of the mean if absent), max
55 | (4 sigma from the mean if absent) and min of the insert size distribution.
56 | FR orientation only. [inferred]
57 |
58 | Note: Please read the man page for detailed description of the command line and options.
59 |
60 |
--------------------------------------------------------------------------------
/test/test_data/dinosaur.txt:
--------------------------------------------------------------------------------
1 | usage:
2 | > java -jar Dinosaur-1.1.3.jar [OPTIONS] mzML
3 | OPTIONS:
4 | PARAMETER DEFAULT DESCRIPTION
5 | advHelp false set to output adv param file help and quit
6 | advParams path to adv param file
7 | concurrency 2 the number of assays to analyze in parallel
8 | force false ignore missing mzML params
9 | maxCharge 6 max searched ion charge
10 | minCharge 1 min searched ion charge
11 | mode global analysis mode: global or target. Global mode reports all isotope patterns, targeted only those matching targets.
12 | mzML - The shotgun MzML file to analyze
13 | nReport 10 number of random assay to export control figure for
14 | outDir output directory (by default same as input mzML)
15 | outName basename for output files (by default same as input mzML)
16 | profiling false set to enable CPU profiling
17 | reportDeisoMzHeight 15.0 mz range in deisotoper reports
18 | reportHighRes false generate high-resolution plot trail when supported (for print)
19 | reportSeed -1 seed to use for report assay selection (<0 means random)
20 | reportTargets false set to create a special report figure for each target
21 | targetPreference rt if multiple isotope patterns fit target, take the closest rt apex (rt) or the most intense (intensity)
22 | targets path to isotope patterns target file (not used by default)
23 | verbose false increase details in output
24 | writeBinary false set to output binary MSFeatureProtocol file
25 | writeHills false set to output csv file with all hills assigned to isotope patterns
26 | writeMsInspect false set to output MsInspect feature csv file
27 | writeQuantML false set to output mzQuantML file
28 | zipQcFolder false set to zip the entire qc folder on algorithm completion
29 |
30 | Not enough arguments!
31 |
--------------------------------------------------------------------------------
/test/test_data/gth.txt:
--------------------------------------------------------------------------------
1 | Usage: gth [option ...] -genomic file [...] -cdna file [...] -protein file [...]
2 | Compute similarity-based gene structure predictions (spliced alignments)
3 | using cDNA/EST and/or protein sequences and assemble the resulting spliced
4 | alignments to consensus spliced alignments.
5 |
6 | -genomic specify input files containing genomic sequences
7 | mandatory option
8 | -cdna specify input files containing cDNA/EST sequences
9 | -protein specify input files containing protein sequences
10 | -species specify species to select splice site model which is most
11 | appropriate; possible species:
12 | "human"
13 | "mouse"
14 | "rat"
15 | "chicken"
16 | "drosophila"
17 | "nematode"
18 | "fission_yeast"
19 | "aspergillus"
20 | "arabidopsis"
21 | "maize"
22 | "rice"
23 | "medicago"
24 | default: undefined
25 | -bssm read bssm parameter from file in the path given by the
26 | environment variable BSSMDIR
27 | default: undefined
28 | -scorematrix read amino acid substitution scoring matrix from file in the
29 | path given by the environment variable GTHDATADIR
30 | default: BLOSUM62
31 | -translationtable set the codon translation table used for codon translation in
32 | matching, DP, and output
33 | default: 1
34 | -f analyze only forward strand of genomic sequences
35 | default: no
36 | -r analyze only reverse strand of genomic sequences
37 | default: no
38 | -cdnaforward align only forward strand of cDNAs
39 | default: no
40 | -frompos analyze genomic sequence from this position
41 | requires -topos or -width; counting from 1 on
42 | default: 0
43 | -topos analyze genomic sequence to this position
44 | requires -frompos; counting from 1 on
45 | default: 0
46 | -width analyze only this width of genomic sequence
47 | requires -frompos
48 | default: 0
49 | -v be verbose
50 | default: no
51 | -xmlout show output in XML format
52 | default: no
53 | -gff3out show output in GFF3 format
54 | default: no
55 | -md5ids show MD5 fingerprints as sequence IDs
56 | default: no
57 | -o redirect output to specified file
58 | default: undefined
59 | -gzip write gzip compressed output file
60 | default: no
61 | -bzip2 write bzip2 compressed output file
62 | default: no
63 | -force force writing to output file
64 | default: no
65 | -gs2out output in old GeneSeqer2 format
66 | default: no
67 | -minmatchlen specify minimum match length (cDNA matching)
68 | default: 20
69 | -seedlength specify the seed length (cDNA matching)
70 | default: 18
71 | -exdrop specify the Xdrop value for edit distance extension (cDNA
72 | matching)
73 | default: 2
74 | -prminmatchlen specify minimum match length (protein matches)
75 | default: 24
76 | -prseedlength specify seed length (protein matching)
77 | default: 10
78 | -prhdist specify Hamming distance (protein matching)
79 | default: 4
80 | -gcmaxgapwidth set the maximum gap width for global chains
81 | defines approximately the maximum intron length
82 | set to 0 to allow for unlimited length
83 | in order to avoid false-positive exons (lonely exons) at the
84 | sequence ends, it is very important to set this parameter
85 | appropriately!
86 | default: 1000000
87 | -gcmincoverage set the minimum coverage of global chains regarding to the
88 | reference sequence
89 | default: 50
90 | -paralogs compute paralogous genes (different chaining procedure)
91 | default: no
92 | -introncutout enable the intron cutout technique
93 | default: no
94 | -fastdp use jump table to increase speed of DP calculation
95 | default: no
96 | -autointroncutout set the automatic intron cutout matrix size in megabytes and
97 | enable the automatic intron cutout technique
98 | default: 0
99 | -intermediate stop after calculation of spliced alignments and output
100 | results in reusable XML format. Do not process this output
101 | yourself, use the ``normal'' XML output instead!
102 | default: no
103 | -first set the maximum number of spliced alignments per genomic DNA
104 | input. Set to 0 for unlimited number.
105 | default: 0
106 | -help display help for basic options and exit
107 | -help+ display help for all options and exit
108 | -version display version information and exit
109 |
110 | For detailed information, please refer to the manual of GenomeThreader.
111 | Report bugs to .
112 |
--------------------------------------------------------------------------------
/test/test_data/htseq_count.txt:
--------------------------------------------------------------------------------
1 | usage: htseq-count [options] alignment_file gff_file
2 |
3 | This script takes one or more alignment files in SAM/BAM format and a feature
4 | file in GFF format and calculates for each feature the number of reads mapping
5 | to it. See http://htseq.readthedocs.io/en/master/count.html for details.
6 |
7 | positional arguments:
8 | samfilenames Path to the SAM/BAM files containing the mapped reads.
9 | If '-' is selected, read from standard input
10 | featuresfilename Path to the GTF file containing the features
11 |
12 | optional arguments:
13 | -h, --help show this help message and exit
14 | -f {sam,bam,auto}, --format {sam,bam,auto}
15 | Type of data. DEPRECATED: file format
16 | is detected automatically. This option is ignored.
17 | -r {pos,name}, --order {pos,name}
18 | 'pos' or 'name'. Sorting order of
19 | (default: name). Paired-end sequencing data must be
20 | sorted either by position or by read name, and the
21 | sorting order must be specified. Ignored for single-
22 | end data.
23 | --max-reads-in-buffer MAX_BUFFER_SIZE
24 | When is paired end sorted by
25 | position, allow only so many reads to stay in memory
26 | until the mates are found (raising this number will
27 | use more memory). Has no effect for single end or
28 | paired end sorted by name
29 | -s {yes,no,reverse}, --stranded {yes,no,reverse}
30 | Whether the data is from a strand-specific assay.
31 | Specify 'yes', 'no', or 'reverse' (default: yes).
32 | 'reverse' means 'yes' with reversed strand
33 | interpretation
34 | -a MINAQUAL, --minaqual MINAQUAL
35 | Skip all reads with MAPQ alignment quality lower than
36 | the given minimum value (default: 10). MAPQ is the 5th
37 | column of a SAM/BAM file and its usage depends on the
38 | software used to map the reads.
39 | -t FEATURETYPE, --type FEATURETYPE
40 | Feature type (3rd column in GTF file) to be used, all
41 | features of other type are ignored (default, suitable
42 | for Ensembl GTF files: exon)
43 | -i IDATTR, --idattr IDATTR
44 | GTF attribute to be used as feature ID (default,
45 | suitable for Ensembl GTF files: gene_id). All feature
46 | of the right type (see -t option) within the same GTF
47 | attribute will be added together. The typical way of
48 | using this option is to count all exonic reads from
49 | each gene and add the exons but other uses are
50 | possible as well.
51 | --additional-attr ADDITIONAL_ATTR
52 | Additional feature attributes (default: none, suitable
53 | for Ensembl GTF files: gene_name). Use multiple times
54 | for more than one additional attribute. These
55 | attributes are only used as annotations in the output,
56 | while the determination of how the counts are added
57 | together is done based on option -i.
58 | -m {union,intersection-strict,intersection-nonempty}, --mode {union,intersection-strict,intersection-nonempty}
59 | Mode to handle reads overlapping more than one feature
60 | (choices: union, intersection-strict, intersection-
61 | nonempty; default: union)
62 | --nonunique {none,all,fraction,random}
63 | Whether and how to score reads that are not uniquely
64 | aligned or ambiguously assigned to features (choices:
65 | none, all, fraction, random; default: none)
66 | --secondary-alignments {score,ignore}
67 | Whether to score secondary alignments (0x100 flag)
68 | --supplementary-alignments {score,ignore}
69 | Whether to score supplementary alignments (0x800 flag)
70 | -o SAMOUTS, --samout SAMOUTS
71 | Write out all SAM alignment records into SAM/BAM files
72 | (one per input file needed), annotating each line with
73 | its feature assignment (as an optional field with tag
74 | 'XF'). See the -p option to use BAM instead of SAM.
75 | -p {SAM,BAM,sam,bam}, --samout-format {SAM,BAM,sam,bam}
76 | Format to use with the --samout option.
77 | -d OUTPUT_DELIMITER, --delimiter OUTPUT_DELIMITER
78 | Column delimiter in output (default: TAB).
79 | -c OUTPUT_FILENAME, --counts_output OUTPUT_FILENAME
80 | Filename to output the counts to instead of stdout.
81 | --append-output Append counts output. This option is useful if you
82 | have already creates a TSV/CSV/similar file with a
83 | header for your samples (with additional columns for
84 | the feature name and any additionl attributes) and
85 | want to fill in the rest of the file.
86 | -n NPROCESSES, --nprocesses NPROCESSES
87 | Number of parallel CPU processes to use (default: 1).
88 | --feature-query FEATURE_QUERY
89 | Restrict to features descibed in this expression.
90 | Currently supports a single kind of expression:
91 | attribute == "one attr" to restrict the GFF to a
92 | single gene or transcript, e.g. --feature-query
93 | 'gene_name == "ACTB"' - notice the single quotes
94 | around the argument of this option and the double
95 | quotes around the gene name. Broader queries might
96 | become available in the future.
97 | -q, --quiet Suppress progress report
98 | --version Show software version and exit
99 |
100 | Written by Simon Anders (sanders@fs.tum.de), European Molecular Biology
101 | Laboratory (EMBL) and Fabio Zanini (fabio.zanini@unsw.edu.au), UNSW Sydney.
102 | (c) 2010-2020. Released under the terms of the GNU General Public License v3.
103 | Part of the 'HTSeq' framework, version 0.12.4.
104 |
--------------------------------------------------------------------------------
/test/test_data/mauve.txt:
--------------------------------------------------------------------------------
1 | Usage:
2 | mauveAligner [options] ...
3 | Options:
4 | --output= Output file name. Prints to screen by default
5 | --mums Find MUMs only, do not attempt to determine locally collinear blocks (LCBs)
6 | --no-recursion Don't perform recursive anchor identification (implies --no-gapped-alignment)
7 | --no-lcb-extension If determining LCBs, don't attempt to extend the LCBs
8 | --seed-size= Initial seed match size, default is log_2( average seq. length )
9 | --max-extension-iterations= Limit LCB extensions to this number of attempts, default is 4
10 | --eliminate-inclusions Eliminate linked inclusions in subset matches.
11 | --weight= Minimum LCB weight in base pairs per sequence
12 | --match-input= Use specified match file instead of searching for matches
13 | --lcb-match-input Indicates that the match input file contains matches that have been clustered into LCBs
14 | --lcb-input= Use specified lcb file instead of constructing LCBs (skips LCB generation)
15 | --scratch-path= For large genomes, use a directory for storage of temporary data. Should be given two or more times to with different paths.
16 | --id-matrix= Generate LCB stats and write them to the specified file
17 | --island-size= Find islands larger than the given number
18 | --island-output= Output islands the given file (requires --island-size)
19 | --backbone-size= Find stretches of backbone longer than the given number of b.p.
20 | --max-backbone-gap= Allow backbone to be interrupted by gaps up to this length in b.p.
21 | --backbone-output= Output islands the given file (requires --island-size)
22 | --coverage-output= Output a coverage list to the specified file (- for stdout)
23 | --repeats Generates a repeat map. Only one sequence can be specified
24 | --output-guide-tree= Write out a guide tree to the designated file
25 | --collinear Assume that input sequences are collinear--they have no rearrangements
26 |
27 | Gapped alignment controls:
28 | --no-gapped-alignment Don't perform a gapped alignment
29 | --max-gapped-aligner-length= Maximum number of base pairs to attempt aligning with the gapped aligner
30 | --min-recursive-gap-length= Minimum size of gaps that Mauve will perform recursive MUM anchoring on (Default is 200)
31 |
32 | Signed permutation matrix options:
33 | --permutation-matrix-output= Write out the LCBs as a signed permutation matrix to the given file
34 | --permutation-matrix-min-weight= A permutation matrix will be written for every set of LCBs with weight between this value and the value of --weight
35 |
36 | Alignment output options:
37 | --alignment-output-dir= Outputs a set of alignment files (one per LCB) to a given directory
38 | --alignment-output-format= Selects the output format for --alignment-output-dir
39 | --output-alignment= Write out an XMFA format alignment to the designated file
40 |
41 | Supported alignment output formats are: phylip, clustal, msf, nexus, mega, codon
42 |
43 |
--------------------------------------------------------------------------------
/test/test_data/podchecker.txt:
--------------------------------------------------------------------------------
1 | Usage:
2 | podchecker [-help] [-man] [-(no)warnings] [file ...]
3 |
4 | Options and Arguments:
5 | -help Print a brief help message and exit.
6 |
7 | -man Print the manual page and exit.
8 |
9 | -warnings -nowarnings
10 | Turn on/off printing of warnings. Repeating -warnings increases
11 | the warning level, i.e. more warnings are printed. Currently
12 | increasing to level two causes flagging of unescaped "<,>"
13 | characters.
14 |
15 | file The pathname of a POD file to syntax-check (defaults to standard
16 | input).
17 |
--------------------------------------------------------------------------------
/test/test_data/samtools.txt:
--------------------------------------------------------------------------------
1 |
2 | Program: samtools (Tools for alignments in the SAM format)
3 | Version: 1.9 (using htslib 1.9)
4 |
5 | Usage: samtools [options]
6 |
7 | Commands:
8 | -- Indexing
9 | dict create a sequence dictionary file
10 | faidx index/extract FASTA
11 | fqidx index/extract FASTQ
12 | index index alignment
13 |
14 | -- Editing
15 | calmd recalculate MD/NM tags and '=' bases
16 | fixmate fix mate information
17 | reheader replace BAM header
18 | targetcut cut fosmid regions (for fosmid pool only)
19 | addreplacerg adds or replaces RG tags
20 | markdup mark duplicates
21 |
22 | -- File operations
23 | collate shuffle and group alignments by name
24 | cat concatenate BAMs
25 | merge merge sorted alignments
26 | mpileup multi-way pileup
27 | sort sort alignment file
28 | split splits a file by read group
29 | quickcheck quickly check if SAM/BAM/CRAM file appears intact
30 | fastq converts a BAM to a FASTQ
31 | fasta converts a BAM to a FASTA
32 |
33 | -- Statistics
34 | bedcov read depth per BED region
35 | depth compute the depth
36 | flagstat simple stats
37 | idxstats BAM index stats
38 | phase phase heterozygotes
39 | stats generate stats (former bamcheck)
40 |
41 | -- Viewing
42 | flags explain BAM flags
43 | tview text alignment viewer
44 | view SAM<->BAM<->CRAM conversion
45 | depad convert padded BAM to unpadded BAM
46 |
47 |
--------------------------------------------------------------------------------
/test/test_data/samtools_bedcov.txt:
--------------------------------------------------------------------------------
1 | Usage: samtools bedcov [options] [...]
2 |
3 | Options:
4 | -Q mapping quality threshold [0]
5 | -j do not include deletions (D) and ref skips (N) in bedcov computation
6 | --input-fmt-option OPT[=VAL]
7 | Specify a single input file format option in the form
8 | of OPTION or OPTION=VALUE
9 | --reference FILE
10 | Reference sequence FASTA FILE [null]
11 |
--------------------------------------------------------------------------------
/test/test_data/samtools_dict.txt:
--------------------------------------------------------------------------------
1 | About: Create a sequence dictionary file from a fasta file
2 | Usage: samtools dict [options]
3 |
4 | Options: -a, --assembly STR assembly
5 | -H, --no-header do not print @HD line
6 | -o, --output STR file to write out dict file [stdout]
7 | -s, --species STR species
8 | -u, --uri STR URI [file:///abs/path/to/file.fa]
9 |
10 |
--------------------------------------------------------------------------------
/test/test_data/samtools_pl.txt:
--------------------------------------------------------------------------------
1 | Program: samtools.pl (helper script for SAMtools)
2 | Version: 0.3.3
3 | Contact: Heng Li
4 |
5 | Usage: samtools.pl []
6 |
7 | Command: varFilter filtering SNPs and short indels
8 | pileup2fq generate fastq from `pileup -c'
9 | showALEN print alignment length (ALEN) following CIGAR
10 |
11 |
--------------------------------------------------------------------------------
/test/test_data/samtools_quickcheck.txt:
--------------------------------------------------------------------------------
1 | Usage: samtools quickcheck [options] [...]
2 | Options:
3 | -v verbose output (repeat for more verbosity)
4 | -q suppress warning messages
5 |
6 | Notes:
7 |
8 | 1. By default quickcheck will emit a warning message if and only if a file
9 | fails the checks, in which case the exit status is non-zero. Under normal
10 | behaviour with valid data it will be silent and has a zero exit status.
11 | The warning messages are purely for manual inspection and should not be
12 | parsed by scripts.
13 |
14 | 2. In order to use this command programmatically, you should check its exit
15 | status. One way to use quickcheck might be as a check that all BAM files in
16 | a directory are okay:
17 |
18 | samtools quickcheck *.bam && echo 'all ok' \
19 | || echo 'fail!'
20 |
21 | The first level of verbosity lists only files that fail to stdout.
22 | To obtain a parsable list of files that have failed, use this option:
23 |
24 | samtools quickcheck -qv *.bam > bad_bams.fofn \
25 | && echo 'all ok' \
26 | || echo 'some files failed check, see bad_bams.fofn'
27 |
--------------------------------------------------------------------------------
/test/test_explore_e2e.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import tempfile
4 | from unittest.mock import Mock, patch
5 |
6 | import pytest
7 |
8 | from aclimatise import explore_command
9 | from aclimatise.model import Command, Positional
10 |
11 | from .util import (
12 | HelpText,
13 | all_ids,
14 | all_tests,
15 | convert_validate,
16 | ensure_conda,
17 | skip_not_installed,
18 | )
19 |
20 |
21 | @pytest.mark.parametrize("test", all_tests, ids=all_ids)
22 | def test_explore(test: HelpText):
23 | """
24 | A comprehensive end-to-end test that tests the parser and converters, after exploring a given command
25 | """
26 | if not shutil.which(test.cmd[0]):
27 | pytest.skip("{} is not installed".format(test.cmd[0]))
28 |
29 | try:
30 | ensure_conda()
31 | except:
32 | pytest.skip("Not in a conda environment")
33 |
34 |     # For speed's sake, only explore to depth 1
35 | command = explore_command(test.cmd, max_depth=1)
36 |
37 | # Check we parsed correctly
38 | test.run_assertions(command, explore=True)
39 |
40 |
41 | @skip_not_installed("dinosaur")
42 | @pytest.mark.timeout(360)
43 | def test_explore_dinosaur():
44 | """
45 | Python has an issue with killing process trees, whereby the subprocess runs another subprocess.
46 |     This tests that dinosaur, whose process spawns its own subprocesses, can still be explored and terminated correctly.
47 | :return:
48 | """
49 | command = explore_command(["dinosaur"], max_depth=1)
50 |
51 |
52 | @pytest.mark.skipif(not shutil.which("bwa"), reason="bwa is not installed")
53 | def test_explore_bwa():
54 | """
55 | This tests specifically that exploring bwa yields a proper bwa mem
56 | """
57 | command = explore_command(["bwa"], max_depth=1)
58 |
59 | # Check that we parsed bwa mem correctly
60 | mem = [cmd for cmd in command.subcommands if cmd.command[1] == "mem"][0]
61 | assert len(mem.positional) == 3
62 | assert len(mem.subcommands) == 0
63 | assert len(mem.named) >= 30
64 |
65 |
66 | def test_repeat_positionals():
67 | """
68 | Test that, if we have multiple duplicate positionals, only the first is tested
69 | """
70 | parent = Command(
71 | command=[],
72 | positional=[
73 | Positional(name="a", description="", position=i) for i in range(10)
74 | ],
75 | )
76 | child = Command(command=[])
77 |
78 | count = 0
79 |
80 | def mock_convert(*args, **kwargs):
81 | nonlocal count
82 | if count == 0:
83 | count += 1
84 | return parent
85 | return child
86 |
87 | # with patch("aclimatise.execution.help.CliHelpExecutor.explore", new=lambda *args, **kwargs: child):
88 | with patch(
89 | "aclimatise.execution.help.CliHelpExecutor.convert",
90 | new=Mock(side_effect=mock_convert),
91 | ) as mocked:
92 | explore_command([])
93 |
94 | # We should only call convert twice, once for the parent and once for the child, since there's only one unique positional
95 | assert mocked.call_count == 2
96 |
--------------------------------------------------------------------------------
/test/test_model.py:
--------------------------------------------------------------------------------
1 | from aclimatise.model import Command
2 |
3 |
4 | def test_reanalyse(samtools_cmd: Command):
5 | """
6 | Test the command.reanalyse() method
7 | """
8 | reanalysed = samtools_cmd.reanalyse()
9 | assert reanalysed.help_text == samtools_cmd.help_text
10 | assert len(reanalysed.subcommands) == len(samtools_cmd.subcommands)
11 |
12 | re_sort = reanalysed["sort"]
13 | assert len(re_sort.positional) > 0
14 | assert len(re_sort.named) > 0
15 |
--------------------------------------------------------------------------------
/test/test_parse_e2e.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 |
4 | import pytest
5 | from pkg_resources import resource_filename
6 |
7 | from aclimatise.integration import parse_help
8 |
9 | from .util import (
10 | HelpText,
11 | all_ids,
12 | all_tests,
13 | all_tests_lookup,
14 | convert_validate,
15 | validate_cwl,
16 | validate_wdl,
17 | )
18 |
19 |
20 | @pytest.mark.parametrize("test", all_tests, ids=all_ids)
21 | def test_all(test: HelpText):
22 | """
23 | A comprehensive end-to-end test that tests the parser and converters, using the test data files
24 | """
25 | with open(resource_filename("test", test.path)) as fp:
26 | help_text = fp.read()
27 |
28 | cmd = parse_help(test.cmd, help_text)
29 |
30 | # Check that the help text is included in the command
31 | assert cmd.help_text == help_text
32 |
33 | test.run_assertions(cmd, explore=False)
34 |
35 |
36 | @pytest.mark.timeout(20)
37 | def test_long_text():
38 | """
39 | This tests the case where the parse function is handed an inordinate amount of text. In this case, we shouldn't
40 | bother parsing, and just return an empty command
41 | """
42 | text = "\n".join(
43 | [
44 | "".join(
45 | random.choices(
46 | string.ascii_letters + " ",
47 | weights=[1] * len(string.ascii_letters) + [5],
48 | k=100,
49 | )
50 | )
51 | for i in range(2000)
52 | ]
53 | )
54 | command = parse_help(["some", "command"], text=text)
55 | assert len(command.positional) == 0
56 | assert len(command.named) == 0
57 |
--------------------------------------------------------------------------------
/test/test_type_inference.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from aclimatise.cli_types import (
4 | CliBoolean,
5 | CliDir,
6 | CliFile,
7 | CliFloat,
8 | CliInteger,
9 | CliString,
10 | CliType,
11 | )
12 | from aclimatise.model import CliArgument, EmptyFlagArg, Flag, SimpleFlagArg, infer_type
13 |
14 |
15 | @pytest.mark.parametrize(
16 | "string,typ",
17 | [
18 | ("", None),
19 | ("int", CliInteger()),
20 | ("size", CliInteger()),
21 | ("length", CliInteger()),
22 | ("max", CliInteger()),
23 | ("min", CliInteger()),
24 | ("str", CliString()),
25 | ("float", CliFloat()),
26 | ("decimal", CliFloat()),
27 | ("bool", CliBoolean()),
28 | ("file", CliFile()),
29 | ("path", CliFile()),
30 | ("input file", CliFile(output=False)),
31 | ("output file", CliFile(output=True)),
32 | ("folder", CliDir()),
33 | ("directory", CliDir()),
34 | ("output directory", CliDir(output=True)),
35 | ("blah 23 blub", CliInteger()),
36 | ("nonsense 23.42", CliFloat()),
37 | (".42 gibberish", CliFloat()),
38 | ("1E-5", CliFloat()),
39 | ("BOOL Output strand bias files, 'true' or 'false'", CliBoolean()),
40 | ("file to write out dict file [stdout]", CliFile(output=True)),
41 | ("Filename to output the counts to instead of stdout.", CliFile(output=True)),
42 | pytest.param(
43 | "Write out all SAM alignment records into SAM/BAM files (one per input file needed), annotating each line with its feature assignment (as an optional field with tag 'XF'). See the -p option to use BAM instead of SAM.",
44 | CliFile(output=True),
45 | marks=pytest.mark.xfail(
46 | reason="This description doesn't make it clear that it wants an output file. I'm not sure how this could ever be parsed"
47 | ),
48 | ),
49 | ],
50 | )
51 | def test_type_inference(string, typ):
52 | inferred_type = infer_type(string)
53 | assert inferred_type == typ
54 |
55 |
56 | @pytest.mark.parametrize(
57 | "flag,typ",
58 | [
59 | [
60 | Flag(
61 | description="Filename to output the counts to instead of stdout.",
62 | synonyms=["-c", "--counts_output"],
63 | args=SimpleFlagArg("OUTPUT_FILENAME"),
64 | ),
65 | CliFile(output=True),
66 | ],
67 | pytest.param(
68 | Flag(
69 | description="redirect output to specified file\ndefault: undefined",
70 | synonyms=["-o"],
71 | args=EmptyFlagArg(),
72 | ),
73 | CliFile(output=True),
74 | marks=pytest.mark.xfail(
75 | reason="Because the help doesn't indicate an argument, we can't know that this is an output file"
76 | ),
77 | ),
78 | ],
79 | )
80 | def test_flag_type_inference(flag: CliArgument, typ: CliType):
81 | inferred_type = flag.get_type()
82 | assert inferred_type == typ
83 |
--------------------------------------------------------------------------------
/test/test_yaml_dump.py:
--------------------------------------------------------------------------------
1 | from io import StringIO
2 |
3 | from aclimatise.integration import parse_help
4 | from aclimatise.yaml import yaml
5 |
6 |
7 | def test_round_trip(bwamem_help):
8 | command = parse_help(["bwa", "mem"], bwamem_help)
9 |
10 | # Dump
11 | buffer = StringIO()
12 | yaml.dump(command, buffer)
13 |
14 | # Load
15 | buffer.seek(0)
16 | output = yaml.load(buffer)
17 |
18 | # Assert the round trip worked
19 | assert command == output
20 |
--------------------------------------------------------------------------------
/test/usage/test_usage.py:
--------------------------------------------------------------------------------
1 | from itertools import chain
2 |
3 | import pytest
4 |
5 | from aclimatise.flag_parser.elements import arg_expression, flag_with_arg, list_type_arg
6 | from aclimatise.model import Flag, RepeatFlagArg, SimpleFlagArg
7 | from aclimatise.usage_parser.elements import ( # short_flag_list,
8 | stack,
9 | usage,
10 | usage_element,
11 | )
12 | from aclimatise.usage_parser.model import UsageElement
13 |
14 |
15 | def test_bwa():
16 | txt = "Usage: bwa mem [options] [in2.fq]"
17 | els = usage.parseString(txt)
18 | print(els)
19 |
20 |
21 | @pytest.mark.skip(
22 | "It's impossible to distinguish between a grouped list of short flags and one long flag with a single dash"
23 | )
24 | def test_samtools_merge_short_flags():
25 | text = "-nurlf"
26 | els = short_flag_list.parseString(text)
27 | assert len(els) == 5
28 | assert isinstance(els[0], Flag)
29 |
30 |
31 | @pytest.mark.skip(
32 | "It's impossible to distinguish between a grouped list of short flags and one long flag with a single dash"
33 | )
34 | def test_samtools_merge_optional_short_flags():
35 | text = "[-nurlf]"
36 | els = usage_element.parseString(text)
37 | assert len(els) == 5
38 | assert isinstance(els[0], Flag)
39 | assert els[0].optional
40 |
41 |
42 | def test_samtools_merge_variable():
43 | text = ""
44 | els = usage_element.parseString(text)
45 | assert len(els) == 1
46 | assert isinstance(els[0], UsageElement)
47 | assert els[0].variable
48 |
49 |
50 | def test_samtools_merge_flag_arg():
51 | text = "-h inh.sam"
52 | els = usage_element.parseString(text)
53 | assert len(els) == 1
54 | assert isinstance(els[0], Flag)
55 | assert isinstance(els[0].args, SimpleFlagArg)
56 |
57 |
58 | def test_samtools_merge_optional_flag_arg():
59 | text = "[-h inh.sam]"
60 | els = usage_element.setDebug().parseString(text)
61 | assert len(els) == 1
62 | assert isinstance(els[0], Flag)
63 | assert els[0].optional
64 | assert isinstance(els[0].args, SimpleFlagArg)
65 |
66 |
67 | def test_samtools_merge_list_args():
68 | text = "[ ... ]"
69 | el = usage_element.parseString(text)
70 | assert len(el) == 1
71 | assert isinstance(el[0], UsageElement)
72 | assert el[0].repeatable
73 |
74 |
75 | def test_samtools_merge_full(process, usage_parser):
76 | text = process(
77 | """
78 | Usage: samtools merge [-nurlf] [-h inh.sam] [-b ] [ ... ]
79 | """
80 | )
81 | command = usage_parser.parse_usage(cmd=["samtools", "merge"], usage=text)
82 |
83 | assert len(command.positional) == 3
84 | assert command.positional[0].name == "out.bam"
85 | assert command.positional[1].name == "in1.bam"
86 |
87 | assert len(command.named) == 3
88 | assert command.all_synonyms == {"-nurlf", "-h", "-b"}
89 |
90 |
91 | def test_pisces_usage(usage_parser):
92 | text = "USAGE: dotnet Pisces.dll -bam -g "
93 | command = usage_parser.parse_usage(["pisces"], text)
94 | assert len(command.named) == 2
95 | assert len(command.positional) == 0
96 | assert command.all_synonyms == {"-bam", "-g"}
97 |
98 |
99 | def test_trailing_text(process, usage_parser):
100 | """
101 | Tests that the usage parser will not parse text after the usage section has ended
102 | """
103 | text = process(
104 | """
105 | usage: htseq-count [options] alignment_file gff_file
106 |
107 | This script takes one or more alignment files in SAM/BAM format and a feature
108 | file in GFF format and calculates for each feature the number of reads mapping
109 | to it. See http://htseq.readthedocs.io/en/master/count.html for details.
110 | """
111 | )
112 | command = usage_parser.parse_usage(["htseq-count"], text)
113 | # We don't count either the command "htseq-count", or "[options]" as an argument, so there are only 2 positionals
114 | assert len(command.positional) == 2
115 |
116 |
117 | def test_bwt2sa(usage_parser):
118 | text = """
119 | Usage: bwa bwt2sa [-i 32]
120 | """
121 |
122 | command = usage_parser.parse_usage(["bwa", "bwt2sa"], text)
123 |
124 | # in and out
125 | assert len(command.positional) == 2
126 |
127 | # -i
128 | assert len(command.named) == 1
129 |
130 |
131 | def test_bedtools_multiinter_flag_arg():
132 | text = " FILE1 FILE2 .. FILEn"
133 | arg = arg_expression.parseString(text)[0]
134 | assert isinstance(arg, RepeatFlagArg)
135 | assert arg.name == "FILEn"
136 |
137 |
138 | def test_bedtools_multiinter_flag():
139 | text = "-i FILE1 FILE2 .. FILEn"
140 | arg = flag_with_arg.parseString(text)[0]
141 | assert isinstance(arg.argtype, RepeatFlagArg)
142 | assert arg.name == "-i"
143 |
144 |
145 | def test_bedtools_multiinter(usage_parser):
146 | text = """
147 | Summary: Identifies common intervals among multiple
148 | BED/GFF/VCF files.
149 |
150 | Usage: bedtools multiinter [OPTIONS] -i FILE1 FILE2 .. FILEn
151 | Requires that each interval file is sorted by chrom/start.
152 |
153 | Options:
154 | -cluster Invoke Ryan Layers's clustering algorithm.
155 | """
156 |
157 | command = usage_parser.parse_usage(["bedtools", "multiinter"], text)
158 |
159 | assert len(command.positional) == 0
160 | assert len(command.named) == 1
161 | assert command.named[0].longest_synonym == "-i"
162 | assert isinstance(command.named[0].args, RepeatFlagArg)
163 |
164 |
165 | def test_samtools_dict(usage_parser):
166 | text = """
167 | Usage: samtools dict [options]
168 | """
169 | command = usage_parser.parse_usage(["samtools", "dict"], text, debug=True)
170 | assert len(command.positional) == 1
171 |
172 |
173 | def test_mid_line_usage(usage_parser):
174 | text = """
175 | Can't open --usage: No such file or directory at /usr/bin/samtools.pl line 50.
176 | """
177 | command = usage_parser.parse_usage(["samtools.pl", "showALEN"], text, debug=True)
178 | assert command.empty
179 |
180 |
181 | def test_usage_description_block(usage_parser):
182 | text = """
183 | Usage:
184 | shell [options] -e string
185 | execute string in V8
186 | shell [options] file1 file2 ... filek
187 | run JavaScript scripts in file1, file2, ..., filek
188 | shell [options]
189 | shell [options] --shell [file1 file2 ... filek]
190 | run an interactive JavaScript shell
191 | d8 [options] file1 file2 ... filek
192 | d8 [options]
193 | d8 [options] --shell [file1 file2 ... filek]
194 | run the new debugging shell
195 | """
196 | command = usage_parser.parse_usage(["typeHLA.js"], text, debug=True)
197 |
198 | positional_names = {pos.name for pos in command.positional}
199 | flag_synonyms = set(chain.from_iterable([flag.synonyms for flag in command.named]))
200 |
201 | assert "shell" in positional_names
202 | assert "filek" in positional_names
203 | assert "d8" in positional_names
204 |
205 | assert "--shell" in flag_synonyms
206 | assert "-e" in flag_synonyms
207 |
--------------------------------------------------------------------------------