├── .github └── workflows │ └── build.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── LICENSE ├── README.rst ├── aclimatise ├── __init__.py ├── cli.py ├── cli_types.py ├── converter │ ├── __init__.py │ ├── cwl.py │ ├── janis.py │ ├── wdl.py │ └── yml.py ├── execution │ ├── __init__.py │ ├── docker.py │ ├── help.py │ ├── local.py │ └── man.py ├── flag_parser │ ├── __init__.py │ ├── elements.py │ └── parser.py ├── integration.py ├── model.py ├── name_generation.py ├── nlp.py ├── parser.py ├── usage_parser │ ├── __init__.py │ ├── elements.py │ ├── model.py │ └── parser.py └── yaml.py ├── docs ├── Makefile ├── README.md ├── _static │ └── railroad.html ├── api.rst ├── changes.rst ├── cli.rst ├── conf.py ├── grammar.rst ├── index.rst ├── installation.rst ├── make.bat └── model.rst ├── environment.yml ├── setup.cfg ├── setup.py └── test ├── __init__.py ├── conftest.py ├── executors ├── __init__.py ├── test_docker.py ├── test_local.py └── test_man.py ├── flags ├── conftest.py ├── test_bedtools.py ├── test_bwa.py ├── test_bwakit.py ├── test_gth.py ├── test_htseq.py ├── test_pisces.py ├── test_podchecker.py ├── test_samtools.py └── test_singularity.py ├── name_generation ├── __init__.py ├── conftest.py ├── test_batch.py ├── test_case.py ├── test_description.py ├── test_group.py └── test_single_flag.py ├── test_cli.py ├── test_convert.py ├── test_data ├── bedtools.txt ├── bedtools │ ├── bedtools.yml │ ├── bedtools_annotate.yml │ ├── bedtools_bamtobed.yml │ ├── bedtools_bamtofastq.yml │ ├── bedtools_bed12tobed6.yml │ ├── bedtools_bedpetobam.yml │ ├── bedtools_bedtobam.yml │ ├── bedtools_closest.yml │ ├── bedtools_cluster.yml │ ├── bedtools_complement.yml │ ├── bedtools_coverage.yml │ ├── bedtools_expand.yml │ ├── bedtools_fisher.yml │ ├── bedtools_flank.yml │ ├── bedtools_genomecov.yml │ ├── bedtools_getfasta.yml │ ├── bedtools_groupby.yml │ ├── bedtools_igv.yml │ ├── bedtools_intersect.yml │ ├── bedtools_jaccard.yml │ ├── bedtools_links.yml │ ├── 
bedtools_makewindows.yml │ ├── bedtools_map.yml │ ├── bedtools_maskfasta.yml │ ├── bedtools_merge.yml │ ├── bedtools_multicov.yml │ ├── bedtools_multiinter.yml │ ├── bedtools_nuc.yml │ ├── bedtools_overlap.yml │ ├── bedtools_pairtobed.yml │ ├── bedtools_pairtopair.yml │ ├── bedtools_random.yml │ ├── bedtools_reldist.yml │ ├── bedtools_sample.yml │ ├── bedtools_shift.yml │ ├── bedtools_shuffle.yml │ ├── bedtools_slop.yml │ ├── bedtools_sort.yml │ ├── bedtools_spacing.yml │ ├── bedtools_split.yml │ ├── bedtools_subtract.yml │ ├── bedtools_tag.yml │ ├── bedtools_unionbedg.yml │ └── bedtools_window.yml ├── bedtools_closest.txt ├── bedtools_coverage.txt ├── bedtools_multiinter.txt ├── bedtools_random.txt ├── bedtools_spacing.txt ├── bedtools_subtract.txt ├── bedtools_window.txt ├── bowtie2_build.txt ├── bwa.txt ├── bwa_bwt2sa.txt ├── bwa_bwtupdate.txt ├── bwa_index.txt ├── bwa_mem.txt ├── dinosaur.txt ├── gth.txt ├── htseq_count.txt ├── mauve.txt ├── pisces.txt ├── podchecker.txt ├── samtools.txt ├── samtools │ ├── samtools.yml │ ├── samtools_addreplacerg.yml │ ├── samtools_bedcov.yml │ ├── samtools_calmd.yml │ ├── samtools_cat.yml │ ├── samtools_collate.yml │ ├── samtools_depad.yml │ ├── samtools_depth.yml │ ├── samtools_dict.yml │ ├── samtools_faidx.yml │ ├── samtools_fasta.yml │ ├── samtools_fastq.yml │ ├── samtools_fixmate.yml │ ├── samtools_flags.yml │ ├── samtools_flagstat.yml │ ├── samtools_fqidx.yml │ ├── samtools_idxstats.yml │ ├── samtools_index.yml │ ├── samtools_markdup.yml │ ├── samtools_merge.yml │ ├── samtools_mpileup.yml │ ├── samtools_phase.yml │ ├── samtools_quickcheck.yml │ ├── samtools_reheader.yml │ ├── samtools_sort.yml │ ├── samtools_split.yml │ ├── samtools_stats.yml │ ├── samtools_targetcut.yml │ ├── samtools_tview.yml │ └── samtools_view.yml ├── samtools_bedcov.txt ├── samtools_dict.txt ├── samtools_pl.txt ├── samtools_quickcheck.txt └── typeHLA.txt ├── test_explore_e2e.py ├── test_model.py ├── test_parse_e2e.py ├── test_type_inference.py ├── 
test_yaml_dump.py ├── usage └── test_usage.py └── util.py /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test_conda: 7 | # Run tests using conda, which gives us bioinformatics tools installed 8 | runs-on: ubuntu-latest 9 | container: continuumio/miniconda3 10 | steps: 11 | - uses: actions/checkout@v2 12 | 13 | - name: apt dependencies, and fix the `man` pages 14 | run: | 15 | rm /etc/dpkg/dpkg.cfg.d/docker 16 | apt-get update && apt install -y --reinstall man coreutils manpages build-essential git git-man 17 | mandb --create 18 | 19 | - uses: actions/setup-dotnet@v1.7.2 20 | with: 21 | dotnet-version: "2.1.x" 22 | 23 | - name: Tests 24 | run: | 25 | source /root/.bashrc 26 | conda env create 27 | conda activate aclimatise-test 28 | pip install --upgrade pip wheel setuptools 29 | pip install -e .[dev] 30 | python -m spacy download en 31 | pytest --tb=native --log-cli-level INFO 32 | shell: bash 33 | 34 | test_system: 35 | runs-on: ubuntu-latest 36 | 37 | strategy: 38 | matrix: 39 | python-version: [3.6, 3.7, 3.8] 40 | 41 | steps: 42 | - uses: actions/checkout@v2 43 | 44 | - name: Set up Python ${{ matrix.python-version }} 45 | uses: actions/setup-python@v1 46 | with: 47 | python-version: ${{ matrix.python-version }} 48 | 49 | - name: Install dependencies 50 | run: | 51 | pip install --upgrade pip wheel setuptools 52 | pip install -e .[dev] 53 | python -m spacy download en 54 | 55 | - uses: pre-commit/action@v1.0.1 56 | 57 | - name: Test with pytest 58 | run: | 59 | pytest --tb=native 60 | 61 | - name: Test the documentation still builds 62 | run: | 63 | cd docs 64 | make html 65 | 66 | publish: 67 | if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') 68 | needs: [test_system, test_conda] 69 | runs-on: ubuntu-latest 70 | steps: 71 | - uses: actions/checkout@v2 72 | 73 | - name: Set up Python ${{ matrix.python-version }} 
74 | uses: actions/setup-python@v1 75 | with: 76 | python-version: 3.8 77 | 78 | - name: Compile package 79 | run: | 80 | pip install -U wheel setuptools 81 | pip install -e .[dev] 82 | python -m spacy download en 83 | python setup.py sdist bdist_wheel 84 | 85 | - name: Create GitHub Release 86 | id: create_release 87 | uses: actions/create-release@v1 88 | env: 89 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 90 | with: 91 | tag_name: ${{ github.ref }} 92 | release_name: Release ${{ github.ref }} 93 | draft: false 94 | prerelease: false 95 | 96 | - name: Publish package 97 | uses: pypa/gh-action-pypi-publish@master 98 | with: 99 | user: __token__ 100 | password: ${{ secrets.pypi_password }} 101 | 102 | - name: Generate railroad diagram 103 | run: | 104 | # We need an unpublished version of PyParsing for this 105 | pip install -U git+https://github.com/pyparsing/pyparsing.git#egg=pyparsing[diagrams] 106 | aclimatise railroad > docs/_static/railroad.html 107 | 108 | - name: Build the documentation 109 | run: | 110 | cd docs 111 | make html 112 | 113 | - name: Deploy docs 114 | uses: peaceiris/actions-gh-pages@v3 115 | with: 116 | github_token: ${{ secrets.GITHUB_TOKEN }} 117 | publish_dir: docs/_build/html 118 | publish_branch: gh-pages 119 | enable_jekyll: false 120 | 121 | - name: Update Basecamp 122 | uses: peter-evans/repository-dispatch@v1 123 | with: 124 | token: ${{ secrets.REPOACCESSTOKEN }} 125 | event-type: aclimatise-update 126 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 
| share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # Environments 94 | .env 95 | .venv 96 | env/ 97 | venv/ 98 | ENV/ 99 | env.bak/ 100 | venv.bak/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # mkdocs documentation 110 | /site 111 | 112 | # mypy 113 | .mypy_cache/ 114 | .dmypy.json 115 | dmypy.json 116 | 117 | # Pyre type checker 118 | .pyre/ 119 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | known_third_party = WDL,click,cwl_utils,cwltool,inflection,pkg_resources,pyhash,pyparsing,pytest,ruamel,setuptools,spacy,wdlgen,wordsegment,regex,num2words,word2number,psutil,packaging,docker,attr 3 | 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/timothycrosley/isort 3 | rev: 4.3.21-2 4 | hooks: 5 | - id: isort 6 | types: [python] 7 | args: 8 | - "--multi-line=3" 9 | - "--trailing-comma" 10 | - "--force-grid-wrap=0" 11 | - "--use-parentheses" 12 | - "--line-width=88" 13 | 14 | - repo: https://github.com/psf/black 15 | rev: 20.8b1 16 | hooks: 17 | - id: black 18 | 19 | - repo: https://github.com/pre-commit/mirrors-prettier 20 | rev: v2.1.2 21 | hooks: 22 | - id: prettier 23 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | aCLImatise 2 | *********** 3 | |DOI| 4 | 5 | .. |DOI| image:: https://zenodo.org/badge/DOI/10.1093/bioinformatics/btaa1033.svg 6 | :target: https://doi.org/10.1093/bioinformatics/btaa1033 7 | 8 | For the full documentation, refer to the `Github Pages Website 9 | `_. 10 | 11 | ====================================================================== 12 | 13 | aCLImatise is a Python library and command-line utility for parsing the help output 14 | of a command-line tool and then outputting a description of the tool in a more 15 | structured format, for example a 16 | `Common Workflow Language tool definition `_. 17 | 18 | Currently aCLImatise supports both `CWL `_ and 19 | `WDL `_ outputs, but other formats will be considered in the future, especially pull 20 | requests to support them. 21 | 22 | Please also refer to `The aCLImatise Base Camp `_, which is a database of pre-computed tool definitions 23 | generated by the aCLImatise parser. Most bioinformatics tools have a tool definition already generated in the Base Camp, 24 | so you may not need to run aCLImatise directly. 
25 | 26 | aCLImatise is now published in the journal *Bioinformatics*. You can read the application note here: https://doi.org/10.1093/bioinformatics/btaa1033. 27 | To cite aCLImatise, please use the citation generator provided by the journal. 28 | 29 | Example 30 | ------- 31 | 32 | Lets say you want to create a CWL workflow containing the common Unix ``wc`` (word count) 33 | utility. Running ``wc --help`` returns: 34 | 35 | .. code-block:: 36 | 37 | Usage: wc [OPTION]... [FILE]... 38 | or: wc [OPTION]... --files0-from=F 39 | Print newline, word, and byte counts for each FILE, and a total line if 40 | more than one FILE is specified. A word is a non-zero-length sequence of 41 | characters delimited by white space. 42 | 43 | With no FILE, or when FILE is -, read standard input. 44 | 45 | The options below may be used to select which counts are printed, always in 46 | the following order: newline, word, character, byte, maximum line length. 47 | -c, --bytes print the byte counts 48 | -m, --chars print the character counts 49 | -l, --lines print the newline counts 50 | --files0-from=F read input from the files specified by 51 | NUL-terminated names in file F; 52 | If F is - then read names from standard input 53 | -L, --max-line-length print the maximum display width 54 | -w, --words print the word counts 55 | --help display this help and exit 56 | --version output version information and exit 57 | 58 | GNU coreutils online help: 59 | Full documentation at: 60 | or available locally via: info '(coreutils) wc invocation' 61 | 62 | If you run ``aclimatise explore wc``, which means "parse the wc command and all subcommands", 63 | you'll end up with the following files in your current directory: 64 | 65 | * ``wc.cwl`` 66 | * ``wc.wdl`` 67 | * ``wc.yml`` 68 | 69 | These are representations of the command ``wc`` in 3 different formats. If you look at ``wc.wdl``, you'll see that it 70 | contains a WDL-compatible tool definition for ``wc``: 71 | 72 | .. 
"""
Package entry point for aCLImatise: re-exports the public converters,
executors and helper functions so callers can import them from the top level.
"""
import typing

from aclimatise.converter import WrapperGenerator
from aclimatise.converter.cwl import CwlGenerator
from aclimatise.converter.janis import JanisGenerator
from aclimatise.converter.wdl import WdlGenerator
from aclimatise.converter.yml import YmlGenerator
from aclimatise.execution import Executor
from aclimatise.execution.docker import DockerExecutor
from aclimatise.execution.local import LocalExecutor
from aclimatise.execution.man import ManPageExecutor
from aclimatise.integration import parse_help
from aclimatise.model import Command, Flag
from deprecated import deprecated

# Default executor used by explore_command when the caller does not supply one.
default_executor = LocalExecutor()


@deprecated(
    reason="Please use the explore method on the executors directly. e.g. `LocalExecutor().explore()`"
)
def explore_command(
    cmd: typing.List[str],
    flags: typing.Iterable[str] = (["--help"], ["-h"], [], ["--usage"]),
    parent: typing.Optional[Command] = None,
    max_depth: int = 2,
    try_subcommand_flags: bool = True,
    executor: Executor = default_executor,
) -> typing.Optional[Command]:
    """
    Given a command to start with, builds a model of this command and all its subcommands (if they exist).
    Use this if you know the command you want to parse, you don't know which flags it responds to with help text, and
    you want to include subcommands.

    .. note::
        ``flags`` and ``try_subcommand_flags`` are retained only for backwards
        compatibility of the signature; they are not forwarded to the executor.
    """
    return executor.explore(cmd, max_depth=max_depth, parent=parent)


# __all__ must contain *names* (strings), not the objects themselves:
# ``from aclimatise import *`` raises TypeError on any non-string entry.
__all__ = [
    "CwlGenerator",
    "WdlGenerator",
    "YmlGenerator",
    "JanisGenerator",
    "LocalExecutor",
    "DockerExecutor",
    "ManPageExecutor",
    "explore_command",
    "parse_help",
]
"""
Code relating to the command line interface to aCLImatise
"""
import sys
from pathlib import Path
from typing import Iterable, Optional, Tuple

import click

from aclimatise import WrapperGenerator, explore_command, parse_help
from aclimatise.execution.local import LocalExecutor
from aclimatise.execution.man import ManPageExecutor
from aclimatise.flag_parser.parser import CliParser

# Some common options, shared by more than one subcommand
opt_generate_names = click.option(
    "--generate-names",
    "-g",
    is_flag=True,
    help=(
        "Rather than using the long flag to generate the argument name, generate them automatically using the "
        "flag description. Generally helpful if there are no long flags, only short flags."
    ),
)
opt_case = click.option(
    "--case",
    "-c",
    type=click.Choice(WrapperGenerator.cases),
    help=(
        "Which case to use for variable names. If not set, defaults to the language defaults: snake_case for CWL"
        " and snake_case for WDL"
    ),
    default="snake",
)
opt_cmd = click.argument("cmd", nargs=-1, required=True)


@click.group()
def main():
    """Entry point for the ``aclimatise`` command group."""
    pass


@main.command(help="Run an executable and explore all subcommands")
@opt_cmd
@opt_case
@opt_generate_names
@click.option(
    "--man",
    "-m",
    is_flag=True,
    help="Parse the help using its man page, rather than by executing the command. This will fail if the man page doesn't exist",
)
@click.option(
    "--depth",
    "-d",
    type=int,
    default=1,
    help="How many levels of subcommands we should look for. Depth 2 means commands can be 3 levels deep, such as "
    "``git submodule foreach``",
)
@click.option(
    "--format",
    "-f",
    "formats",
    type=click.Choice(["wdl", "cwl", "yml"]),
    multiple=True,
    default=("yml", "wdl", "cwl"),
    help="The language in which to output the CLI wrapper",
)
@click.option(
    "--out-dir",
    "-o",
    type=Path,
    help="Directory in which to put the output files",
    default=Path(),
)
@click.option(
    "--help-flag",
    "-l",
    type=str,
    help="Flag to append to the end of the command to make it output help text",
)
@click.option(
    "--subcommands/--no-subcommands", default=True, help="Look for subcommands"
)
def explore(
    cmd: Iterable[str],
    out_dir: Path,
    formats: Tuple[str],
    subcommands: bool,
    case: str,
    generate_names: bool,
    man: bool,
    help_flag: str,
    depth: Optional[int] = None,
):
    """
    Explore ``cmd`` (and optionally its subcommands) and write one wrapper
    file per command, in each requested format, into ``out_dir``.
    """
    # We only support these two executors via CLI because the docker executor
    # would require some additional config.
    # NOTE: local names renamed from ``exec``/``format`` — don't shadow builtins.
    if man:
        executor = ManPageExecutor()
    else:
        kwargs = {}
        if help_flag is not None:
            kwargs["flags"] = [[help_flag]]
        executor = LocalExecutor(**kwargs)

    if subcommands:
        command = executor.explore(list(cmd), max_depth=depth)
    else:
        command = executor.convert(list(cmd))

    for output_format in formats:
        converter_cls = WrapperGenerator.choose_converter(output_format)
        converter = converter_cls(
            generate_names=generate_names,
            case=case,
        )
        # generate_tree is a generator: consume it so the files get written
        list(converter.generate_tree(command, out_dir))


@main.command(
    help="Read a command help from stdin and output a tool definition to stdout"
)
@opt_cmd
@opt_generate_names
@opt_case
@click.option(
    "--format",
    "-f",
    type=click.Choice(["wdl", "cwl", "yml", "janis"]),
    default="cwl",
    help="The language in which to output the CLI wrapper",
)
def pipe(cmd, generate_names, case, format):
    """
    Parse help text read from stdin for ``cmd`` and print a single wrapper
    definition to stdout.

    (The ``format`` parameter intentionally shadows the builtin: click binds
    options to parameters by name, so renaming it would break the CLI.)
    """
    stdin = "".join(sys.stdin.readlines())
    command = parse_help(cmd, stdin)

    converter_cls = WrapperGenerator.choose_converter(format)
    converter = converter_cls(
        generate_names=generate_names,
        case=case,
    )
    output = converter.save_to_string(command)
    print(output)


@main.command(help="Output a representation of the internal grammar")
def railroad():
    """
    Write an HTML railroad diagram of the flag grammar to stdout.

    Exits with status 1 if the installed PyParsing lacks the diagram module.
    """
    try:
        from pyparsing.diagram import to_railroad, railroad_to_html

        parser = CliParser()
        railroad = to_railroad(parser.flags)
        sys.stdout.write(railroad_to_html(railroad))
    except ImportError:
        print(
            "You need PyParsing 3.0.0a2 or greater to use this feature", file=sys.stderr
        )
        sys.exit(1)


if __name__ == "__main__":
    main()
"""
Contains the objects that represent a "type" of data a flag argument might store
"""
import typing
from enum import Enum

import attr

from aclimatise.yaml import AttrYamlMixin


@attr.s(auto_attribs=True, frozen=True)
class CliType(AttrYamlMixin):
    """
    A data type used in the command-line
    """

    @staticmethod
    def lowest_common_type(types: typing.Iterable["CliType"]) -> "CliType":
        """
        Return a single type that every element of ``types`` can be
        represented as.

        :raises Exception: if the types include complex types (files, dirs,
            lists, dicts, tuples) that have no common representation
        """
        # Materialise first: ``types`` may be a one-shot generator, which the
        # set comprehension below would exhaust before we read it again.
        types = list(types)
        type_set: typing.Set[typing.Type[CliType]] = {type(t) for t in types}

        if len(type_set) == 1:
            # If there is only one type, use it
            return types[0]

        if len(type_set) == 2 and CliInteger in type_set and CliFloat in type_set:
            # If they're all numeric, they can be represented as floats
            return CliFloat()

        if {
            CliDir,
            CliDict,
            CliFile,
            CliTuple,
            CliList,
        } & type_set:
            # These complex types cannot be represented in a simpler way
            raise Exception(
                "There is no common type between {}".format(
                    ", ".join([str(typ) for typ in type_set])
                )
            )

        else:
            # Most of the time, strings can be used to represent primitive types
            return CliString()

    @property
    def representable(self) -> set:
        """
        Returns a set of types that this type could alternatively be represented as.
        Adds the class's own type to the _representable set
        """
        return self._representable.union({type(self)})

    # The list of types that this specific type could be representable as.
    # (Class-level and read-only by convention; never mutated in place.)
    _representable = set()


@attr.s(auto_attribs=True, frozen=True)
class CliEnum(CliType):
    """
    One of a list of possible options
    """

    enum: Enum
    """
    The possible options as a Python Enum
    """


@attr.s(auto_attribs=True, frozen=True)
class CliFloat(CliType):
    """
    Takes a floating-point value
    """

    pass


@attr.s(auto_attribs=True, frozen=True)
class CliInteger(CliType):
    """
    Takes an integer value
    """

    # An integer can always be losslessly widened to a float
    _representable = {CliFloat}


@attr.s(auto_attribs=True, frozen=True)
class CliString(CliType):
    """
    Takes a string value
    """

    pass


@attr.s(auto_attribs=True, frozen=True)
class CliBoolean(CliType):
    """
    Takes a boolean value
    """

    pass


@attr.s(auto_attribs=True, frozen=True)
class CliFileSystemType(CliType):
    """
    Takes a directory / file path
    """

    output: bool = False
    """
    Indicator if it is input or output
    """


@attr.s(auto_attribs=True, frozen=True)
class CliDir(CliFileSystemType):
    """
    Takes a directory path
    """

    pass


@attr.s(auto_attribs=True, frozen=True)
class CliFile(CliFileSystemType):
    """
    Takes a file path
    """

    pass


@attr.s(auto_attribs=True, frozen=True)
class CliDict(CliType):
    """
    Takes a dictionary value
    """

    key: CliType
    """
    Data type of the keys to this dictionary
    """

    value: CliType
    """
    Data type of the values to this dictionary
    """
@attr.s(auto_attribs=True, frozen=True) 156 | class CliList(CliType): 157 | """ 158 | Takes a list value 159 | """ 160 | 161 | value: CliType 162 | """ 163 | Data type of the values in this list 164 | """ 165 | 166 | 167 | @attr.s(auto_attribs=True, frozen=True) 168 | class CliTuple(CliType): 169 | """ 170 | Takes a list of values with a fixed length, possibly each with different types 171 | """ 172 | 173 | values: typing.List[CliType] 174 | """ 175 | List of types, in order, held within the tuple 176 | """ 177 | 178 | @property 179 | def homogenous(self): 180 | """ 181 | A tuple is homogenous if all types in the tuple are the same, aka the set of all types has length 1 182 | """ 183 | return len(set([type(x) for x in self.values])) == 1 184 | -------------------------------------------------------------------------------- /aclimatise/converter/__init__.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from itertools import groupby, zip_longest 3 | from os import PathLike 4 | from pathlib import Path 5 | from typing import Generator, Iterable, List, Set, TextIO, Tuple, Type 6 | 7 | import attr 8 | 9 | from aclimatise.model import CliArgument, Command, Flag 10 | from aclimatise.name_generation import ( 11 | NameGenerationError, 12 | choose_unique_name, 13 | generate_name, 14 | generate_names_nlp, 15 | generate_names_segment, 16 | name_to_camel, 17 | name_to_snake, 18 | ) 19 | from aclimatise.yaml import AttrYamlMixin 20 | 21 | 22 | @attr.s( 23 | auto_attribs=True, 24 | ) 25 | class NamedArgument(AttrYamlMixin): 26 | arg: CliArgument 27 | name: str 28 | 29 | 30 | @attr.s( 31 | auto_attribs=True, 32 | ) 33 | class WrapperGenerator(AttrYamlMixin): 34 | """ 35 | Abstract base class for a class that converts a Command object into a string that defines a tool 36 | wrapper in a certain workflow language 37 | """ 38 | 39 | cases = ["snake", "camel"] 40 | 41 | @classmethod 42 | def get_subclasses(cls) 
-> List[Type["WrapperGenerator"]]: 43 | return cls.__subclasses__() 44 | 45 | @classmethod 46 | def choose_converter(cls, typ) -> Type["WrapperGenerator"]: 47 | """ 48 | Returns a converter subclass, given a converter type name 49 | :param type: The type of converter, e.g. 'cwl' or 'wdl' 50 | """ 51 | for subclass in cls.__subclasses__(): 52 | if subclass.format() == typ: 53 | return subclass 54 | 55 | raise Exception("Unknown format type") 56 | 57 | @classmethod 58 | @abstractmethod 59 | def format(cls) -> str: 60 | """ 61 | Returns the output format that this generator produces as a string, e.g. "cwl" 62 | """ 63 | pass 64 | 65 | @abstractmethod 66 | def save_to_string(self, cmd: Command) -> str: 67 | """ 68 | Convert the command into a single string, ignoring subcommands 69 | """ 70 | pass 71 | 72 | def save_to_file(self, cmd: Command, path: Path) -> None: 73 | """ 74 | Write the command into a file 75 | """ 76 | # By default we just write the string out, but subclasses can have different behaviour 77 | path.write_text(self.save_to_string(cmd)) 78 | 79 | def generate_tree( 80 | self, cmd: Command, out_dir: PathLike 81 | ) -> Generator[Tuple[Path, Command], None, None]: 82 | out_dir = Path(out_dir) 83 | for cmd in cmd.command_tree(): 84 | path = out_dir / (cmd.as_filename + self.suffix) 85 | try: 86 | self.save_to_file(cmd, path) 87 | except NameGenerationError as e: 88 | raise NameGenerationError( 89 | 'Name generation error for command "{}". 
{}'.format( 90 | " ".join(cmd.command), e.message 91 | ) 92 | ) 93 | yield path, cmd 94 | 95 | @property 96 | def reserved(self) -> Set[Tuple[str, ...]]: 97 | """ 98 | A list of reserved keywords for this language 99 | """ 100 | return set() 101 | 102 | @property 103 | @abstractmethod 104 | def suffix(self) -> str: 105 | """ 106 | Returns a suffix for files generated using this converter 107 | """ 108 | 109 | def words_to_name(self, words: Iterable[str]): 110 | """ 111 | Converts a list of tokens, such as ["a", "variable", "name"] to a language-appropriate name, such as 112 | "aVariableName" 113 | """ 114 | if self.case == "snake": 115 | return name_to_snake(words) 116 | elif self.case == "camel": 117 | return name_to_camel(words) 118 | 119 | def choose_variable_names( 120 | self, flags: List[CliArgument], length: int = 3 121 | ) -> List[NamedArgument]: 122 | """ 123 | Choose names for a list of flags. This needs to be done in one go because there is a risk of duplicate 124 | variable names otherwise 125 | :param length: See :py:func:`from aclimatise.name_generation.generate_name` 126 | """ 127 | options = list( 128 | zip_longest( 129 | generate_names_segment([flag.full_name() for flag in flags]), 130 | generate_names_nlp( 131 | [flag.description for flag in flags], reserved=self.reserved 132 | ), 133 | [flag.argument_name() for flag in flags if isinstance(flag, Flag)], 134 | fillvalue=[], 135 | ) 136 | ) 137 | 138 | return [ 139 | NamedArgument( 140 | arg=flag, 141 | name=self.words_to_name( 142 | choose_unique_name(flag_options, reserved=self.reserved, number=i) 143 | ), 144 | ) 145 | for i, (flag, flag_options) in enumerate(zip(flags, options)) 146 | ] 147 | 148 | case: str = "snake" 149 | """ 150 | Which case to use for variable names 151 | """ 152 | 153 | generate_names: bool = True 154 | """ 155 | Rather than using the long flag to generate the argument name, generate them automatically using the 156 | flag description. 
Generally helpful if there are no long flags, only short flags. 157 | """ 158 | 159 | ignore_positionals: bool = False 160 | """ 161 | Don't include positional arguments, for example because the help formatting has some 162 | misleading sections that look like positional arguments 163 | """ 164 | -------------------------------------------------------------------------------- /aclimatise/converter/cwl.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | from pathlib import Path 3 | from typing import List 4 | 5 | import attr 6 | from cwl_utils.parser_v1_1 import ( 7 | CommandInputParameter, 8 | CommandLineBinding, 9 | CommandLineTool, 10 | CommandOutputBinding, 11 | CommandOutputParameter, 12 | DockerRequirement, 13 | ) 14 | 15 | from aclimatise import cli_types 16 | from aclimatise.cli_types import CliType 17 | from aclimatise.converter import NamedArgument, WrapperGenerator 18 | from aclimatise.model import CliArgument, Command, Flag, Positional 19 | from aclimatise.yaml import yaml 20 | 21 | 22 | @attr.s(auto_attribs=True) 23 | class CwlGenerator(WrapperGenerator): 24 | case = "snake" 25 | 26 | @classmethod 27 | def format(cls) -> str: 28 | return "cwl" 29 | 30 | @staticmethod 31 | def snake_case(words: list): 32 | return "_".join([word.lower() for word in words]) 33 | 34 | @staticmethod 35 | def type_to_cwl_type(typ: cli_types.CliType) -> str: 36 | """ 37 | Calculate the CWL type for a CLI type 38 | """ 39 | if isinstance(typ, cli_types.CliFile): 40 | return "File" 41 | elif isinstance(typ, cli_types.CliDir): 42 | return "Directory" 43 | elif isinstance(typ, cli_types.CliString): 44 | return "string" 45 | elif isinstance(typ, cli_types.CliFloat): 46 | return "double" 47 | elif isinstance(typ, cli_types.CliInteger): 48 | return "long" 49 | elif isinstance(typ, cli_types.CliBoolean): 50 | return "boolean" 51 | elif isinstance(typ, cli_types.CliEnum): 52 | return "string" 53 | elif isinstance(typ, 
    @staticmethod
    def arg_to_cwl_type(arg: CliArgument) -> str:
        """
        Calculate the CWL type for an entire argument

        :param arg: The argument (flag or positional) to type
        :return: A CWL type string, suffixed with "?" when the argument is optional
        """
        typ = arg.get_type()
        cwl_type = CwlGenerator.type_to_cwl_type(typ)

        # Arrays are not marked optional: an empty array already expresses "no value"
        if arg.optional and not cwl_type.endswith("[]"):
            return cwl_type + "?"
        else:
            return cwl_type

    def get_inputs(self, names: List[NamedArgument]) -> List[CommandInputParameter]:
        """
        Build one CWL input parameter per named argument.

        :param names: Arguments that already have unique variable names chosen
        :return: Input parameters; positionals bind by position, flags by prefix
        """
        ret = []
        for arg in names:
            assert arg.name != "", arg
            ret.append(
                CommandInputParameter(
                    # "in_" prefix keeps input ids distinct from the "out_" output ids
                    id="in_" + arg.name,
                    type=self.arg_to_cwl_type(arg.arg),
                    inputBinding=CommandLineBinding(
                        position=arg.arg.position
                        if isinstance(arg.arg, Positional)
                        else None,
                        prefix=arg.arg.longest_synonym
                        if isinstance(arg.arg, Flag)
                        else None,
                    ),
                    doc=arg.arg.description,
                )
            )

        return ret

    def get_outputs(self, names: List[NamedArgument]) -> List[CommandOutputParameter]:
        """
        Build the CWL outputs: stdout is always captured, plus one output per
        input argument whose type indicates it produces a file or directory.

        :param names: Arguments that already have unique variable names chosen
        """
        ret = [
            # We default to always capturing stdout
            CommandOutputParameter(
                id="out_stdout",
                type="stdout",
                doc="Standard output stream",
            )
        ]

        for arg in names:
            typ = arg.arg.get_type()
            # Only filesystem-typed arguments flagged as outputs become CWL outputs
            if isinstance(typ, cli_types.CliFileSystemType) and typ.output:
                ret.append(
                    CommandOutputParameter(
                        id="out_" + arg.name,
                        type=self.arg_to_cwl_type(arg.arg),
                        doc=arg.arg.description,
                        # The produced file's name is whatever value the
                        # corresponding input was given
                        outputBinding=CommandOutputBinding(
                            glob="$(inputs.in_{})".format(arg.name)
                        ),
                    )
                )
        return ret
inputs: List[CliArgument] = [*cmd.named] + ( 125 | [] if self.ignore_positionals else [*cmd.positional] 126 | ) 127 | names = self.choose_variable_names(inputs) 128 | 129 | hints = [] 130 | if cmd.docker_image is not None: 131 | hints.append(DockerRequirement(dockerPull=cmd.docker_image)) 132 | 133 | tool = CommandLineTool( 134 | id=cmd.as_filename + ".cwl", 135 | baseCommand=list(cmd.command), 136 | cwlVersion="v1.1", 137 | inputs=self.get_inputs(names), 138 | outputs=self.get_outputs(names), 139 | hints=hints, 140 | ) 141 | 142 | return tool 143 | 144 | @property 145 | def suffix(self) -> str: 146 | return ".cwl" 147 | 148 | def save_to_string(self, cmd: Command) -> str: 149 | io = StringIO() 150 | yaml.dump(self.command_to_tool(cmd).save(), io) 151 | return io.getvalue() 152 | 153 | def save_to_file(self, cmd: Command, path: Path) -> None: 154 | map = self.command_to_tool(cmd).save() 155 | with path.open("w") as fp: 156 | yaml.dump(map, fp) 157 | -------------------------------------------------------------------------------- /aclimatise/converter/janis.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import janis_core as janis 4 | from aclimatise import cli_types 5 | from aclimatise.cli_types import CliType 6 | from aclimatise.converter import NamedArgument, WrapperGenerator 7 | from aclimatise.model import CliArgument, Command, Flag, Positional 8 | 9 | 10 | class JanisGenerator(WrapperGenerator): 11 | @classmethod 12 | def format(cls) -> str: 13 | return "janis" 14 | 15 | def save_to_string(self, cmd: Command) -> str: 16 | 17 | clt = self.command_to_tool(cmd) 18 | return clt.translate("janis", to_console=False) 19 | 20 | def command_to_tool(self, cmd: Command) -> janis.CommandToolBuilder: 21 | 22 | inputs: List[CliArgument] = [*cmd.named] + ( 23 | [] if self.ignore_positionals else [*cmd.positional] 24 | ) 25 | names = self.choose_variable_names(inputs) 26 | 27 | tool = 
    def type_to_janis_type(
        self, typ: cli_types.CliType, optional: bool
    ) -> janis.DataType:
        """
        Map an aCLImatise CLI type onto the corresponding Janis data type.

        :param typ: The inferred CLI type of an argument
        :param optional: Whether the resulting Janis type should be optional
        :raises Exception: If ``typ`` is not a recognised CliType subclass
        """
        if isinstance(typ, cli_types.CliFile):
            return janis.File(optional=optional)
        elif isinstance(typ, cli_types.CliDir):
            return janis.Directory(optional=optional)
        elif isinstance(typ, cli_types.CliString):
            return janis.String(optional=optional)
        elif isinstance(typ, cli_types.CliFloat):
            return janis.Float(optional=optional)
        elif isinstance(typ, cli_types.CliInteger):
            return janis.Int(optional=optional)
        elif isinstance(typ, cli_types.CliBoolean):
            return janis.Boolean(optional=optional)
        elif isinstance(typ, cli_types.CliEnum):
            # Enums are represented as plain strings
            return janis.String(optional=optional)
        elif isinstance(typ, cli_types.CliList):
            # TODO: how is Array represented?
            # The element type itself is never optional, only the array as a whole
            inner = self.type_to_janis_type(typ.value, optional=False)
            return janis.Array(inner, optional=optional)

        elif isinstance(typ, cli_types.CliTuple):
            # Tuples collapse to the lowest common type of their members.
            # NOTE(review): the caller's ``optional`` flag is dropped here — confirm intended
            return self.type_to_janis_type(
                CliType.lowest_common_type(typ.values), optional=False
            )
        else:
            raise Exception(f"Invalid type {typ}!")

    def arg_to_janis_type(self, arg: CliArgument) -> janis.DataType:
        """Calculate the Janis data type for an entire argument."""
        return self.type_to_janis_type(arg.get_type(), arg.optional)

    def get_inputs(self, names: List[NamedArgument]) -> List[janis.ToolInput]:
        """
        Build one Janis ToolInput per named argument.

        :param names: Arguments that already have unique variable names chosen
        :return: Tool inputs; positionals bind by position, flags by prefix
        """
        ret = []
        for arg in names:
            assert arg.name != "", arg
            ret.append(
                janis.ToolInput(
                    # "in_" prefix keeps input tags distinct from the "out_" output tags
                    tag="in_" + arg.name,
                    input_type=self.arg_to_janis_type(arg.arg),
                    position=arg.arg.position
                    if isinstance(arg.arg, Positional)
                    else None,
                    prefix=arg.arg.longest_synonym
                    if isinstance(arg.arg, Flag)
                    else None,
                    doc=arg.arg.description,
                )
            )
        return ret

    def get_outputs(self, names: List[NamedArgument]) -> List[janis.ToolOutput]:
        """
        Build one Janis ToolOutput per input argument whose type indicates it
        produces a file or directory; the output is located via the matching input.
        """
        ret = []
        for arg in names:
            typ = arg.arg.get_type()
            if isinstance(typ, cli_types.CliFileSystemType) and typ.output:
                ret.append(
                    janis.ToolOutput(
                        tag="out_" + arg.name,
                        output_type=self.arg_to_janis_type(arg.arg),
                        doc=arg.arg.description,
                        # The produced file's name is whatever value the
                        # corresponding input was given
                        selector=janis.InputSelector("in_" + arg.name),
                    )
                )
        return ret

    @property
    def suffix(self) -> str:
        """File extension used for generated Janis wrappers."""
        return ".py"
#: A regex, borrowed from MiniWDL, that matches a valid WDL identifier
WDL_IDENT = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*")
#: Matches all characters we should remove from a WDL identifier
WDL_STRIP = re.compile(r"(^[^a-zA-Z])|([^a-zA-Z0-9_])")


def escape_wdl_str(text: str):
    """
    Escape literal quotes in a Python string, to become suitable for WDL:
    double quotes are backslash-escaped and newlines become literal ``\\n``
    """
    return text.replace('"', '\\"').replace("\n", "\\n")


def flag_to_command_input(
    named_flag: NamedArgument, converter: WrapperGenerator
) -> Task.Command.CommandInput:
    """
    Convert a named argument into a wdlgen command input.

    Flags without an argument become WDL booleans (emit the flag when true);
    flags with an argument use their longest synonym as a prefix; positionals
    are emitted at their recorded position.

    :param named_flag: The argument plus its chosen variable name
    :param converter: The generator driving this conversion (currently unused)
    """
    args = dict(name=named_flag.name)

    if isinstance(named_flag.arg, model.Flag):
        args.update(dict(optional=named_flag.arg.optional))
        if isinstance(named_flag.arg.args, model.EmptyFlagArg):
            args.update(dict(true=named_flag.arg.longest_synonym, false=""))
        else:
            args.update(
                dict(
                    prefix=named_flag.arg.longest_synonym,
                )
            )
    # BUG FIX: this previously tested ``isinstance(named_flag, model.Positional)``,
    # but ``named_flag`` is always a NamedArgument wrapper, so the branch never
    # fired and positionals lost their position (and would have crashed on
    # ``named_flag.position`` if it had fired). Inspect the wrapped argument.
    elif isinstance(named_flag.arg, model.Positional):
        args.update(dict(optional=False, position=named_flag.arg.position))

    return Task.Command.CommandInput.from_fields(**args)
    def make_inputs(self, named: Iterable[NamedArgument]) -> List[Input]:
        """
        Build one WDL input declaration per named argument, typed according to
        the argument's inferred CLI type and optionality.

        :param named: Arguments that already have unique variable names chosen
        """
        return [
            Input(
                data_type=self.type_to_wdl(
                    named_arg.arg.get_type(), optional=named_arg.arg.optional
                ),
                name=named_arg.name,
            )
            for named_arg in named
        ]
inputs 131 | if isinstance(input.arg, Positional) 132 | ], 133 | arguments=[ 134 | flag_to_command_input(input, self) 135 | for input in inputs 136 | if isinstance(input.arg, Flag) 137 | ], 138 | ) 139 | 140 | def make_parameter_meta(self, named: Iterable[NamedArgument]) -> ParameterMeta: 141 | params = {} 142 | for named_arg in named: 143 | params[named_arg.name] = escape_wdl_str(named_arg.arg.description) 144 | 145 | return ParameterMeta(**params) 146 | 147 | def make_task_name(self, cmd: Command) -> str: 148 | return camelize( 149 | "_".join([WDL_STRIP.sub("", token) for token in cmd.command]).replace( 150 | "-", "_" 151 | ) 152 | ) 153 | 154 | def make_outputs(self, names: List[NamedArgument]) -> List[Output]: 155 | ret = [ 156 | # We default to always capturing stdout 157 | Output(data_type=File, name="out_stdout", expression="stdout()") 158 | ] 159 | for arg in names: 160 | typ = arg.arg.get_type() 161 | if isinstance(typ, cli_types.CliFileSystemType) and typ.output: 162 | ret.append( 163 | Output( 164 | data_type=self.type_to_wdl(typ), 165 | name="out_" + arg.name, 166 | expression='"${{in_{}}}"'.format(arg.name), 167 | ) 168 | ) 169 | 170 | return ret 171 | 172 | def save_to_string(self, cmd: Command) -> str: 173 | inputs: List[CliArgument] = [*cmd.named] + ( 174 | [] if self.ignore_positionals else [*cmd.positional] 175 | ) 176 | names = self.choose_variable_names(inputs) 177 | runtime = Task.Runtime() 178 | runtime.add_docker(cmd.docker_image) 179 | 180 | tool = Task( 181 | name=self.make_task_name(cmd), 182 | command=self.make_command(cmd, names), 183 | version="1.0", 184 | inputs=self.make_inputs(names), 185 | outputs=self.make_outputs(names), 186 | parameter_meta=self.make_parameter_meta(names), 187 | runtime=runtime, 188 | ) 189 | 190 | return tool.get_string() 191 | -------------------------------------------------------------------------------- /aclimatise/converter/yml.py: 
@attr.s(auto_attribs=True)
class YmlGenerator(WrapperGenerator):
    """
    Generator for aCLImatise's own internal YAML format: commands are dumped
    directly, with no conversion to another workflow language.
    """

    @property
    def suffix(self) -> str:
        """File extension used for generated YAML definitions."""
        return ".yml"

    def save_to_file(self, cmd: Command, path: Path) -> None:
        """Dump ``cmd`` as YAML into ``path``."""
        with path.open("w") as handle:
            yaml.dump(cmd, handle)

    def save_to_string(self, cmd: Command) -> str:
        """Dump ``cmd`` as YAML and return the resulting text."""
        sink = StringIO()
        yaml.dump(cmd, sink)
        return sink.getvalue()

    @classmethod
    def format(cls) -> str:
        """Identifier for this output format, as used by the CLI."""
        return "yml"
    def explore(
        self,
        command: List[str],
        max_depth: int = 2,
        parent: Optional[Command] = None,
    ) -> Command:
        """
        Given a command to start with, builds a model of this command and all its subcommands (if they exist)

        :param command: Tokens of the command to start from, e.g. ``["samtools"]``
        :param max_depth: How many levels of subcommands to recurse into
        :param parent: The parent Command, if this command is itself a subcommand
        """
        # If the executor doesn't implement a specific exploration technique, we just execute and ignore subcommands
        return self.convert(command)

    @abc.abstractmethod
    def convert(self, command: List[str]) -> Command:
        """
        Convert a single executable to a Command object, without considering subcommands

        :param command: Tokens of the command to run, e.g. ``["bwa", "mem"]``
        """
        pass
class DockerExecutor(CliHelpExecutor):
    """
    An executor that runs the commands on an already-running docker Container (not an Image!)
    """

    def __init__(
        self, container: "docker.models.containers.Container", save_image=True, **kwargs
    ):
        """
        :param container: The object from the Docker API that represents the running container to run inside
        :param save_image: If true (default), save the image name on the command, meaning that the resulting tool
        definitions also use this Docker image
        """
        super().__init__(**kwargs)
        self.container = container
        self.save_image = save_image

    def convert(
        self,
        cmd: List[str],
    ) -> Command:
        """
        Convert a command to a Command object, recording the container's first
        image tag on the result so generated wrappers reference the same image.
        """
        # Use the existing function, but patch in the docker image
        cmd = super().convert(cmd)
        if self.save_image:
            cmd.docker_image = self.container.image.tags[0]
        return cmd

    def execute(self, command: List[str]) -> str:
        """
        Run ``command`` inside the container over a raw exec socket and return
        its output (stdout if non-empty, otherwise stderr), or the timeout
        fallback from handle_timeout() if nothing arrives within self.timeout.
        """
        _, sock = self.container.exec_run(
            command, stdout=True, stderr=True, demux=True, socket=True
        )
        try:
            # These are timeouts that define how long to wait while nothing is being output
            sock._sock.settimeout(self.timeout)
            # Patch select() so reads cannot block forever.
            # NOTE(review): this assumes docker-py's frame reader goes through
            # select.select internally — confirm against the docker-py version in use
            with patch.object(
                select,
                "select",
                new=lambda rlist, wlist, xlist: original_select(
                    rlist, wlist, xlist, self.timeout
                ),
            ):
                stdout, stderr = read_socket(sock, timeout=self.timeout)
        except socket.timeout as e:
            return self.handle_timeout(e)

        return (stdout or stderr or b"").decode()
    def __init__(
        self,
        flags: Iterable[List[str]] = (["--help"], ["-h"], [], ["--usage"]),
        try_subcommand_flags=True,
        **kwargs
    ):
        """
        :param flags: Help-flag candidates to try, in order of preference; each
            entry is a list of tokens appended to the command, so ``[]`` means
            "run the command with no flag at all"
        :param try_subcommand_flags: If true, retry every help flag on each
            subcommand rather than reusing the flag that worked for the parent
        """
        super().__init__(**kwargs)
        self.flags = flags
        self.try_subcommand_flags = try_subcommand_flags
best.subcommands.append(subcommand) 73 | # If we had any subcommands then we probably don't have any positionals, or at least don't care about them 74 | best.positional = [] 75 | 76 | return best 77 | 78 | @abc.abstractmethod 79 | def execute(self, cmd: List[str]) -> str: 80 | """ 81 | Executes the provided command and returns a string containing the output 82 | """ 83 | pass 84 | 85 | def convert( 86 | self, 87 | cmd: List[str], 88 | ) -> Command: 89 | """ 90 | Determine the best Command instance for a given command line tool, by trying many 91 | different help flags, such as --help and -h, then return the Command. Use this if you know the command you want to 92 | parse, but you don't know which flags it responds to with help text. Unlike :py:func:`aclimatise.explore_command`, 93 | this doesn't even attempt to parse subcommands. 94 | 95 | :param cmd: The command to analyse, e.g. ['wc'] or ['bwa', 'mem'] 96 | :param flags: A list of help flags to try, e.g. ['--help', '-h'], in order how which one you would prefer to use. 97 | Generally [] aka no flags should be last 98 | :param executor: A class that provides the means to run a command. You can use the pre-made classes or write your own. 99 | """ 100 | # For each help flag, run the command and then try to parse it 101 | logger.info("Trying flags for {}".format(" ".join(cmd))) 102 | commands = [] 103 | for flag in self.flags: 104 | help_cmd = cmd + flag 105 | logger.info("Trying {}".format(" ".join(help_cmd))) 106 | try: 107 | final = self.execute(help_cmd) 108 | result = parse_help(cmd, final, max_length=self.max_length) 109 | result.generated_using = flag 110 | commands.append(result) 111 | except (ParseBaseException, UnicodeDecodeError) as e: 112 | # If parsing fails, this wasn't the right flag to use 113 | continue 114 | 115 | # Sort by flags primarily, and if they're equal, return the command with the longest help text, and if they're equal 116 | # return the command with the most help flags. 
def kill_proc_tree(pid, sig=signal.SIGTERM, include_parent=True):
    """
    Kill a process tree (including grandchildren) by sending signal ``sig`` to
    every process in it.

    Adapted from https://psutil.readthedocs.io/en/latest/#kill-process-tree

    :param pid: PID of the root process; must not be the current process
    :param sig: Signal to send to each process
    :param include_parent: If true, also signal the root process itself
    """
    assert pid != os.getpid(), "won't kill myself"
    parent = psutil.Process(pid)
    children = parent.children(recursive=True)
    if include_parent:
        children.append(parent)
    for p in children:
        p.send_signal(sig)


class LocalExecutor(CliHelpExecutor):
    """
    Executor that runs commands as local subprocesses, with stdin attached to a
    pseudo-TTY so tools that expect a terminal still print their help.
    """

    def __init__(self, popen_args: dict = None, **kwargs):
        """
        :param popen_args: Extra keyword arguments forwarded to subprocess.Popen
        """
        super().__init__(**kwargs)
        # Avoid a mutable default argument (a dict shared across all instances)
        self.popen_args = {} if popen_args is None else popen_args

    def execute(self, command: List[str]) -> str:
        """
        Run ``command`` and return its stdout (or stderr if stdout is empty),
        or the handle_timeout() fallback if it outlives ``self.timeout``.
        """
        master, slave = pty.openpty()
        popen_kwargs = dict(
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stdin=slave,
            encoding="utf-8",
        )
        popen_kwargs.update(self.popen_args)

        # This works a lot like subprocess.run, but we need access to the pid in order to kill the process tree, so use Popen
        with subprocess.Popen(command, **popen_kwargs) as process:
            try:
                stdout, stderr = process.communicate(timeout=self.timeout)
            except subprocess.TimeoutExpired as e:
                # Kill the entire process tree, because sometimes killing the parent isn't enough
                # NOTE(review): on non-linux platforms this passes sig=None,
                # which psutil's send_signal will reject — confirm intended platforms
                kill_proc_tree(
                    process.pid,
                    include_parent=True,
                    sig=signal.SIGKILL if sys.platform == "linux" else None,
                )
                process.communicate()
                return self.handle_timeout(e)
            finally:
                os.close(master)
                os.close(slave)

        return stdout or stderr
    def execute_with_sep(self, command: List[str], separator: str = "-") -> str:
        """
        Returns the man page text for the provided command, using the provided subcommand separator, or an empty string
        if this man page doesn't exist

        :param command: Command tokens, e.g. ``["git", "branch"]``
        :param separator: Character used to join the tokens into a man page name
        """
        env = {**os.environ.copy(), "MANPAGER": "cat"}  # Don't use a pager
        if len(self.man_paths) > 0:
            env.update({"MANPATH": ":".join(self.man_paths)})

        sub_man = separator.join(command)
        result = subprocess.run(
            ["man", *self.man_flags, sub_man],
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # A nonzero exit code means the page doesn't exist
        if result.returncode == 0:
            return result.stdout.decode()

        return ""

    def convert(self, command: List[str]) -> Command:
        """
        Parse the man page for ``command`` into a Command. For subcommands,
        every configured separator is tried and the best-scoring parse is kept.
        """
        if len(command) == 1:
            return parse_help(
                command, self.execute_with_sep(command), max_length=self.max_length
            )
        else:
            commands = []
            for sep in self.subcommand_sep:
                man_text = self.execute_with_sep(command, sep)
                commands.append(
                    parse_help(command, man_text, max_length=self.max_length)
                )
            return Command.best(commands)
https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/aclimatise/flag_parser/__init__.py -------------------------------------------------------------------------------- /aclimatise/flag_parser/elements.py: -------------------------------------------------------------------------------- 1 | """ 2 | Re-usable parser elements that aren't tied to the parser object 3 | """ 4 | from typing import List 5 | 6 | from pyparsing import * 7 | 8 | from aclimatise.model import * 9 | 10 | #: Characters that delimit flag synonyms 11 | synonym_delim_chars = ",|/" 12 | #: Characters that can start a CLI element, e.g. "-@" 13 | element_start_chars = alphanums + "@" 14 | #: Characters that can be in the middle of a CLI element, e.g. "-some-arg" 15 | element_body_chars = element_start_chars + "-_." 16 | #: Characters that can only be used in arguments for flags e.g. "" 17 | argument_body_chars = element_body_chars + "|" 18 | #: Characters that can be in the middle of an argument that has brackets around it, e.g. "-arg " 19 | delimited_body_chars = argument_body_chars + r" \/" 20 | 21 | NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()).setName("Newline") 22 | 23 | 24 | cli_id = Word(initChars=element_start_chars, bodyChars=element_body_chars) 25 | 26 | positional_name = Word( 27 | initChars=element_start_chars, bodyChars=element_body_chars, min=2 28 | ) 29 | 30 | # short_flag = originalTextFor(Literal('-') + Word(alphanums + '@', max=1)) 31 | # """A short flag has only a single dash and single character, e.g. `-m`""" 32 | # long_flag = originalTextFor(Literal('--') + cli_id) 33 | # """A long flag has two dashes and any amount of characters, e.g. `--max-count`""" 34 | any_flag = ( 35 | originalTextFor("-" + Optional("-") + cli_id).leaveWhitespace().setName("Flag") 36 | ) 37 | """The flag is the part with the dashes, e.g. 
def visit_optional_args(s, lok, toks):
    """
    Parse action for ``optional_args``: folds nested bracketed arguments such
    as ``FLOAT[,FLOAT[,INT]]`` into a single OptionalFlagArg listing every name.
    """
    if len(toks) == 1:
        return OptionalFlagArg(names=[toks[0]])
    else:
        # toks is: name, "[", separator (the comma), inner (str or OptionalFlagArg), "]"
        first, _, sep, second, _ = toks
        if isinstance(second, str):
            return OptionalFlagArg(names=[first, second], separator=sep)
        elif isinstance(second, OptionalFlagArg):
            # Flatten the recursively-parsed inner args into one node
            return OptionalFlagArg(names=[first] + second.names, separator=sep)
98 | 99 | list_type_arg = ( 100 | ( 101 | (arg + repeated_segment) 102 | ^ (arg + Literal("[").suppress() + repeated_segment + Literal("]").suppress()) 103 | ) 104 | .setParseAction(lambda s, loc, toks: toks[1]) 105 | .setName("repeated_arg") 106 | ) 107 | """ 108 | When the argument is an array of values, e.g. when the help says `--samout SAMOUTS [SAMOUTS ...]` or 109 | `-i FILE1 FILE2 .. FILEn` 110 | 111 | """ 112 | 113 | choice_type_arg = ( 114 | nestedExpr(opener="{", closer="}", content=delimitedList(cli_id, delim=",")) 115 | .setParseAction(lambda s, loc, toks: ChoiceFlagArg(set(toks[0]))) 116 | .setName("ChoiceArg") 117 | ) 118 | """When the argument is one from a list of values, e.g. when the help says `--format {sam,bam}`""" 119 | 120 | 121 | def noop(s, loc, toks): 122 | return toks 123 | 124 | 125 | arg_expression = ( 126 | ( 127 | flag_arg_sep.suppress() 128 | + (list_type_arg | choice_type_arg | optional_args | simple_arg) 129 | ) 130 | # .leaveWhitespace() 131 | .setParseAction(lambda s, loc, toks: toks[0]) 132 | ) 133 | arg_expression.skipWhitespace = False 134 | """An argument with separator, e.g. `=FILE`""" 135 | 136 | flag_with_arg = ( 137 | (any_flag + Optional(arg_expression)) 138 | .setParseAction( 139 | lambda s, loc, toks: ( 140 | FlagSynonym( 141 | name=toks[0], argtype=toks[1] if len(toks) > 1 else EmptyFlagArg() 142 | ) 143 | ) 144 | ) 145 | .setName("FlagWithArg") 146 | ) 147 | flag_with_arg.skipWhitespace = True 148 | """e.g. `--max-count=NUM`""" 149 | 150 | synonym_delim = ( 151 | White() ^ (Optional(White()) + Char(synonym_delim_chars) + Optional(White())) 152 | ).leaveWhitespace() 153 | """ 154 | The character used to separate synonyms of a flag. Depending on the help text this might be a comma, pipe or space 155 | """ 156 | 157 | description_sep = White(min=1).suppress() 158 | """ 159 | The section that separates a flag from its description. 
# block_element_prefix = LineStart().leaveWhitespace()
# A flag entry must begin either at the start of a line or after a colon,
# followed by at least one whitespace character; the prefix itself is discarded
block_element_prefix = (
    ((LineStart().leaveWhitespace() ^ Literal(":")) + White(min=1))
    .setName("block_element_prefix")
    .leaveWhitespace()
    .suppress()
)
"""
Each element (e.g. flag) in a list of flags must either start with a colon or nothing

e.g. in this example "index" is prefixed by a colon and "mem" is prefixed by a LineStart

Command: index         index sequences in the FASTA format
         mem           BWA-MEM algorithm
"""

# One or more flag synonyms separated by synonym_delim
flag_synonyms = delimitedList(flag_with_arg, delim=synonym_delim).setName(
    "FlagSynonyms"
)
"""
When the help lists multiple synonyms for a flag, e.g:
    -n, --lines=NUM
"""
def parse_help(
    cmd: typing.Collection[str], text: str, max_length: int = 1000
) -> Command:
    """
    Parse a string of help text into a Command. Use this if you already have run the executable and extracted the
    help text yourself

    :param cmd: List of arguments used to generate this help text, e.g. ['bwa', 'mem']
    :param text: The help text to parse
    :param max_length: If the input text has more than this many lines, no attempt will be made to parse the file (as
        it's too large, will likely take a long time, and there's probably an underlying problem if this has happened).
        In this case, an empty Command will be returned
    """
    # Bail out early on huge inputs, returning an empty Command for this cmd
    if len(text.splitlines()) > max_length:
        return Command(list(cmd))

    # Parse the same text twice: once scanning for flag-description lists,
    # and once scanning for the "usage:" block
    help_command = CliParser().parse_command(name=cmd, cmd=text)
    usage_command = UsageParser().parse_usage(list(cmd), text)

    # Combine the two commands by picking from the help_command where possible, otherwise falling back on the usage
    fields = dict(
        help_text=text,
        # Use the help command's positionals preferentially, but fall back to usage
        positional=help_command.positional or usage_command.positional,
        # Combine the flags from both help and usage
        named=list(Flag.combine([help_command.named, usage_command.named])),
    )
    # For every remaining Command attribute, prefer the explicitly computed
    # value, then the help-derived value, then the usage-derived value.
    # Note that `or` also skips falsy-but-present values such as [] or ""
    for field in attr.fields(Command):
        fields[field.name] = (
            fields.get(field.name)
            or getattr(help_command, field.name)
            or getattr(usage_command, field.name)
        )

    return Command(**fields)
def is_sentence(text: str, threshold: float = 0.8) -> bool:
    """
    Returns a bool that indicates if this text is likely a sentence. This should probably be replaced by a machine
    learning classifier in the future
    :param text: The text to classify
    :param threshold: If the ratio of non-word tokens over word tokens is higher than this, then return False
    """

    # Use the pipeline with sentence-boundary detection disabled, so the
    # whole input is treated as (at most) one sentence
    doc = no_sentences(text)
    sents = list(doc.sents)

    # No tokens at all, so it definitely isn't a sentence
    if len(sents) == 0:
        return False

    # Only the first (and, given no_sentences, only) sentence is considered
    sentence = sents[0]
    non_word_count = 0
    word_count = 0
    for tok in sentence:
        pos = tok.pos_
        if pos == "SPACE":
            # Ignore whitespace
            continue

        if pos in {"X", "SYM", "PUNCT", "NUM"}:
            non_word_count += 1
        # NOTE(review): word_count is incremented for every non-space token,
        # so the ratio below is non-word tokens over *all* tokens, not over
        # word tokens as the docstring suggests — confirm which is intended
        word_count += 1

    # Empty-after-filtering text is vacuously a sentence
    result = word_count == 0 or non_word_count / word_count < threshold
    return result
class IndentParserMixin:
    """
    A mixin that maintains an indent stack, and utility methods that produce
    pyparsing elements which check or update that stack during a parse
    """

    def __init__(self):
        # 1-based column numbers of each currently-open indentation level
        self.stack = [1]

    def pop_indent(self):
        """Returns an element that unconditionally pops one indent level"""

        def check_indent(s, l, t):
            self.stack.pop()

        return (Empty() + Empty()).setParseAction(check_indent).setName("Pop")

    def push_indent(self):
        """Returns an element that pushes the current column as a new indent level"""

        def check_indent(s, l, t):
            curCol = col(l, s)
            self.stack.append(curCol)

        return (Empty() + Empty()).setParseAction(check_indent).setName("Push")

    def peer_indent(self, allow_greater=False):
        """
        Returns an element that matches only at the same indentation level as the
        top of the stack

        :param allow_greater: Allow greater indent than the previous indentation, but don't add it to the stack
        """

        def check_peer_indent(s, l, t):
            if l >= len(s):
                # At end of input there is nothing left to indent-check
                return
            curCol = col(l, s)
            if allow_greater and curCol >= self.stack[-1]:
                return
            elif curCol == self.stack[-1]:
                return
            else:
                if curCol > self.stack[-1]:
                    raise ParseException(s, l, "illegal nesting")
                raise ParseException(s, l, "not a peer entry")

        return Empty().setParseAction(check_peer_indent).setName("Peer")

    def indent(self, update=True):
        """
        Returns an element that matches only at a deeper indentation than the top
        of the stack

        :param update: If true, update the stack, otherwise simply check for an indent
        """

        def check_sub_indent(s, l, t):
            curCol = col(l, s)
            if curCol > self.stack[-1]:
                # Deeper indent found; record it only when update is requested
                if update:
                    self.stack.append(curCol)
            else:
                raise ParseException(s, l, "not a subentry")

        return (Empty() + Empty().setParseAction(check_sub_indent)).setName("Indent")

    def dedent(self, precise=True):
        """
        Returns an element that matches a reduction in indentation, popping the
        stack when the column drops below the current level

        :param precise: If true, the new column must be an indent level seen before
        """

        def check_dedent(s, l, t):
            if l >= len(s):
                return
            curCol = col(l, s)
            if precise and self.stack and curCol not in self.stack:
                raise ParseException(s, l, "not an unindent")
            if curCol < self.stack[-1]:
                self.stack.pop()

        return Empty().setParseAction(check_dedent).setName("Unindent")


# Fix: __all__ entries must be strings, not the objects themselves.
# With class objects listed here, `from aclimatise.parser import *` raises
# "TypeError: Item in __all__ must be str".
__all__ = ["IndentCheckpoint", "IndentParserMixin"]
def delimited_item(open, el, close):
    """Wraps `el` in the given opening/closing delimiters and strips them from the tokens"""

    def action(s, loc, toks):
        # Drop the opening and closing delimiter tokens.
        # NOTE(review): the downstream parse actions below index toks as if the
        # delimiters were still present — verify how these actions chain
        return toks[1:-1]

    return (open + el + close).setParseAction(action)


# Forward-declared; defined at the bottom of this module once all the
# alternatives it dispatches between exist
usage_element = Forward()
element_char = arg.copy()  # Word(initChars=element_start_chars, bodyChars=)

mandatory_element = (
    element_char.copy()
    .setParseAction(
        lambda s, loc, toks: UsageElement(
            text=toks[0],
        )
    )
    .setName("MandatoryElement")
)
"""
A mandatory element in the command-line invocation. Might be a variable or a constant
"""

variable_element = (
    delimited_item(
        "<", Word(initChars=element_start_chars, bodyChars=delimited_body_chars), ">"
    )
    .setParseAction(lambda s, loc, toks: UsageElement(text=toks[1], variable=True))
    .setName("VariableElement")
)
"""
Any element inside angle brackets is a variable, meaning you are supposed to provide your own value for it.
However, some usage formats show variables without the angle brackets
"""


def visit_optional_section(s, loc, toks):
    # Everything between the square brackets is optional; mark each element
    inner = toks[1:-1]
    for tok in inner:
        tok.optional = True
    return inner


optional_section = (
    delimited_item("[", OneOrMore(usage_element), "]")
    .setParseAction(visit_optional_section)
    .setName("OptionalSection")
)
"""
Anything can be nested within square brackets, which indicates that everything there is optional
"""
"n" for a "-n" flag 89 | """ 90 | 91 | # short_flag = ( 92 | # '-' + short_flag_name + White() + Optional(flag_arg) 93 | # ).setParseAction( 94 | # lambda s, loc, toks: 95 | # Flag.from_synonyms([FlagSynonym( 96 | # name=toks[0] + toks[1], 97 | # argtype=SimpleFlagArg(toks[3]) if toks[3] else EmptyFlagArg() 98 | # )], description=None) 99 | # ) 100 | """ 101 | The usage can contain a flag with its argument 102 | """ 103 | 104 | # long_flag = ( 105 | # '--' + element_char + White() + Optional(flag_arg) 106 | # ).setParseAction(lambda s, loc, toks: Flag.from_synonyms([FlagSynonym( 107 | # name=toks[1], 108 | # argtype=SimpleFlagArg(toks[3]) if toks[3] else EmptyFlagArg() 109 | # )])) 110 | """ 111 | The usage can contain a flag with its argument 112 | """ 113 | 114 | 115 | def visit_short_flag_list(s, loc, toks): 116 | return [ 117 | Flag.from_synonyms( 118 | [FlagSynonym(name="-" + flag, argtype=EmptyFlagArg())], description=None 119 | ) 120 | for flag in toks[1:] 121 | ] 122 | 123 | 124 | # short_flag_list = ('-' + short_flag_name + OneOrMore(short_flag_name)).setParseAction( 125 | # visit_short_flag_list).leaveWhitespace() 126 | """ 127 | Used to illustrate where a list of short flags could be used, e.g. 
def visit_list_element(s, loc, toks):
    # Pick the last element if there is one, otherwise use the first element
    # This gives us a better name like 'inN.bam' instead of 'in2.bam'
    els = [tok for tok in toks if isinstance(tok, (UsageElement, Flag))]
    for el in els:
        el.repeatable = True
    return els[-1]


# Matches a literal "option"/"options" placeholder, which carries no
# information of its own and is therefore suppressed
options_placeholder = (
    Regex("options?", flags=re.IGNORECASE).suppress().setName("OptionsPlaceholder")
)

list_element = (
    (
        OneOrMore(options_placeholder ^ mandatory_element ^ variable_element)
        + Literal(".")[2, 3]
        + Optional(options_placeholder ^ mandatory_element ^ variable_element)
    )
    .setParseAction(visit_list_element)
    .setName("list_element")
)
"""
When one or more arguments are allowed, e.g. " ... "
"""

# A flag as it appears in a usage line, promoted to a full Flag object with an
# empty description
usage_flag = (
    And([flag_with_arg])
    .setParseAction(lambda s, loc, toks: Flag.from_synonyms(toks, description=""))
    .setName("usage_flag")
)


# The alternatives are ordered so that structured forms (optional sections,
# lists, flags) are preferred over a bare mandatory element
usage_element <<= Or(
    [
        optional_section,
        list_element,
        # short_flag_list,
        usage_flag,
        variable_element,
        options_placeholder,
        mandatory_element,
    ]
).setName("usage_element")

# NOTE(review): module-level indent stack; only used by visit_usage below,
# which itself is only referenced from commented-out code
stack = [1]


def visit_usage(s, loc, toks):
    # Fix up stack inconsistencies
    while len(stack) > 1:
        stack.pop()

    return toks[0][0]


usage_example = OneOrMore(usage_element, stopOn=LineEnd())
"""
Each usage example is a single line of text, e.g.

    shell [options] -e string
"""

# A "usage:" header followed by one or more usage examples
usage = (
    LineStart()
    + Regex("usage:", flags=re.IGNORECASE).suppress()
    + OneOrMore(usage_example)
)  # .setParseAction(visit_usage).setDebug()
201 | 202 | Usage: 203 | shell [options] -e string 204 | execute string in V8 205 | shell [options] file1 file2 ... filek 206 | run JavaScript scripts in file1, file2, ..., filek 207 | """ 208 | 209 | 210 | # usage = Regex('usage:', flags=re.IGNORECASE).suppress() + delimitedList(usage_element, delim=Or([' ', '\n'])) 211 | # indentedBlock( 212 | # delimitedList(usage_element, delim=' '), 213 | # indentStack=stack, 214 | # indent=True 215 | # ) 216 | -------------------------------------------------------------------------------- /aclimatise/usage_parser/model.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import attr 4 | 5 | from aclimatise import model 6 | from aclimatise.yaml import AttrYamlMixin 7 | 8 | 9 | @attr.s(auto_attribs=True) 10 | class UsageElement(AttrYamlMixin): 11 | text: str 12 | """ 13 | The name of this element, as defined in the usage section 14 | """ 15 | 16 | optional: bool = False 17 | """ 18 | Whether or not this element is required 19 | """ 20 | 21 | variable: bool = False 22 | """ 23 | True if this is a variable, ie you are supposed to replace this text with your own, False if this is a constant 24 | that you shouldn't change, e.g. 
def normalise_cline(tokens):
    """
    Normalise a tokenised command line, such as ["dotnet", "Pisces.dll"],
    converting it to ["dotnet", "pisces"]: each token is lowercased and any
    file extension is stripped.

    :param tokens: iterable of command-line tokens
    :return: list of normalised tokens
    """
    normalised = []
    for token in tokens:
        normalised.append(Path(token.lower()).stem)
    return normalised
    def parse_usage(self, cmd: List[str], usage: str, debug: bool = False) -> Command:
        """
        Parse the usage block (if any) out of a help string into a Command

        :param cmd: The tokenised command used to produce the help, e.g. ['bwa', 'mem']
        :param usage: The full help text to scan for a "usage:" section
        :param debug: If true, enable pyparsing debug output for the usage grammar
        """
        # return self.usage.searchString(usage)
        usage_blocks = self.usage.setDebug(debug).searchString(usage)
        if not usage_blocks:
            # If we had no results, return an empty command
            return Command(command=cmd)

        instances = []
        all_positionals = []
        all_flags = []
        for block in usage_blocks:
            for instance in block:

                # Split each usage instance into positional elements and flags
                positional = [
                    tok for tok in instance.items if isinstance(tok, UsageElement)
                ]
                flags = [tok for tok in instance.items if isinstance(tok, Flag)]

                # Remove an "options" argument which is just a proxy for other flags
                # positional = [pos for pos in positional if pos.text.lower() != "options"]
                # The usage often starts with a re-iteration of the command name itself. Remove this if present
                for i in range(len(positional)):
                    # For each positional argument, if the entire cmd string is present, slice away this and everything before it.
                    # (`positional` shrinks inside the loop; the `end <= len(positional)`
                    # guard keeps later iterations from matching out of range)
                    end = i + len(cmd)
                    if end <= len(positional) and normalise_cline(
                        [pos.text for pos in positional[i:end]]
                    ) == normalise_cline(cmd):
                        positional = positional[end:]

                if not any([tok for tok in positional if tok.variable]):
                    # If the usage didn't explicitly mark anything as a variable using < > brackets, we have to assume that
                    # everything other than flags are positional elements
                    for element in positional:
                        element.variable = True

                instances.append(instance)
                # Convert these UsageElements into Positionals
                all_positionals += [
                    Positional(
                        description="", position=i, name=el.text, optional=el.optional
                    )
                    for i, el in enumerate(positional)
                ]
                all_flags += flags

        # Merge duplicates produced by multiple usage examples
        return Command(
            command=cmd,
            positional=Positional.deduplicate(all_positionals),
            named=Flag.deduplicate(all_flags),
        )
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))


# -- Project information -----------------------------------------------------

project = "aCLImatise"
copyright = "2020, Michael Milton"
author = "Michael Milton"

# The full version, including alpha/beta/rc tags.
# Fix: kept in sync with `version = 3.0.1` in setup.cfg; the previous value
# "0.0.16" had drifted from the released package version.
release = "3.0.1"


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx_click.ext",
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "alabaster"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
Installation
============

To install ``aCLImatise``, run:

.. code-block:: bash

    pip install aclimatise
    python -m spacy download en_core_web_sm # Install the internal language model

Now you can use either the :doc:`Python API <api>` or the :doc:`CLI <cli>`.
11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/model.rst: -------------------------------------------------------------------------------- 1 | Data Model 2 | ========== 3 | 4 | Command 5 | ------- 6 | .. autoclass:: aclimatise.model.Command 7 | :members: 8 | 9 | Command Inputs 10 | -------------- 11 | .. autoclass:: aclimatise.model.CliArgument 12 | :members: 13 | .. autoclass:: aclimatise.model.Positional 14 | :members: 15 | .. autoclass:: aclimatise.model.Flag 16 | :members: 17 | .. autoclass:: aclimatise.model.FlagSynonym 18 | :members: 19 | 20 | Flag Arguments 21 | -------------- 22 | .. autoclass:: aclimatise.model.FlagArg 23 | :members: 24 | .. autoclass:: aclimatise.model.EmptyFlagArg 25 | :members: 26 | .. autoclass:: aclimatise.model.OptionalFlagArg 27 | :members: 28 | .. autoclass:: aclimatise.model.SimpleFlagArg 29 | :members: 30 | .. autoclass:: aclimatise.model.RepeatFlagArg 31 | :members: 32 | .. autoclass:: aclimatise.model.ChoiceFlagArg 33 | :members: 34 | 35 | Argument Types 36 | -------------- 37 | .. 
automodule:: aclimatise.cli_types 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: aclimatise-test 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - python>=3.7.5 7 | - bwa==0.7.17 8 | - samtools=1.9 9 | - bedtools==2.26.0 10 | - htseq==0.12.4 11 | - dinosaur==1.1.3 12 | - pisces==5.2.9.122 13 | - genomethreader==1.7.1 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = aclimatise 3 | version = 3.0.1 4 | description = aCLImatise is a Python library and command-line utility for parsing the help output of a command-line tool and then outputting a description of the tool in a more structured format 5 | long_description = file: README.rst 6 | long_description_content_type: text/x-rst 7 | license = GPLv3 8 | classifiers = 9 | License :: OSI Approved :: GNU General Public License v3 (GPLv3) 10 | Programming Language :: Python :: 3 11 | Programming Language :: Python :: 3.7 12 | Programming Language :: Python :: 3.8 13 | Intended Audience :: Developers 14 | Natural Language :: English 15 | 16 | [tool:pytest] 17 | log_level = INFO 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from setuptools import find_packages, setup 3 | 4 | setup( 5 | packages=find_packages(exclude="test"), 6 | install_requires=[ 7 | "pyparsing", 8 | "jinja2", 9 | "spacy~=3.0", 10 | "miniwdl", 11 | "wordsegment", 12 | "inflection", 13 | "illusional.wdlgen==0.3.0", 14 | "ruamel.yaml==0.16.5", 15 | "click", 16 | "cwltool", 17 | "cwl-utils>=0.4", 18 | "regex", 19 | 
"num2words", 20 | "word2number", 21 | "psutil", 22 | "deprecated", 23 | "attrs", 24 | "janis-pipelines.core>=0.11.2", 25 | ], 26 | python_requires=">=3.6", 27 | entry_points={"console_scripts": ["aclimatise = aclimatise.cli:main"]}, 28 | extras_require={ 29 | "dev": [ 30 | "pytest", 31 | "pre-commit", 32 | "Sphinx", 33 | "sphinx-click", 34 | "pytest-timeout", 35 | "docker", 36 | ], 37 | }, 38 | ) 39 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/test/__init__.py -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | import pytest 4 | from pkg_resources import resource_filename 5 | 6 | from aclimatise.converter.yml import YmlGenerator 7 | from aclimatise.execution.local import LocalExecutor 8 | from aclimatise.flag_parser.parser import CliParser 9 | from aclimatise.usage_parser.parser import UsageParser 10 | from aclimatise.yaml import yaml 11 | 12 | 13 | @pytest.fixture() 14 | def usage_parser(): 15 | return UsageParser() 16 | 17 | 18 | @pytest.fixture() 19 | def local_executor(): 20 | return LocalExecutor() 21 | 22 | 23 | @pytest.fixture() 24 | def yaml_converter(): 25 | return YmlGenerator() 26 | 27 | 28 | @pytest.fixture() 29 | def bedtools_cmd(): 30 | with open(resource_filename(__name__, "test_data/bedtools/bedtools.yml")) as fp: 31 | return yaml.load(fp) 32 | 33 | 34 | @pytest.fixture() 35 | def samtools_cmd(): 36 | with open(resource_filename(__name__, "test_data/samtools/samtools.yml")) as fp: 37 | return yaml.load(fp) 38 | 39 | 40 | @pytest.fixture 41 | def samtools_help(): 42 | with open(resource_filename(__name__, "test_data/samtools.txt")) as fp: 43 | return 
fp.read() 44 | 45 | 46 | @pytest.fixture 47 | def htseq_help(): 48 | with open(resource_filename(__name__, "test_data/htseq_count.txt")) as fp: 49 | return fp.read() 50 | 51 | 52 | @pytest.fixture 53 | def bwamem_help(): 54 | with open(resource_filename(__name__, "test_data/bwa_mem.txt")) as fp: 55 | return fp.read() 56 | 57 | 58 | @pytest.fixture 59 | def pisces_help(): 60 | with open(resource_filename(__name__, "test_data/pisces.txt")) as fp: 61 | return fp.read() 62 | 63 | 64 | @pytest.fixture 65 | def bwa_help(): 66 | with open(resource_filename(__name__, "test_data/bwa.txt")) as fp: 67 | return fp.read() 68 | 69 | 70 | @pytest.fixture 71 | def bwa_bwt2sa_help(): 72 | with open(resource_filename(__name__, "test_data/bwa_bwt2sa.txt")) as fp: 73 | return fp.read() 74 | 75 | 76 | @pytest.fixture 77 | def bedtools_help(): 78 | with open(resource_filename(__name__, "test_data/bedtools.txt")) as fp: 79 | return fp.read() 80 | 81 | 82 | @pytest.fixture 83 | def bedtools_coverage_help(): 84 | with open(resource_filename(__name__, "test_data/bedtools_coverage.txt")) as fp: 85 | return fp.read() 86 | 87 | 88 | @pytest.fixture 89 | def podchecker_help(): 90 | with open(resource_filename(__name__, "test_data/podchecker.txt")) as fp: 91 | return fp.read() 92 | 93 | 94 | @pytest.fixture() 95 | def process(): 96 | def process_help_section(help): 97 | """ 98 | Does some preprocessing on a help text segment to facilitate testing 99 | """ 100 | help = help.strip("\n") 101 | return dedent(help) 102 | 103 | return process_help_section 104 | -------------------------------------------------------------------------------- /test/executors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/test/executors/__init__.py -------------------------------------------------------------------------------- /test/executors/test_docker.py: 
-------------------------------------------------------------------------------- 1 | import docker 2 | import pytest 3 | 4 | from aclimatise.execution.docker import DockerExecutor 5 | 6 | 7 | @pytest.mark.timeout(360) 8 | def test_docker_image_saved(bwamem_help): 9 | client = docker.from_env() 10 | container = client.containers.run( 11 | "biocontainers/bwa:v0.7.17_cv1", 12 | entrypoint=["sleep", "999999999"], 13 | detach=True, 14 | ) 15 | 16 | exec = DockerExecutor(container) 17 | cmd = exec.convert(["bwa", "mem"]) 18 | assert cmd.docker_image == "biocontainers/bwa:v0.7.17_cv1" 19 | 20 | 21 | def test_docker(bwamem_help): 22 | client = docker.from_env() 23 | container = client.containers.run( 24 | "biocontainers/bwa:v0.7.17_cv1", 25 | entrypoint=["sleep", "999999999"], 26 | detach=True, 27 | ) 28 | 29 | exec = DockerExecutor(container) 30 | output = exec.execute(["bwa", "mem"]) 31 | assert output == bwamem_help 32 | container.kill() 33 | 34 | 35 | @pytest.mark.timeout(360) 36 | def test_docker_kill(): 37 | """ 38 | Test that the DockerExecutor can kill the command if it times out 39 | """ 40 | client = docker.from_env(timeout=99999) 41 | container = client.containers.run( 42 | "ubuntu:latest", 43 | entrypoint=["sleep", "999999999"], 44 | detach=True, 45 | ) 46 | 47 | exec = DockerExecutor(container) 48 | output = exec.execute(["sleep", "999999"]) 49 | container.kill() 50 | assert output == "" 51 | 52 | 53 | def test_no_output(): 54 | # Check that it doesn't crash when no output is received 55 | 56 | client = docker.from_env() 57 | container = client.containers.run( 58 | "quay.io/biocontainers/gadem:1.3.1--h516909a_2", 59 | entrypoint=["sleep", "9999999"], 60 | detach=True, 61 | ) 62 | exec = DockerExecutor(container) 63 | output = exec.execute(["gadem"]) 64 | container.kill() 65 | assert output is not None 66 | 67 | 68 | @pytest.mark.timeout(360) 69 | def test_infinite_output(): 70 | """ 71 | Test that the DockerExecutor can kill the command if it's constantly 
producing output 72 | """ 73 | client = docker.from_env(timeout=99999) 74 | container = client.containers.run( 75 | "ubuntu:latest", 76 | entrypoint=["sleep", "999999999"], 77 | detach=True, 78 | ) 79 | 80 | exec = DockerExecutor(container) 81 | output = exec.execute(["yes"]) 82 | container.kill() 83 | assert output.startswith("y") 84 | -------------------------------------------------------------------------------- /test/executors/test_local.py: -------------------------------------------------------------------------------- 1 | from aclimatise.execution.local import LocalExecutor 2 | 3 | from ..util import skip_not_installed 4 | 5 | 6 | @skip_not_installed("bwa") 7 | def test_local(bwamem_help): 8 | exec = LocalExecutor() 9 | output = exec.execute(["bwa", "mem"]) 10 | assert output == bwamem_help 11 | -------------------------------------------------------------------------------- /test/executors/test_man.py: -------------------------------------------------------------------------------- 1 | from test.util import skip_not_installed 2 | 3 | from aclimatise.execution.man import ManPageExecutor 4 | 5 | 6 | @skip_not_installed("git") 7 | @skip_not_installed("man") 8 | def test_git(): 9 | cmd = ManPageExecutor(max_length=99999).explore( 10 | ["git"], 11 | ) 12 | assert len(cmd.positional) > 20 13 | 14 | 15 | @skip_not_installed("git") 16 | @skip_not_installed("ls") 17 | def test_ls(): 18 | cmd = ManPageExecutor().explore( 19 | ["ls"], 20 | ) 21 | assert {"-A", "--almost-all", "-1", "--context"} <= cmd.all_synonyms 22 | -------------------------------------------------------------------------------- /test/flags/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from aclimatise.flag_parser.parser import CliParser 4 | 5 | 6 | @pytest.fixture 7 | def parser(): 8 | return CliParser() 9 | -------------------------------------------------------------------------------- /test/flags/test_bedtools.py: 
-------------------------------------------------------------------------------- 1 | def test_bedtools_block(parser, process): 2 | txt = """ 3 | [ Multi-way file comparisons ] 4 | multiinter Identifies common intervals among multiple interval files. 5 | unionbedg Combines coverage intervals from multiple BEDGRAPH files. 6 | 7 | [ Paired-end manipulation ] 8 | """ 9 | blocks = parser.flags.searchString(txt) 10 | assert len(blocks) == 1, "This comprises only one block of flags" 11 | assert len(blocks[0]) == 2, "The single block contains 2 positional arguments" 12 | 13 | 14 | def test_bedtools_root(parser, bedtools_help): 15 | command = parser.parse_command(bedtools_help, ["bedtools"]) 16 | assert len(command.named) == 1 17 | assert len(command.positional) == 43 18 | -------------------------------------------------------------------------------- /test/flags/test_bwa.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from textwrap import dedent 3 | 4 | import pytest 5 | 6 | from aclimatise.flag_parser import elements 7 | from aclimatise.integration import parse_help 8 | from aclimatise.model import Flag, FlagSynonym, OptionalFlagArg 9 | 10 | 11 | def test_flag_arg(parser): 12 | result = elements.flag_with_arg.parseString("-A INT")[0] 13 | assert isinstance(result, FlagSynonym) 14 | assert result.argtype.name == "INT" 15 | assert result.name == "-A" 16 | 17 | 18 | def test_flag(parser): 19 | result = parser.flag.parseString( 20 | "-A INT score for a sequence match, which scales options -TdBOELU unless overridden [1]" 21 | )[0] 22 | assert isinstance(result, Flag) 23 | assert result.synonyms[0] == "-A" 24 | assert result.args.name == "INT" 25 | 26 | 27 | def test_flag_b(parser): 28 | result = parser.flag.parseString("-B INT penalty for a mismatch [4]") 29 | print(result) 30 | 31 | 32 | def test_multiarg_flag(parser): 33 | result = parser.flag.parseString( 34 | "-O INT[,INT] gap open penalties for deletions and 
insertions [6,6]" 35 | )[0] 36 | assert isinstance(result, Flag) 37 | 38 | 39 | def test_flags(parser): 40 | result = parser.flags.parseString( 41 | """ 42 | -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1] 43 | -B INT penalty for a mismatch [4] 44 | """, 45 | parseAll=True, 46 | ) 47 | 48 | 49 | def test_bwa_segmented_options(parser): 50 | result = parser.flag_block.parseString( 51 | """ 52 | -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1] 53 | -B INT penalty for a mismatch [4] 54 | -O INT[,INT] gap open penalties for deletions and insertions [6,6] 55 | -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [1,1] 56 | -L INT[,INT] penalty for 5'- and 3'-end clipping [5,5] 57 | -U INT penalty for an unpaired read pair [17] 58 | 59 | -x STR read type. Setting -x changes multiple parameters unless overriden [null] 60 | pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) 61 | ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) 62 | intractg: -B9 -O16 -L5 (intra-species contigs to ref) 63 | """, 64 | parseAll=True, 65 | ) 66 | assert len(result) == 7 67 | 68 | 69 | def test_bwa_help_part(parser): 70 | results = list( 71 | parser.flags.scanString( 72 | """ 73 | Algorithm options: 74 | 75 | -t INT number of threads [1] 76 | -k INT minimum seed length [19] 77 | -w INT band width for banded alignment [100] 78 | -d INT off-diagonal X-dropoff [100] 79 | -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [1.5] 80 | -y INT seed occurrence for the 3rd round seeding [20] 81 | -c INT skip seeds with more than INT occurrences [500] 82 | -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50] 83 | -W INT discard a chain if seeded bases shorter than INT [0] 84 | -m INT perform at most INT rounds of mate rescues for each read [50] 85 | -S skip mate rescue 86 | -P skip pairing; mate rescue performed 
unless -S also in use 87 | """ 88 | ) 89 | ) 90 | assert len(results) == 1 91 | 92 | for tokens, start, end in results: 93 | assert len(tokens) == 12 94 | 95 | 96 | def test_bwa_multisection(parser): 97 | s = """ 98 | Scoring options: 99 | 100 | -x STR read type. Setting -x changes multiple parameters unless overriden [null] 101 | pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) 102 | ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) 103 | intractg: -B9 -O16 -L5 (intra-species contigs to ref) 104 | 105 | Input/output options: 106 | 107 | -p smart pairing (ignoring in2.fq) 108 | """ 109 | result_lists = list(parser.flags.scanString(s)) 110 | assert len(result_lists) == 2 111 | for result_list, b, c in result_lists: 112 | assert len(result_list) == 1 113 | 114 | 115 | def test_complex_optionals(parser): 116 | s = """ 117 | -I FLOAT[,FLOAT[,INT[,INT]]] 118 | specify the mean, standard deviation (10% of the mean if absent), max 119 | (4 sigma from the mean if absent) and min of the insert size distribution. 120 | FR orientation only. [inferred] 121 | """ 122 | results = list(parser.flag_block.parseString(s))[0] 123 | assert isinstance(results, Flag) 124 | assert isinstance(results.args, OptionalFlagArg) 125 | assert results.args.names == ["FLOAT", "FLOAT", "INT", "INT"] 126 | 127 | 128 | def test_bwa_skipping(parser): 129 | s = """ 130 | Input/output options: 131 | 132 | -p smart pairing (ignoring in2.fq) 133 | -R STR read group header line such as '@RG\tID:foo\tSM:bar' [null] 134 | -H STR/FILE insert STR to header if it starts with @; or insert lines in FILE [null] 135 | -o FILE sam file to output results to [stdout] 136 | -j treat ALT contigs as part of the primary assembly (i.e. 
ignore .alt file) 137 | -5 for split alignment, take the alignment with the smallest coordinate as primary 138 | """ 139 | cmd = parser.parse_command(cmd=s, name=["bwa", "mem"]) 140 | assert len(cmd.named) == 6 141 | 142 | 143 | def test_bwa_root(bwa_help): 144 | command = parse_help(["bwa"], bwa_help) 145 | assert len(command.named) == 0 146 | assert len(command.positional) == 14 147 | assert command.positional[0].name == "index" 148 | assert command.positional[-1].name == "bwt2sa" 149 | 150 | 151 | def test_bwa(parser, bwamem_help): 152 | # Parse help 153 | command = parse_help(["bwa", "mem"], text=bwamem_help) 154 | 155 | assert len(command.named) == 36 156 | assert len(command.positional) == 3 157 | -------------------------------------------------------------------------------- /test/flags/test_bwakit.py: -------------------------------------------------------------------------------- 1 | def test_single_flag(parser): 2 | 3 | txt = """ 4 | --use_strict (enforce strict mode) 5 | type: bool default: false 6 | """ 7 | 8 | result = parser.flag_block.parseString(txt)[0] 9 | assert "type: bool" in result.description 10 | 11 | 12 | def test_multiple_flags(parser): 13 | 14 | txt = """ 15 | --use_strict (enforce strict mode) 16 | type: bool default: false 17 | --es5_readonly (activate correct semantics for inheriting readonliness) 18 | type: bool default: true 19 | """ 20 | 21 | result = parser.flag_block.setDebug().parseString(txt) 22 | assert len(result) == 2 23 | -------------------------------------------------------------------------------- /test/flags/test_gth.py: -------------------------------------------------------------------------------- 1 | from aclimatise.model import Flag 2 | 3 | 4 | def test_unindented_flags(parser): 5 | """ 6 | Verify that we can parse blocks of flags that aren't intended (which is unusual) 7 | """ 8 | 9 | text = """ 10 | -genomic specify input files containing genomic sequences 11 | mandatory option 12 | -cdna specify input files 
containing cDNA/EST sequences 13 | -protein specify input files containing protein sequences 14 | """.strip() 15 | flags = parser.flags.parseString(text) 16 | assert len(flags) == 3 17 | for flag in flags: 18 | assert isinstance(flag, Flag) 19 | -------------------------------------------------------------------------------- /test/flags/test_htseq.py: -------------------------------------------------------------------------------- 1 | """ 2 | Uses htseq-count, which is used as an example of a Python argparse CLI 3 | """ 4 | import shutil 5 | from textwrap import dedent 6 | 7 | import pytest 8 | 9 | from aclimatise.flag_parser import elements 10 | from aclimatise.model import EmptyFlagArg, FlagSynonym, RepeatFlagArg 11 | 12 | 13 | def test_short(parser): 14 | flag = elements.flag_with_arg.parseString( 15 | dedent( 16 | """ 17 | -i IDATTR 18 | """ 19 | ) 20 | )[0] 21 | assert isinstance(flag, FlagSynonym) 22 | 23 | 24 | def test_long_short_synonyms(parser): 25 | flag = elements.flag_synonyms.parseString( 26 | dedent( 27 | """ 28 | -i IDATTR, --idattr IDATTR 29 | """ 30 | ) 31 | )[0] 32 | print(flag) 33 | 34 | 35 | def test_long_short_desc(parser): 36 | flag = parser.flag_block.parseString( 37 | """ 38 | -i IDATTR, --idattr IDATTR 39 | GFF attribute to be used as feature ID (default, 40 | suitable for Ensembl GTF files: gene_id) 41 | """ 42 | )[0] 43 | print(flag) 44 | 45 | 46 | def test_long_short_choices(parser): 47 | flag = parser.flag_block.parseString( 48 | """ 49 | -m {union,intersection-strict,intersection-nonempty}, --mode {union,intersection-strict,intersection-nonempty} 50 | mode to handle reads overlapping more than one feature 51 | (choices: union, intersection-strict, intersection- 52 | nonempty; default: union) 53 | """ 54 | ) 55 | 56 | 57 | def test_help_section_preamble(parser): 58 | flags = list( 59 | parser.flags.searchString( 60 | dedent( 61 | """ 62 | optional arguments: 63 | -h, --help show this help message and exit 64 | -f {sam,bam}, --format 
{sam,bam} 65 | type of data, either 'sam' or 'bam' 66 | (default: sam) 67 | """ 68 | ) 69 | ) 70 | )[0] 71 | assert len(flags) == 2 72 | 73 | 74 | def test_repeat_type(parser): 75 | flag = elements.flag_synonyms.parseString( 76 | "--additional-attr ADDITIONAL_ATTR [ADDITIONAL_ATTR ...]" 77 | )[0] 78 | assert flag.name == "--additional-attr" 79 | assert isinstance(flag.argtype, RepeatFlagArg) 80 | assert flag.argtype.name == "ADDITIONAL_ATTR" 81 | 82 | 83 | def test_full_flags(parser): 84 | results = parser.flags.parseString( 85 | """ 86 | -h, --help show this help message and exit 87 | -f {sam,bam}, --format {sam,bam} 88 | type of data, either 'sam' or 'bam' 89 | (default: sam) 90 | -r {pos,name}, --order {pos,name} 91 | 'pos' or 'name'. Sorting order of 92 | (default: name). Paired-end sequencing data must be 93 | sorted either by position or by read name, and the 94 | sorting order must be specified. Ignored for single- 95 | end data. 96 | --max-reads-in-buffer MAX_BUFFER_SIZE 97 | When is paired end sorted by 98 | position, allow only so many reads to stay in memory 99 | until the mates are found (raising this number will 100 | use more memory). Has no effect for single end or 101 | paired end sorted by name 102 | -s {yes,no,reverse}, --stranded {yes,no,reverse} 103 | whether the data is from a strand-specific assay. 104 | Specify 'yes', 'no', or 'reverse' (default: yes). 
105 | 'reverse' means 'yes' with reversed strand 106 | interpretation 107 | -a MINAQUAL, --minaqual MINAQUAL 108 | skip all reads with alignment quality lower than the 109 | given minimum value (default: 10) 110 | -t FEATURETYPE, --type FEATURETYPE 111 | feature type (3rd column in GFF file) to be used, all 112 | features of other type are ignored (default, suitable 113 | for Ensembl GTF files: exon) 114 | -i IDATTR, --idattr IDATTR 115 | GFF attribute to be used as feature ID (default, 116 | suitable for Ensembl GTF files: gene_id) 117 | --additional-attr ADDITIONAL_ATTR 118 | Additional feature attributes (default: none, suitable 119 | for Ensembl GTF files: gene_name). Use multiple times 120 | for each different attribute 121 | -m {union,intersection-strict,intersection-nonempty}, --mode {union,intersection-strict,intersection-nonempty} 122 | mode to handle reads overlapping more than one feature 123 | (choices: union, intersection-strict, intersection- 124 | nonempty; default: union) 125 | --nonunique {none,all} 126 | Whether to score reads that are not uniquely aligned 127 | or ambiguously assigned to features 128 | --secondary-alignments {score,ignore} 129 | Whether to score secondary alignments (0x100 flag) 130 | --supplementary-alignments {score,ignore} 131 | Whether to score supplementary alignments (0x800 flag) 132 | -o SAMOUTS, --samout SAMOUTS 133 | write out all SAM alignment records into SAM files 134 | (one per input file needed), annotating each line with 135 | its feature assignment (as an optional field with tag 136 | 'XF') 137 | -q, --quiet suppress progress report 138 | """ 139 | ) 140 | assert len(list(results)) == 15 141 | 142 | 143 | def test_choice(parser): 144 | flag = elements.flag_with_arg.parseString("--format {sam,bam}")[0] 145 | assert flag.name == "--format" 146 | 147 | # Both sets should be the same 148 | assert len(flag.argtype.choices & {"sam", "bam"}) == 2 149 | 150 | 151 | def test_noarg(parser): 152 | flag = 
parser.flag.parseString("-q, --quiet suppress progress report")[0] 153 | assert flag.longest_synonym == "--quiet" 154 | assert len(flag.synonyms) == 2 155 | assert isinstance(flag.args, EmptyFlagArg) 156 | 157 | 158 | @pytest.mark.skipif( 159 | not shutil.which("htseq-count"), reason="htseq-count is not installed" 160 | ) 161 | def test_full(parser, local_executor): 162 | # Parse help 163 | help_text = local_executor.execute(["htseq-count", "--help"]) 164 | flag_sections = parser.flags.searchString(help_text) 165 | # There is one section for positional arguments and one for named arguments 166 | assert len(flag_sections) == 2 167 | # There are two positional arguments 168 | assert len(flag_sections[0]) == 2 169 | # There are at least 15 flags 170 | assert len(flag_sections[1]) >= 15 171 | -------------------------------------------------------------------------------- /test/flags/test_pisces.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | from pkg_resources import resource_filename 4 | 5 | from aclimatise.flag_parser import elements 6 | from aclimatise.flag_parser.parser import CliParser 7 | from aclimatise.model import SimpleFlagArg 8 | 9 | 10 | def test_pisces_flag(parser): 11 | cmd = """ 12 | --targetlodfrequency, --targetvf 13 | """ 14 | flag_synonyms = elements.flag_synonyms.parseString(cmd) 15 | # There is one section for positional arguments and one for named arguments 16 | assert len(flag_synonyms) == 2 17 | assert isinstance(flag_synonyms[1].argtype, SimpleFlagArg) 18 | assert flag_synonyms[1].argtype.name == "FLOAT" 19 | 20 | 21 | def test_pisces_arg(parser): 22 | cmd = """ 23 | --targetlodfrequency, --targetvf 24 | FLOAT Target Frequency to call a variant. Ie, to 25 | target a 5% allele frequency, we must call down 26 | to 2.6%, to capture that 5% allele 95% of the 27 | time. 
This parameter is used by the Somatic 28 | Genotyping Model 29 | """ 30 | flag = parser.flag_block.parseString(cmd)[0] 31 | 32 | assert len(flag.synonyms) == 2 33 | assert flag.description.startswith("FLOAT Target Frequency") 34 | assert flag.args.name == "FLOAT" 35 | 36 | 37 | def test_pisces_arg_2(parser): 38 | cmd = """ 39 | --vqfilter, --variantqualityfilter 40 | INT FilteredVariantQScore to report variant as 41 | filtered 42 | """ 43 | flag = parser.flag_block.parseString(cmd)[0] 44 | 45 | assert len(flag.synonyms) == 2 46 | assert flag.description.startswith("INT FilteredVariantQScore ") 47 | assert flag.args.name == "INT" 48 | 49 | 50 | def test_pisces_indent_dedent(parser): 51 | cmd = """ 52 | -i, --intervalpaths 53 | PATHS IntervalPath(s), single value or comma 54 | delimited list corresponding to BAMPath(s). At 55 | most one value should be provided if BAM folder 56 | is specified 57 | --coveragemethod 58 | STRING'approximate' or 'exact'. Exact is more 59 | precise but requires more memory (minimum 8 GB). 60 | Default approximate 61 | --baselogname STRING 62 | -d, --debug BOOL 63 | --usestitchedxd BOOL Set to true to make use of the consensus 64 | read-direction information (the XD tag) from 65 | stitched reads. This is on by default when using 66 | Stitcher output bam, but must be deliberately 67 | set for Gemini output. 
68 | """ 69 | flags = parser.flag_block.parseString(cmd) 70 | 71 | assert len(flags) == 5 72 | 73 | assert isinstance(flags[0].args, SimpleFlagArg) 74 | assert flags[0].synonyms == ["-i", "--intervalpaths"] 75 | 76 | assert isinstance(flags[3].args, SimpleFlagArg) 77 | assert flags[3].synonyms == ["-d", "--debug"] 78 | assert flags[3].description == "BOOL" 79 | 80 | 81 | def test_pisces_triple_long_flag_synonyms(parser): 82 | cmd = "--minvf, --minimumvariantfrequency, --minimumfrequency " 83 | synonyms = elements.flag_synonyms.parseString(cmd) 84 | 85 | assert len(synonyms) == 3 86 | 87 | 88 | def test_pisces_triple_long_flag(parser): 89 | cmd = """ 90 | --minvf, --minimumvariantfrequency, --minimumfrequency 91 | FLOAT MinimumFrequency to call a variant 92 | """ 93 | flag = parser.flag_block.parseString(cmd)[0] 94 | 95 | assert len(flag.synonyms) == 3 96 | assert flag.description.startswith("FLOAT MinimumFrequency") 97 | 98 | 99 | def test_pisces_quad_flag_synonyms(parser): 100 | cmd = "-c, --mindp, --mindepth, --mincoverage " 101 | synonyms = elements.flag_synonyms.parseString(cmd) 102 | 103 | assert len(synonyms) == 4 104 | 105 | 106 | def test_pisces_quad_flag(parser): 107 | cmd = """ 108 | -c, --mindp, --mindepth, --mincoverage 109 | INT Minimum depth to call a variant 110 | """ 111 | flag = parser.flag_block.parseString(cmd)[0] 112 | 113 | assert len(flag.synonyms) == 4 114 | assert flag.description.startswith("INT Minimum") 115 | 116 | 117 | def test_pisces_multi_indent(parser): 118 | cmd = """ 119 | --minvq, --minvariantqscore 120 | INT MinimumVariantQScore to report variant 121 | -c, --mindp, --mindepth, --mincoverage 122 | INT Minimum depth to call a variant 123 | --minvf, --minimumvariantfrequency, --minimumfrequency 124 | FLOAT MinimumFrequency to call a variant 125 | --targetlodfrequency, --targetvf 126 | FLOAT Target Frequency to call a variant. 
Ie, to 127 | target a 5% allele frequency, we must call down 128 | to 2.6%, to capture that 5% allele 95% of the 129 | time. This parameter is used by the Somatic 130 | Genotyping Model 131 | --vqfilter, --variantqualityfilter 132 | INT FilteredVariantQScore to report variant as 133 | filtered 134 | 135 | """ 136 | flags = parser.flags.parseString(cmd) 137 | 138 | assert len(flags) == 5 139 | 140 | 141 | def test_pisces(parser, pisces_help): 142 | # Parse help 143 | flag_sections = parser.flags.searchString(pisces_help) 144 | # There is one section for positional arguments and one for named arguments 145 | assert len(flag_sections) == 5 146 | 147 | # There are two arguments in the first block 148 | assert len(flag_sections[0]) == 2 149 | 150 | # There are 23 arguments in the second block 151 | assert len(flag_sections[1]) == 24 152 | 153 | # There are 4 arguments in the third block 154 | assert len(flag_sections[2]) == 4 155 | 156 | # There are 23 arguments in the fourth block 157 | assert len(flag_sections[3]) == 23 158 | 159 | # There are 6 arguments in the fifth block 160 | assert len(flag_sections[4]) == 6 161 | 162 | # The very first argument has 3 synonyms 163 | assert len(flag_sections[0][0].synonyms) == 3 164 | -------------------------------------------------------------------------------- /test/flags/test_podchecker.py: -------------------------------------------------------------------------------- 1 | from aclimatise.model import Flag 2 | 3 | 4 | def test_podchecker_flags(parser): 5 | cmd = """ 6 | -warnings -nowarnings 7 | Turn on/off printing of warnings. Repeating -warnings increases 8 | the warning level, i.e. more warnings are printed. Currently 9 | increasing to level two causes flagging of unescaped "<,>" 10 | characters. 
11 | """ 12 | flag = parser.flag_block.parseString(cmd) 13 | assert isinstance(flag[0], Flag) 14 | assert len(flag[0].synonyms) == 2 15 | 16 | 17 | def test_podchecker(podchecker_help, parser): 18 | cmd = """ 19 | Options and Arguments: 20 | -help Print a brief help message and exit. 21 | 22 | -man Print the manual page and exit. 23 | 24 | -warnings -nowarnings 25 | Turn on/off printing of warnings. Repeating -warnings increases 26 | the warning level, i.e. more warnings are printed. Currently 27 | increasing to level two causes flagging of unescaped "<,>" 28 | characters. 29 | 30 | file The pathname of a POD file to syntax-check (defaults to standard 31 | input). 32 | """ 33 | flags = parser.flags.searchString(cmd)[0] 34 | assert len(flags) == 4 35 | -------------------------------------------------------------------------------- /test/flags/test_samtools.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | 3 | import pytest 4 | 5 | from aclimatise.model import Flag 6 | 7 | 8 | def test_samtools_bedcov_j(parser): 9 | text = """ 10 | -j do not include deletions (D) and ref skips (N) in bedcov computation 11 | """ 12 | flag = parser.flag_block.parseString(text)[0] 13 | assert isinstance(flag, Flag) 14 | assert flag.synonyms[0] == "-j" 15 | 16 | 17 | def test_samtools_bedcov_qjfmt(parser): 18 | text = """ 19 | -Q mapping quality threshold [0] 20 | -j do not include deletions (D) and ref skips (N) in bedcov computation 21 | --input-fmt-option OPT[=VAL] 22 | Specify a single input file format option in the form 23 | of OPTION or OPTION=VALUE 24 | """ 25 | flags = list(parser.flags.setDebug().searchString(text)[0]) 26 | assert len(flags) == 3 27 | 28 | 29 | def test_samtools(parser, samtools_help): 30 | # Parse the root samtools command 31 | samtools = parser.parse_command(name=["samtools"], cmd=samtools_help) 32 | assert len(samtools.named) == 0 33 | assert len(samtools.positional) > 25 34 | 35 | 36 | 
@pytest.mark.skipif(not shutil.which("samtools"), reason="samtools is not installed") 37 | def test_samtools_index(parser, local_executor): 38 | # Parse help 39 | help_text = local_executor.execute(["samtools", "index"]) 40 | flag_sections = parser.flags.searchString(help_text) 41 | # There is one section for positional arguments and one for named arguments 42 | assert len(flag_sections) == 1 43 | # There are two positional arguments 44 | assert len(flag_sections[0]) == 4 45 | -------------------------------------------------------------------------------- /test/flags/test_singularity.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | 3 | import pytest 4 | 5 | from aclimatise.flag_parser.parser import CliParser 6 | 7 | 8 | def test_singularity_style_flags(parser): 9 | flag = parser.flag_block.parseString( 10 | " -n|--name Specify a custom container name (first priority)" 11 | )[0] 12 | assert len(flag.synonyms) == 2 13 | assert flag.synonyms == ["-n", "--name"] 14 | 15 | 16 | @pytest.mark.skipif( 17 | not shutil.which("singularity"), reason="singularity is not installed" 18 | ) 19 | def test_singularity_pull(local_executor): 20 | parser = CliParser(parse_positionals=False) 21 | 22 | # Parse help 23 | help_text = local_executor.execute(["singularity", "pull", "--help"]) 24 | flag_sections = parser.flags.searchString(help_text) 25 | # There is one section for positional arguments and one for named arguments 26 | assert len(flag_sections) == 1 27 | # There are two positional arguments 28 | assert len(flag_sections[0]) >= 5 29 | -------------------------------------------------------------------------------- /test/name_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/test/name_generation/__init__.py 
-------------------------------------------------------------------------------- /test/name_generation/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from aclimatise.converter import WrapperGenerator 4 | 5 | 6 | @pytest.fixture() 7 | def snake_gen(): 8 | return WrapperGenerator(case="snake", generate_names=True) 9 | 10 | 11 | @pytest.fixture() 12 | def camel_gen(): 13 | return WrapperGenerator(case="camel", generate_names=True) 14 | 15 | 16 | @pytest.fixture() 17 | def gen(): 18 | return WrapperGenerator() 19 | -------------------------------------------------------------------------------- /test/name_generation/test_batch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test all the test data files 3 | """ 4 | import pytest 5 | from pkg_resources import resource_filename 6 | 7 | from aclimatise import WrapperGenerator, parse_help 8 | 9 | from ..util import HelpText, all_tests, convert_validate, validate_cwl, validate_wdl 10 | 11 | 12 | @pytest.mark.parametrize("test", all_tests) 13 | def test_all(test: HelpText): 14 | """ 15 | Tests that generate_names can work on real-life Commands without exceeding reasonable system resources 16 | """ 17 | with open(resource_filename("test", test.path)) as fp: 18 | help_text = fp.read() 19 | 20 | cmd = parse_help(test.cmd, help_text) 21 | 22 | WrapperGenerator().choose_variable_names([*cmd.positional, *cmd.named]) 23 | -------------------------------------------------------------------------------- /test/name_generation/test_case.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the casing (snake_case vs camelCase) used by the converters 3 | """ 4 | import pytest 5 | 6 | from aclimatise.converter import WrapperGenerator 7 | from aclimatise.model import EmptyFlagArg, Flag 8 | 9 | 10 | def test_camel_short(camel_gen): 11 | flag = Flag( 12 | synonyms=["-t"], 
description="number of threads [1]", args=EmptyFlagArg() 13 | ) 14 | names = camel_gen.choose_variable_names([flag], length=3) 15 | assert names[0].name == "numberOfThreads" 16 | 17 | 18 | def test_snake_short(snake_gen): 19 | flag = Flag( 20 | synonyms=["-t"], description="number of threads [1]", args=EmptyFlagArg() 21 | ) 22 | names = snake_gen.choose_variable_names([flag], length=2) 23 | assert "number" in names[0].name 24 | assert "threads" in names[0].name 25 | 26 | 27 | def test_camel_long(camel_gen): 28 | flag = Flag( 29 | synonyms=["-g", "--genomepaths", "--genomefolders"], 30 | description="number of threads [1]", 31 | args=EmptyFlagArg(), 32 | ) 33 | names = camel_gen.choose_variable_names([flag], length=2) 34 | assert names[0].name == "genomeFolders" 35 | 36 | 37 | def test_snake_long(snake_gen): 38 | flag = Flag( 39 | synonyms=["-g", "--genomepaths", "--genomefolders"], 40 | description="number of threads [1]", 41 | args=EmptyFlagArg(), 42 | ) 43 | names = snake_gen.choose_variable_names([flag], length=2) 44 | assert names[0].name == "genome_folders" 45 | -------------------------------------------------------------------------------- /test/name_generation/test_description.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests the generate_name function, which converts a paragraph of text into a variable name 3 | """ 4 | from aclimatise.name_generation import generate_name, preprocess 5 | 6 | 7 | def test_bwa_mem_t(): 8 | name = next(generate_name(preprocess("number of threads [1]"))) 9 | assert len(name) < 5 10 | assert "number" in name 11 | assert "threads" in name 12 | 13 | 14 | def test_bwa_mem_p(): 15 | name = next(generate_name(preprocess("smart pairing (ignoring in2.fq)"))) 16 | assert len(name) <= 3 17 | assert "smart" in name 18 | assert "pairing" in name 19 | 20 | 21 | def test_bwa_mem_r(): 22 | name = next( 23 | generate_name( 24 | preprocess("read group header line such as '@RG\tID:foo\tSM:bar' 
[null]") 25 | ) 26 | ) 27 | assert len(name) < 5 28 | assert "read" in name 29 | # assert 'header' in name 30 | 31 | 32 | def test_bwa_mem_i(): 33 | name = next( 34 | generate_name( 35 | preprocess( 36 | "specify the mean, standard deviation (10% of the mean if absent), max (4 sigma from the mean if absent) and min of the insert size distribution. FR orientation only. [inferred]" 37 | ) 38 | ) 39 | ) 40 | assert len(name) < 5 41 | assert "specify" in name 42 | 43 | # Ideally this would return "mean" first, but the POS engine thinks that "mean" describes "deviation" 44 | # assert "mean" in name 45 | assert "deviation" in name 46 | 47 | 48 | def test_bedtools_coverage_d(): 49 | name = next( 50 | generate_name( 51 | preprocess( 52 | "Report the depth at each position in each A feature. Positions reported are one based. Each position and depth follow the complete A feature." 53 | ) 54 | ) 55 | ) 56 | assert len(name) < 5 57 | assert "report" in name 58 | assert "depth" in name 59 | 60 | 61 | def test_bedtools_coverage_s(): 62 | name = next( 63 | generate_name( 64 | preprocess( 65 | "Require same strandedness. That is, only report hits in B that overlap A on the _same_ strand. By default, overlaps are reported without respect to strand" 66 | ) 67 | ) 68 | ) 69 | assert len(name) < 5 70 | assert "require" in name 71 | assert "strandedness" in name 72 | 73 | 74 | def test_bedtools_coverage_g(): 75 | name = next( 76 | generate_name( 77 | preprocess( 78 | "Provide a genome file to enforce consistent chromosome sort order across input files. Only applies when used with -sorted option." 
79 | ) 80 | ) 81 | ) 82 | assert len(name) < 5 83 | assert "provide" in name 84 | assert "file" in name 85 | 86 | 87 | def test_symbol(): 88 | """ 89 | Check that symbols are correctly removed from the output 90 | """ 91 | name = next(generate_name(preprocess("/genome@ #file$"))) 92 | assert len(name) < 5 93 | assert "genome" in name 94 | assert "file" in name 95 | 96 | 97 | def test_hyphens(): 98 | name = next(generate_name(preprocess("penalty for 5'- and 3'-end clipping [5,5]"))) 99 | assert len(name) < 5 100 | assert "penalty" in name 101 | 102 | for word in name: 103 | assert "-" not in word 104 | assert "[" not in word 105 | assert "," not in word 106 | -------------------------------------------------------------------------------- /test/name_generation/test_group.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests certain groups of flags used together 3 | """ 4 | from aclimatise.converter import WrapperGenerator 5 | from aclimatise.model import EmptyFlagArg, Flag, SimpleFlagArg 6 | 7 | 8 | def test_bedtools_window_sm(): 9 | """ 10 | These two flags have almost the same name, and almost the same description 11 | """ 12 | flags = [ 13 | Flag( 14 | synonyms=["-sm"], 15 | description="Only report hits in B that overlap A on the _same_ strand.", 16 | args=EmptyFlagArg(), 17 | ), 18 | Flag( 19 | synonyms=["-sm"], 20 | description="Only report hits in B that overlap A on the _opposite_ strand.", 21 | args=EmptyFlagArg(), 22 | ), 23 | Flag( 24 | synonyms=["-c"], 25 | description="For each entry in A, report the number of overlaps with B.", 26 | args=EmptyFlagArg(), 27 | ), 28 | ] 29 | args = WrapperGenerator().choose_variable_names(flags) 30 | assert len(set([arg.name for arg in args])) == 3 31 | 32 | 33 | def test_same_description(): 34 | """ 35 | Normally we ignore one-character flag names, and instead try to read their descriptions for a more informative name. 
36 | However, if the descriptions are identical to each other, we have to fall back to the description 37 | """ 38 | flags = [ 39 | Flag( 40 | synonyms=["-a"], 41 | description="Makes the program do a certain thing", 42 | args=EmptyFlagArg(), 43 | ), 44 | Flag( 45 | synonyms=["-b"], 46 | description="Makes the program do a certain thing", 47 | args=EmptyFlagArg(), 48 | ), 49 | ] 50 | names = WrapperGenerator().choose_variable_names(flags) 51 | assert names[0].name == "a" 52 | assert names[1].name == "b" 53 | 54 | 55 | def test_same_arg(): 56 | """ 57 | Normally we ignore one-character flag names, and instead try to read their descriptions for a more informative name. 58 | However, if the descriptions are identical to each other, we have to fall back to the description 59 | """ 60 | flags = [ 61 | Flag(synonyms=["-a"], description="", args=SimpleFlagArg("SomeThing")), 62 | Flag(synonyms=["-b"], description="", args=SimpleFlagArg("SomeThing")), 63 | ] 64 | names = WrapperGenerator().choose_variable_names(flags) 65 | assert names[0].name == "a" 66 | assert names[1].name == "b" 67 | -------------------------------------------------------------------------------- /test/name_generation/test_single_flag.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for the name generation of single flags 3 | """ 4 | from aclimatise.converter.wdl import WdlGenerator 5 | from aclimatise.model import EmptyFlagArg, Flag, Positional, SimpleFlagArg 6 | 7 | 8 | def test_samtools_dict_output(): 9 | gen = WdlGenerator() 10 | arg = Flag( 11 | synonyms=["-o", "--output"], 12 | description="file to write out dict file [stdout]", 13 | args=SimpleFlagArg(name="str"), 14 | ) 15 | name = gen.choose_variable_names([arg])[0].name 16 | # The WDL converter should avoid naming a variable "output" since that's a WDL keyword 17 | assert name != "output" 18 | 19 | # Also, since we have a description, the generator shouldn't choose the lazy option of 
var_output 20 | assert name != "var_output" 21 | 22 | 23 | def test_samtools_quickcheck_output(): 24 | gen = WdlGenerator() 25 | arg = Positional(description="", position=0, name="input") 26 | name = gen.choose_variable_names([arg])[0].name 27 | # The WDL converter should avoid naming a variable "output" since that's a WDL keyword 28 | assert name != "input" 29 | 30 | 31 | def test_bwt2sa_i(gen): 32 | arg = Flag(synonyms=["-i"], description="", args=SimpleFlagArg(name="32")) 33 | 34 | name = gen.choose_variable_names([arg])[0].name 35 | # 32 isn't a valid variable name, so the only option here is to use the letter i 36 | assert name == "i" 37 | 38 | 39 | def test_name_to_words_symbol(gen): 40 | """ 41 | Check that we can get an argument name even if the argument's flag is a symbol 42 | """ 43 | arg = Flag( 44 | synonyms=["-@"], 45 | description="Number of additional threads to use", 46 | args=EmptyFlagArg(), 47 | ) 48 | 49 | name = gen.choose_variable_names([arg])[0].name 50 | assert name == "at" 51 | 52 | 53 | def test_name_to_words(gen): 54 | """ 55 | Check that we can get an argument name even if the argument's flag is a symbol 56 | """ 57 | arg = Flag( 58 | synonyms=["--genomepaths"], 59 | description="", 60 | args=EmptyFlagArg(), 61 | ) 62 | 63 | name = gen.choose_variable_names([arg])[0].name 64 | assert "genome" in name 65 | assert "paths" in name 66 | # assert list(arg._name_from_name()) == ["genome", "paths"] 67 | 68 | 69 | def test_bwa_mem_infq(gen): 70 | arg = Positional(name="in1.fq", description="", position=0) 71 | name = gen.choose_variable_names([arg])[0].name 72 | # name = arg.variable_name([]) 73 | assert "1" in name or "one" in name 74 | assert "in" in name 75 | assert "fq" in name 76 | -------------------------------------------------------------------------------- /test/test_cli.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import tempfile 3 | import traceback 4 | from pathlib import 
Path 5 | 6 | import pyparsing 7 | import pytest 8 | from click.testing import CliRunner 9 | from packaging import version 10 | 11 | from aclimatise.cli import main 12 | from aclimatise.yaml import yaml 13 | 14 | from .util import skip_not_installed, validate_cwl, validate_janis, validate_wdl 15 | 16 | 17 | @pytest.fixture() 18 | def runner(): 19 | return CliRunner() 20 | 21 | 22 | def cli_worked(result): 23 | if result.exit_code == 0: 24 | return True 25 | else: 26 | traceback.print_exception(*result.exc_info) 27 | assert result.exit_code == 0 28 | 29 | 30 | def test_pipe_wdl(runner, htseq_help): 31 | result = runner.invoke( 32 | main, ["pipe", "htseq-count", "--format", "wdl"], input=htseq_help 33 | ) 34 | cli_worked(result) 35 | validate_wdl(result.output) 36 | 37 | 38 | def test_pipe_cwl(runner, htseq_help): 39 | result = runner.invoke( 40 | main, ["pipe", "htseq-count", "--format", "cwl"], input=htseq_help 41 | ) 42 | cli_worked(result) 43 | validate_cwl(result.output) 44 | 45 | 46 | def test_pipe_janis(runner, htseq_help): 47 | result = runner.invoke( 48 | main, ["pipe", "htseq-count", "--format", "janis"], input=htseq_help 49 | ) 50 | cli_worked(result) 51 | validate_janis(result.output) 52 | 53 | 54 | @skip_not_installed("htseq-count") 55 | def test_explore_htseq(runner, caplog): 56 | caplog.set_level(100000) 57 | with tempfile.TemporaryDirectory() as tempdir: 58 | result = runner.invoke(main, ["explore", "htseq-count", "--out-dir", tempdir]) 59 | cli_worked(result) 60 | assert len(list(Path(tempdir).iterdir())) == 3 61 | 62 | 63 | @skip_not_installed("ls") 64 | @skip_not_installed("man") 65 | def test_explore_ls_man(runner, caplog): 66 | caplog.set_level(100000) 67 | with tempfile.TemporaryDirectory() as tempdir: 68 | result = runner.invoke(main, ["explore", "ls", "--man", "--out-dir", tempdir]) 69 | cli_worked(result) 70 | with (Path(tempdir) / "ls.yml").open() as fp: 71 | parsed = yaml.load(fp) 72 | assert parsed.help_text.startswith("LS(1)") 73 | 74 | 75 
| @skip_not_installed("samtools") 76 | def test_explore_samtools(runner, caplog): 77 | caplog.set_level(100000) 78 | with tempfile.TemporaryDirectory() as tempdir: 79 | result = runner.invoke( 80 | main, ["explore", "samtools", "--help-flag", "--help", "--out-dir", tempdir] 81 | ) 82 | cli_worked(result) 83 | assert len(list(Path(tempdir).iterdir())) > 20 84 | 85 | 86 | @skip_not_installed("samtools") 87 | def test_explore_samtools_no_subcommands(runner, caplog): 88 | caplog.set_level(100000) 89 | with tempfile.TemporaryDirectory() as tempdir: 90 | result = runner.invoke( 91 | main, 92 | [ 93 | "explore", 94 | "samtools", 95 | "--no-subcommands", 96 | "--out-dir", 97 | tempdir, 98 | ], 99 | ) 100 | cli_worked(result) 101 | # Since we aren't looking at subcommands, there should be one file for each format 102 | assert len(list(Path(tempdir).iterdir())) >= 3 103 | 104 | 105 | @pytest.mark.skipif( 106 | version.parse(pyparsing.__version__) < version.parse("3.0.0a2"), 107 | reason="PyParsing 3.0.0+ is not installed", 108 | ) 109 | def test_grammar(runner): 110 | result = runner.invoke(main, ["grammar"]) 111 | assert result.exit_code == 0 112 | assert len(result.output) > 20 113 | -------------------------------------------------------------------------------- /test/test_convert.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import tempfile 3 | from pathlib import Path 4 | 5 | import pytest 6 | from WDL import parse_document 7 | 8 | from aclimatise import explore_command 9 | from aclimatise.converter.cwl import CwlGenerator 10 | from aclimatise.converter.wdl import WdlGenerator 11 | from aclimatise.model import CliArgument, Flag, SimpleFlagArg 12 | from aclimatise.yaml import yaml 13 | 14 | from .util import convert_validate, skip_not_installed 15 | 16 | # Note: the parse and explore tests run conversion tests already. 
These tests are for specific edge cases 17 | 18 | 19 | def test_premade_samtools(samtools_cmd): 20 | """ 21 | Use a command tree that was generated beforehand, to quickly detect issues relating to the conversion of command 22 | trees 23 | """ 24 | convert_validate(samtools_cmd, explore=True) 25 | 26 | 27 | def test_premade_bedtools(bedtools_cmd): 28 | """ 29 | Use a command tree that was generated beforehand, to quickly detect issues relating to the conversion of command 30 | trees 31 | """ 32 | convert_validate(bedtools_cmd, explore=True) 33 | 34 | 35 | @skip_not_installed("samtools") 36 | @skip_not_installed("samtools.pl") 37 | def test_explore_samtools_pl(yaml_converter): 38 | """ 39 | Tests that commands with a non-standard file extension include their extension in the final output, and don't 40 | override another command with the same stem 41 | """ 42 | samtools = explore_command(["samtools"], max_depth=0) 43 | samtools_pl = explore_command(["samtools.pl"], max_depth=0) 44 | with tempfile.TemporaryDirectory() as temp_dir: 45 | path = Path(temp_dir) 46 | filenames = set() 47 | for path, command in itertools.chain( 48 | yaml_converter.generate_tree(samtools, temp_dir), 49 | yaml_converter.generate_tree(samtools_pl, temp_dir), 50 | ): 51 | filenames.add(path.name) 52 | 53 | assert filenames == {"samtools.yml", "samtools.pl.yml"} 54 | 55 | 56 | def test_docker_conversion(bedtools_cmd): 57 | intersect = bedtools_cmd["intersect"] 58 | container = "quay.io/biocontainers/bedtools:2.29.2--hc088bd4_0" 59 | intersect.docker_image = container 60 | with tempfile.NamedTemporaryFile() as cwl_file: 61 | CwlGenerator().save_to_file(intersect, path=Path(cwl_file.name)) 62 | cwl_file.seek(0) 63 | parsed_cwl = yaml.load(cwl_file) 64 | assert any( 65 | [ 66 | hint["class"] == "DockerRequirement" and hint["dockerPull"] == container 67 | for hint in parsed_cwl["hints"] 68 | ] 69 | ) 70 | 71 | wdl = WdlGenerator().save_to_string(intersect) 72 | parsed_wdl = 
parse_document(wdl).tasks[0] 73 | assert parsed_wdl.runtime["docker"].literal.value == container 74 | 75 | 76 | @pytest.mark.parametrize( 77 | "flag,cwltype,wdltype", 78 | [ 79 | [ 80 | Flag( 81 | synonyms=["--some-flag"], 82 | optional=True, 83 | args=SimpleFlagArg("string"), 84 | description="", 85 | ), 86 | "string?", 87 | "String?", 88 | ], 89 | [ 90 | Flag( 91 | synonyms=["--some-flag"], 92 | optional=False, 93 | args=SimpleFlagArg("string"), 94 | description="", 95 | ), 96 | "string", 97 | "String", 98 | ], 99 | ], 100 | ) 101 | def test_types_conversion(flag: CliArgument, cwltype: str, wdltype: str): 102 | """ 103 | Test that types are being correctly translated from aCLImatise types to CWL and WDL 104 | """ 105 | assert CwlGenerator.arg_to_cwl_type(flag) == cwltype 106 | assert ( 107 | WdlGenerator.type_to_wdl(flag.get_type(), optional=flag.optional).get_string() 108 | == wdltype 109 | ) 110 | -------------------------------------------------------------------------------- /test/test_data/bedtools.txt: -------------------------------------------------------------------------------- 1 | bedtools: flexible tools for genome arithmetic and DNA sequence analysis. 2 | usage: bedtools [options] 3 | 4 | The bedtools sub-commands include: 5 | 6 | [ Genome arithmetic ] 7 | intersect Find overlapping intervals in various ways. 8 | window Find overlapping intervals within a window around an interval. 9 | closest Find the closest, potentially non-overlapping interval. 10 | coverage Compute the coverage over defined intervals. 11 | map Apply a function to a column for each overlapping interval. 12 | genomecov Compute the coverage over an entire genome. 13 | merge Combine overlapping/nearby intervals into a single interval. 14 | cluster Cluster (but don't merge) overlapping/nearby intervals. 15 | complement Extract intervals _not_ represented by an interval file. 16 | shift Adjust the position of intervals. 17 | subtract Remove intervals based on overlaps b/w two files. 
18 | slop Adjust the size of intervals. 19 | flank Create new intervals from the flanks of existing intervals. 20 | sort Order the intervals in a file. 21 | random Generate random intervals in a genome. 22 | shuffle Randomly redistrubute intervals in a genome. 23 | sample Sample random records from file using reservoir sampling. 24 | spacing Report the gap lengths between intervals in a file. 25 | annotate Annotate coverage of features from multiple files. 26 | 27 | [ Multi-way file comparisons ] 28 | multiinter Identifies common intervals among multiple interval files. 29 | unionbedg Combines coverage intervals from multiple BEDGRAPH files. 30 | 31 | [ Paired-end manipulation ] 32 | pairtobed Find pairs that overlap intervals in various ways. 33 | pairtopair Find pairs that overlap other pairs in various ways. 34 | 35 | [ Format conversion ] 36 | bamtobed Convert BAM alignments to BED (& other) formats. 37 | bedtobam Convert intervals to BAM records. 38 | bamtofastq Convert BAM records to FASTQ records. 39 | bedpetobam Convert BEDPE intervals to BAM records. 40 | bed12tobed6 Breaks BED12 intervals into discrete BED6 intervals. 41 | 42 | [ Fasta manipulation ] 43 | getfasta Use intervals to extract sequences from a FASTA file. 44 | maskfasta Use intervals to mask sequences from a FASTA file. 45 | nuc Profile the nucleotide content of intervals in a FASTA file. 46 | 47 | [ BAM focused tools ] 48 | multicov Counts coverage from multiple BAMs at specific intervals. 49 | tag Tag BAM alignments based on overlaps with interval files. 50 | 51 | [ Statistical relationships ] 52 | jaccard Calculate the Jaccard statistic b/w two sets of intervals. 53 | reldist Calculate the distribution of relative distances b/w two files. 54 | fisher Calculate Fisher statistic b/w two feature files. 55 | 56 | [ Miscellaneous tools ] 57 | overlap Computes the amount of overlap from two intervals. 58 | igv Create an IGV snapshot batch script. 
59 | links Create a HTML page of links to UCSC locations. 60 | makewindows Make interval "windows" across a genome. 61 | groupby Group by common cols. & summarize oth. cols. (~ SQL "groupBy") 62 | expand Replicate lines based on lists of values in columns. 63 | split Split a file into multiple files with equal records or base pairs. 64 | 65 | [ General help ] 66 | --help Print this help menu. 67 | --version What version of bedtools are you using?. 68 | --contact Feature requests, bugs, mailing lists, etc. 69 | 70 | -------------------------------------------------------------------------------- /test/test_data/bedtools_closest.txt: -------------------------------------------------------------------------------- 1 | 2 | Tool: bedtools closest (aka closestBed) 3 | Version: v2.26.0 4 | Summary: For each feature in A, finds the closest 5 | feature (upstream or downstream) in B. 6 | 7 | Usage: bedtools closest [OPTIONS] -a -b 8 | 9 | Options: 10 | -d In addition to the closest feature in B, 11 | report its distance to A as an extra column. 12 | - The reported distance for overlapping features will be 0. 13 | 14 | -D Like -d, report the closest feature in B, and its distance to A 15 | as an extra column. Unlike -d, use negative distances to report 16 | upstream features. 17 | The options for defining which orientation is "upstream" are: 18 | - "ref" Report distance with respect to the reference genome. 19 | B features with a lower (start, stop) are upstream 20 | - "a" Report distance with respect to A. 21 | When A is on the - strand, "upstream" means B has a 22 | higher (start,stop). 23 | - "b" Report distance with respect to B. 24 | When B is on the - strand, "upstream" means A has a 25 | higher (start,stop). 26 | 27 | -io Ignore features in B that overlap A. That is, we want close, 28 | yet not touching features only. 29 | 30 | -iu Ignore features in B that are upstream of features in A. 
31 | This option requires -D and follows its orientation 32 | rules for determining what is "upstream". 33 | 34 | -id Ignore features in B that are downstream of features in A. 35 | This option requires -D and follows its orientation 36 | rules for determining what is "downstream". 37 | 38 | -fu Choose first from features in B that are upstream of features in A. 39 | This option requires -D and follows its orientation 40 | rules for determining what is "upstream". 41 | 42 | -fd Choose first from features in B that are downstream of features in A. 43 | This option requires -D and follows its orientation 44 | rules for determining what is "downstream". 45 | 46 | -t How ties for closest feature are handled. This occurs when two 47 | features in B have exactly the same "closeness" with A. 48 | By default, all such features in B are reported. 49 | Here are all the options: 50 | - "all" Report all ties (default). 51 | - "first" Report the first tie that occurred in the B file. 52 | - "last" Report the last tie that occurred in the B file. 53 | 54 | -mdb How multiple databases are resolved. 55 | - "each" Report closest records for each database (default). 56 | - "all" Report closest records among all databases. 57 | 58 | -k Report the k closest hits. Default is 1. If tieMode = "all", 59 | - all ties will still be reported. 60 | 61 | -N Require that the query and the closest hit have different names. 62 | For BED, the 4th column is compared. 63 | 64 | -s Require same strandedness. That is, only report hits in B 65 | that overlap A on the _same_ strand. 66 | - By default, overlaps are reported without respect to strand. 67 | 68 | -S Require different strandedness. That is, only report hits in B 69 | that overlap A on the _opposite_ strand. 70 | - By default, overlaps are reported without respect to strand. 71 | 72 | -f Minimum overlap required as a fraction of A. 73 | - Default is 1E-9 (i.e., 1bp). 74 | - FLOAT (e.g. 
0.50) 75 | 76 | -F Minimum overlap required as a fraction of B. 77 | - Default is 1E-9 (i.e., 1bp). 78 | - FLOAT (e.g. 0.50) 79 | 80 | -r Require that the fraction overlap be reciprocal for A AND B. 81 | - In other words, if -f is 0.90 and -r is used, this requires 82 | that B overlap 90% of A and A _also_ overlaps 90% of B. 83 | 84 | -e Require that the minimum fraction be satisfied for A OR B. 85 | - In other words, if -e is used with -f 0.90 and -F 0.10 this requires 86 | that either 90% of A is covered OR 10% of B is covered. 87 | Without -e, both fractions would have to be satisfied. 88 | 89 | -split Treat "split" BAM or BED12 entries as distinct BED intervals. 90 | 91 | -g Provide a genome file to enforce consistent chromosome sort order 92 | across input files. Only applies when used with -sorted option. 93 | 94 | -nonamecheck For sorted data, don't throw an error if the file has different naming conventions 95 | for the same chromosome. ex. "chr1" vs "chr01". 96 | 97 | -names When using multiple databases, provide an alias for each that 98 | will appear instead of a fileId when also printing the DB record. 99 | 100 | -filenames When using multiple databases, show each complete filename 101 | instead of a fileId when also printing the DB record. 102 | 103 | -sortout When using multiple databases, sort the output DB hits 104 | for each record. 105 | 106 | -bed If using BAM input, write output as BED. 107 | 108 | -header Print the header from the A file prior to results. 109 | 110 | -nobuf Disable buffered output. Using this option will cause each line 111 | of output to be printed as it is generated, rather than saved 112 | in a buffer. This will make printing large output files 113 | noticeably slower, but can be useful in conjunction with 114 | other software tools and scripts that need to process one 115 | line of bedtools output at a time. 116 | 117 | -iobuf Specify amount of memory to use for input buffer. 118 | Takes an integer argument. 
Optional suffixes K/M/G supported. 119 | Note: currently has no effect with compressed files. 120 | 121 | Notes: 122 | Reports "none" for chrom and "-1" for all other fields when a feature 123 | is not found in B on the same chromosome as the feature in A. 124 | E.g. none -1 -1 125 | 126 | -------------------------------------------------------------------------------- /test/test_data/bedtools_coverage.txt: -------------------------------------------------------------------------------- 1 | Tool: bedtools coverage (aka coverageBed) 2 | Version: v2.26.0 3 | Summary: Returns the depth and breadth of coverage of features from B 4 | on the intervals in A. 5 | 6 | Usage: bedtools coverage [OPTIONS] -a -b 7 | 8 | Options: 9 | -hist Report a histogram of coverage for each feature in A 10 | as well as a summary histogram for _all_ features in A. 11 | 12 | Output (tab delimited) after each feature in A: 13 | 1) depth 14 | 2) # bases at depth 15 | 3) size of A 16 | 4) % of A at depth 17 | 18 | -d Report the depth at each position in each A feature. 19 | Positions reported are one based. Each position 20 | and depth follow the complete A feature. 21 | 22 | -counts Only report the count of overlaps, don't compute fraction, etc. 23 | 24 | -mean Report the mean depth of all positions in each A feature. 25 | 26 | -s Require same strandedness. That is, only report hits in B 27 | that overlap A on the _same_ strand. 28 | - By default, overlaps are reported without respect to strand. 29 | 30 | -S Require different strandedness. That is, only report hits in B 31 | that overlap A on the _opposite_ strand. 32 | - By default, overlaps are reported without respect to strand. 33 | 34 | -f Minimum overlap required as a fraction of A. 35 | - Default is 1E-9 (i.e., 1bp). 36 | - FLOAT (e.g. 0.50) 37 | 38 | -F Minimum overlap required as a fraction of B. 39 | - Default is 1E-9 (i.e., 1bp). 40 | - FLOAT (e.g. 0.50) 41 | 42 | -r Require that the fraction overlap be reciprocal for A AND B. 
43 | - In other words, if -f is 0.90 and -r is used, this requires 44 | that B overlap 90% of A and A _also_ overlaps 90% of B. 45 | 46 | -e Require that the minimum fraction be satisfied for A OR B. 47 | - In other words, if -e is used with -f 0.90 and -F 0.10 this requires 48 | that either 90% of A is covered OR 10% of B is covered. 49 | Without -e, both fractions would have to be satisfied. 50 | 51 | -split Treat "split" BAM or BED12 entries as distinct BED intervals. 52 | 53 | -g Provide a genome file to enforce consistent chromosome sort order 54 | across input files. Only applies when used with -sorted option. 55 | 56 | -nonamecheck For sorted data, don't throw an error if the file has different naming conventions 57 | for the same chromosome. ex. "chr1" vs "chr01". 58 | 59 | -sorted Use the "chromsweep" algorithm for sorted (-k1,1 -k2,2n) input. 60 | 61 | -bed If using BAM input, write output as BED. 62 | 63 | -header Print the header from the A file prior to results. 64 | 65 | -nobuf Disable buffered output. Using this option will cause each line 66 | of output to be printed as it is generated, rather than saved 67 | in a buffer. This will make printing large output files 68 | noticeably slower, but can be useful in conjunction with 69 | other software tools and scripts that need to process one 70 | line of bedtools output at a time. 71 | 72 | -iobuf Specify amount of memory to use for input buffer. 73 | Takes an integer argument. Optional suffixes K/M/G supported. 74 | Note: currently has no effect with compressed files. 75 | 76 | Default Output: 77 | After each entry in A, reports: 78 | 1) The number of features in B that overlapped the A interval. 79 | 2) The number of bases in A that had non-zero coverage. 80 | 3) The length of the entry in A. 81 | 4) The fraction of bases in A that had non-zero coverage. 
82 | 83 | -------------------------------------------------------------------------------- /test/test_data/bedtools_multiinter.txt: -------------------------------------------------------------------------------- 1 | 2 | Tool: bedtools multiinter (aka multiIntersectBed) 3 | Version: v2.26.0 4 | Summary: Identifies common intervals among multiple 5 | BED/GFF/VCF files. 6 | 7 | Usage: bedtools multiinter [OPTIONS] -i FILE1 FILE2 .. FILEn 8 | Requires that each interval file is sorted by chrom/start. 9 | 10 | Options: 11 | -cluster Invoke Ryan Layers's clustering algorithm. 12 | 13 | -header Print a header line. 14 | (chrom/start/end + names of each file). 15 | 16 | -names A list of names (one/file) to describe each file in -i. 17 | These names will be printed in the header line. 18 | 19 | -g Use genome file to calculate empty regions. 20 | - STRING. 21 | 22 | -empty Report empty regions (i.e., start/end intervals w/o 23 | values in all files). 24 | - Requires the '-g FILE' parameter. 25 | 26 | -filler TEXT Use TEXT when representing intervals having no value. 27 | - Default is '0', but you can use 'N/A' or any text. 28 | 29 | -examples Show detailed usage examples. 30 | 31 | -------------------------------------------------------------------------------- /test/test_data/bedtools_random.txt: -------------------------------------------------------------------------------- 1 | 2 | ***** 3 | *****ERROR: Need a genome (-g) file. 4 | ***** 5 | 6 | Tool: bedtools random (aka randomBed) 7 | Version: v2.26.0 8 | Summary: Generate random intervals among a genome. 9 | 10 | Usage: bedtools random [OPTIONS] -g 11 | 12 | Options: 13 | -l The length of the intervals to generate. 14 | - Default = 100. 15 | - (INTEGER) 16 | 17 | -n The number of intervals to generate. 18 | - Default = 1,000,000. 19 | - (INTEGER) 20 | 21 | -seed Supply an integer seed for the shuffling. 22 | - By default, the seed is chosen automatically. 
23 | - (INTEGER) 24 | 25 | Notes: 26 | (1) The genome file should tab delimited and structured as follows: 27 | 28 | 29 | For example, Human (hg19): 30 | chr1 249250621 31 | chr2 243199373 32 | ... 33 | chr18_gl000207_random 4262 34 | 35 | Tips: 36 | One can use the UCSC Genome Browser's MySQL database to extract 37 | chromosome sizes. For example, H. sapiens: 38 | 39 | mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \ 40 | "select chrom, size from hg19.chromInfo" > hg19.genome 41 | 42 | -------------------------------------------------------------------------------- /test/test_data/bedtools_spacing.txt: -------------------------------------------------------------------------------- 1 | 2 | Tool: bedtools spacing 3 | Version: v2.26.0 4 | Summary: Report (last col.) the gap lengths between intervals in a file. 5 | 6 | Usage: bedtools spacing [OPTIONS] -i 7 | 8 | Notes: 9 | (1) Input must be sorted by chrom,start (sort -k1,1 -k2,2n for BED). 10 | (2) The 1st element for each chrom will have NULL distance. ("."). 11 | (3) Distance for overlapping intervaks is -1 and bookended is 0. 12 | 13 | Example: 14 | $ cat test.bed 15 | chr1 0 10 16 | chr1 10 20 17 | chr1 21 30 18 | chr1 35 45 19 | chr1 100 200 20 | 21 | $ bedtools spacing -i test.bed 22 | chr1 0 10 . 23 | chr1 10 20 0 24 | chr1 21 30 1 25 | chr1 35 45 5 26 | chr1 100 200 55 27 | 28 | -bed If using BAM input, write output as BED. 29 | 30 | -header Print the header from the A file prior to results. 31 | 32 | -nobuf Disable buffered output. Using this option will cause each line 33 | of output to be printed as it is generated, rather than saved 34 | in a buffer. This will make printing large output files 35 | noticeably slower, but can be useful in conjunction with 36 | other software tools and scripts that need to process one 37 | line of bedtools output at a time. 38 | 39 | -iobuf Specify amount of memory to use for input buffer. 40 | Takes an integer argument. Optional suffixes K/M/G supported. 
41 | Note: currently has no effect with compressed files. 42 | 43 | -------------------------------------------------------------------------------- /test/test_data/bedtools_subtract.txt: -------------------------------------------------------------------------------- 1 | 2 | Tool: bedtools subtract (aka subtractBed) 3 | Version: v2.26.0 4 | Summary: Removes the portion(s) of an interval that is overlapped 5 | by another feature(s). 6 | 7 | Usage: bedtools subtract [OPTIONS] -a -b 8 | 9 | Options: 10 | -A Remove entire feature if any overlap. That is, by default, 11 | only subtract the portion of A that overlaps B. Here, if 12 | any overlap is found (or -f amount), the entire feature is removed. 13 | 14 | -N Same as -A except when used with -f, the amount is the sum 15 | of all features (not any single feature). 16 | 17 | -wb Write the original entry in B for each overlap. 18 | - Useful for knowing _what_ A overlaps. Restricted by -f and -r. 19 | 20 | -wo Write the original A and B entries plus the number of base 21 | pairs of overlap between the two features. 22 | - Overlaps restricted by -f and -r. 23 | Only A features with overlap are reported. 24 | 25 | -s Require same strandedness. That is, only report hits in B 26 | that overlap A on the _same_ strand. 27 | - By default, overlaps are reported without respect to strand. 28 | 29 | -S Require different strandedness. That is, only report hits in B 30 | that overlap A on the _opposite_ strand. 31 | - By default, overlaps are reported without respect to strand. 32 | 33 | -f Minimum overlap required as a fraction of A. 34 | - Default is 1E-9 (i.e., 1bp). 35 | - FLOAT (e.g. 0.50) 36 | 37 | -F Minimum overlap required as a fraction of B. 38 | - Default is 1E-9 (i.e., 1bp). 39 | - FLOAT (e.g. 0.50) 40 | 41 | -r Require that the fraction overlap be reciprocal for A AND B. 42 | - In other words, if -f is 0.90 and -r is used, this requires 43 | that B overlap 90% of A and A _also_ overlaps 90% of B. 
44 | 45 | -e Require that the minimum fraction be satisfied for A OR B. 46 | - In other words, if -e is used with -f 0.90 and -F 0.10 this requires 47 | that either 90% of A is covered OR 10% of B is covered. 48 | Without -e, both fractions would have to be satisfied. 49 | 50 | -split Treat "split" BAM or BED12 entries as distinct BED intervals. 51 | 52 | -g Provide a genome file to enforce consistent chromosome sort order 53 | across input files. Only applies when used with -sorted option. 54 | 55 | -nonamecheck For sorted data, don't throw an error if the file has different naming conventions 56 | for the same chromosome. ex. "chr1" vs "chr01". 57 | 58 | -sorted Use the "chromsweep" algorithm for sorted (-k1,1 -k2,2n) input. 59 | 60 | -bed If using BAM input, write output as BED. 61 | 62 | -header Print the header from the A file prior to results. 63 | 64 | -nobuf Disable buffered output. Using this option will cause each line 65 | of output to be printed as it is generated, rather than saved 66 | in a buffer. This will make printing large output files 67 | noticeably slower, but can be useful in conjunction with 68 | other software tools and scripts that need to process one 69 | line of bedtools output at a time. 70 | 71 | -iobuf Specify amount of memory to use for input buffer. 72 | Takes an integer argument. Optional suffixes K/M/G supported. 73 | Note: currently has no effect with compressed files. 74 | 75 | -------------------------------------------------------------------------------- /test/test_data/bedtools_window.txt: -------------------------------------------------------------------------------- 1 | 2 | Tool: bedtools window (aka windowBed) 3 | Version: v2.26.0 4 | Summary: Examines a "window" around each feature in A and 5 | reports all features in B that overlap the window. For each 6 | overlap the entire entry in A and B are reported. 7 | 8 | Usage: bedtools window [OPTIONS] -a -b 9 | 10 | Options: 11 | -abam The A input file is in BAM format. 
Output will be BAM as well. Replaces -a. 12 | 13 | -ubam Write uncompressed BAM output. Default writes compressed BAM. 14 | 15 | -bed When using BAM input (-abam), write output as BED. The default 16 | is to write output in BAM when using -abam. 17 | 18 | -w Base pairs added upstream and downstream of each entry 19 | in A when searching for overlaps in B. 20 | - Creates symterical "windows" around A. 21 | - Default is 1000 bp. 22 | - (INTEGER) 23 | 24 | -l Base pairs added upstream (left of) of each entry 25 | in A when searching for overlaps in B. 26 | - Allows one to define assymterical "windows". 27 | - Default is 1000 bp. 28 | - (INTEGER) 29 | 30 | -r Base pairs added downstream (right of) of each entry 31 | in A when searching for overlaps in B. 32 | - Allows one to define assymterical "windows". 33 | - Default is 1000 bp. 34 | - (INTEGER) 35 | 36 | -sw Define -l and -r based on strand. For example if used, -l 500 37 | for a negative-stranded feature will add 500 bp downstream. 38 | - Default = disabled. 39 | 40 | -sm Only report hits in B that overlap A on the _same_ strand. 41 | - By default, overlaps are reported without respect to strand. 42 | 43 | -Sm Only report hits in B that overlap A on the _opposite_ strand. 44 | - By default, overlaps are reported without respect to strand. 45 | 46 | -u Write the original A entry _once_ if _any_ overlaps found in B. 47 | - In other words, just report the fact >=1 hit was found. 48 | 49 | -c For each entry in A, report the number of overlaps with B. 50 | - Reports 0 for A entries that have no overlap with B. 51 | - Overlaps restricted by -f. 52 | 53 | -v Only report those entries in A that have _no overlaps_ with B. 54 | - Similar to "grep -v." 55 | 56 | -header Print the header from the A file prior to results. 
57 | 58 | -------------------------------------------------------------------------------- /test/test_data/bowtie2_build.txt: -------------------------------------------------------------------------------- 1 | Bowtie 2 version 2.3.5.1 by Ben Langmead (langmea@cs.jhu.edu, www.cs.jhu.edu/~langmea) 2 | Usage: bowtie2-build [options]* 3 | reference_in comma-separated list of files with ref sequences 4 | bt2_index_base write bt2 data to files with this dir/basename 5 | *** Bowtie 2 indexes work only with v2 (not v1). Likewise for v1 indexes. *** 6 | Options: 7 | -f reference files are Fasta (default) 8 | -c reference sequences given on cmd line (as 9 | ) 10 | --large-index force generated index to be 'large', even if ref 11 | has fewer than 4 billion nucleotides 12 | --debug use the debug binary; slower, assertions enabled 13 | --sanitized use sanitized binary; slower, uses ASan and/or UBSan 14 | --verbose log the issued command 15 | -a/--noauto disable automatic -p/--bmax/--dcv memory-fitting 16 | -p/--packed use packed strings internally; slower, less memory 17 | --bmax max bucket sz for blockwise suffix-array builder 18 | --bmaxdivn max bucket sz as divisor of ref len (default: 4) 19 | --dcv diff-cover period for blockwise (default: 1024) 20 | --nodc disable diff-cover (algorithm becomes quadratic) 21 | -r/--noref don't build .3/.4 index files 22 | -3/--justref just build .3/.4 index files 23 | -o/--offrate SA is sampled every 2^ BWT chars (default: 5) 24 | -t/--ftabchars # of chars consumed in initial lookup (default: 10) 25 | --threads # of threads 26 | --seed seed for random number generator 27 | -q/--quiet verbose output (for debugging) 28 | -h/--help print detailed description of tool and its options 29 | --usage print this usage message 30 | --version print version information and quit 31 | -------------------------------------------------------------------------------- /test/test_data/bwa.txt: 
-------------------------------------------------------------------------------- 1 | 2 | Program: bwa (alignment via Burrows-Wheeler transformation) 3 | Version: 0.7.17-r1188 4 | Contact: Heng Li 5 | 6 | Usage: bwa [options] 7 | 8 | Command: index index sequences in the FASTA format 9 | mem BWA-MEM algorithm 10 | fastmap identify super-maximal exact matches 11 | pemerge merge overlapping paired ends (EXPERIMENTAL) 12 | aln gapped/ungapped alignment 13 | samse generate alignment (single ended) 14 | sampe generate alignment (paired ended) 15 | bwasw BWA-SW for long queries 16 | 17 | shm manage indices in shared memory 18 | fa2pac convert FASTA to PAC format 19 | pac2bwt generate BWT from PAC 20 | pac2bwtgen alternative algorithm for generating BWT 21 | bwtupdate update .bwt to the new format 22 | bwt2sa generate SA from BWT and Occ 23 | 24 | Note: To use BWA, you need to first index the genome with `bwa index'. 25 | There are three alignment algorithms in BWA: `mem', `bwasw', and 26 | `aln/samse/sampe'. If you are not sure which to use, try `bwa mem' 27 | first. Please `man ./bwa.1' for the manual. 
28 | 29 | -------------------------------------------------------------------------------- /test/test_data/bwa_bwt2sa.txt: -------------------------------------------------------------------------------- 1 | Usage: bwa bwt2sa [-i 32] 2 | -------------------------------------------------------------------------------- /test/test_data/bwa_bwtupdate.txt: -------------------------------------------------------------------------------- 1 | Usage: bwa bwtupdate 2 | -------------------------------------------------------------------------------- /test/test_data/bwa_index.txt: -------------------------------------------------------------------------------- 1 | 2 | Usage: bwa index [options] 3 | 4 | Options: -a STR BWT construction algorithm: bwtsw, is or rb2 [auto] 5 | -p STR prefix of the index [same as fasta name] 6 | -b INT block size for the bwtsw algorithm (effective with -a bwtsw) [10000000] 7 | -6 index files named as .64.* instead of .* 8 | 9 | Warning: `-a bwtsw' does not work for short genomes, while `-a is' and 10 | `-a div' do not work not for long genomes. 
11 | 12 | -------------------------------------------------------------------------------- /test/test_data/bwa_mem.txt: -------------------------------------------------------------------------------- 1 | 2 | Usage: bwa mem [options] [in2.fq] 3 | 4 | Algorithm options: 5 | 6 | -t INT number of threads [1] 7 | -k INT minimum seed length [19] 8 | -w INT band width for banded alignment [100] 9 | -d INT off-diagonal X-dropoff [100] 10 | -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [1.5] 11 | -y INT seed occurrence for the 3rd round seeding [20] 12 | -c INT skip seeds with more than INT occurrences [500] 13 | -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50] 14 | -W INT discard a chain if seeded bases shorter than INT [0] 15 | -m INT perform at most INT rounds of mate rescues for each read [50] 16 | -S skip mate rescue 17 | -P skip pairing; mate rescue performed unless -S also in use 18 | 19 | Scoring options: 20 | 21 | -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1] 22 | -B INT penalty for a mismatch [4] 23 | -O INT[,INT] gap open penalties for deletions and insertions [6,6] 24 | -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [1,1] 25 | -L INT[,INT] penalty for 5'- and 3'-end clipping [5,5] 26 | -U INT penalty for an unpaired read pair [17] 27 | 28 | -x STR read type. 
Setting -x changes multiple parameters unless overridden [null] 29 | pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) 30 | ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) 31 | intractg: -B9 -O16 -L5 (intra-species contigs to ref) 32 | 33 | Input/output options: 34 | 35 | -p smart pairing (ignoring in2.fq) 36 | -R STR read group header line such as '@RG\tID:foo\tSM:bar' [null] 37 | -H STR/FILE insert STR to header if it starts with @; or insert lines in FILE [null] 38 | -o FILE sam file to output results to [stdout] 39 | -j treat ALT contigs as part of the primary assembly (i.e. ignore .alt file) 40 | -5 for split alignment, take the alignment with the smallest coordinate as primary 41 | -q don't modify mapQ of supplementary alignments 42 | -K INT process INT input bases in each batch regardless of nThreads (for reproducibility) [] 43 | 44 | -v INT verbosity level: 1=error, 2=warning, 3=message, 4+=debugging [3] 45 | -T INT minimum score to output [30] 46 | -h INT[,INT] if there are 80% of the max score, output all in XA [5,200] 47 | -a output all alignments for SE or unpaired PE 48 | -C append FASTA/FASTQ comment to SAM output 49 | -V output the reference FASTA header in the XR tag 50 | -Y use soft clipping for supplementary alignments 51 | -M mark shorter split hits as secondary 52 | 53 | -I FLOAT[,FLOAT[,INT[,INT]]] 54 | specify the mean, standard deviation (10% of the mean if absent), max 55 | (4 sigma from the mean if absent) and min of the insert size distribution. 56 | FR orientation only. [inferred] 57 | 58 | Note: Please read the man page for detailed description of the command line and options. 
59 | 60 | -------------------------------------------------------------------------------- /test/test_data/dinosaur.txt: -------------------------------------------------------------------------------- 1 | usage: 2 | > java -jar Dinosaur-1.1.3.jar [OPTIONS] mzML 3 | OPTIONS: 4 | PARAMETER DEFAULT DESCRIPTION 5 | advHelp false set to output adv param file help and quit 6 | advParams path to adv param file 7 | concurrency 2 the number of assays to analyze in parallel 8 | force false ignore missing mzML params 9 | maxCharge 6 max searched ion charge 10 | minCharge 1 min searched ion charge 11 | mode global analysis mode: global or target. Global mode reports all isotope patterns, targeted only those matching targets. 12 | mzML - The shotgun MzML file to analyze 13 | nReport 10 number of random assay to export control figure for 14 | outDir output directory (by default same as input mzML) 15 | outName basename for output files (by default same as input mzML) 16 | profiling false set to enable CPU profiling 17 | reportDeisoMzHeight 15.0 mz range in deisotoper reports 18 | reportHighRes false generate high-resolution plot trail when supported (for print) 19 | reportSeed -1 seed to use for report assay selection (<0 means random) 20 | reportTargets false set to create a special report figure for each target 21 | targetPreference rt if multiple isotope patterns fit target, take the closest rt apex (rt) or the most intense (intensity) 22 | targets path to isotope patterns target file (not used by default) 23 | verbose false increase details in output 24 | writeBinary false set to output binary MSFeatureProtocol file 25 | writeHills false set to output csv file with all hills assigned to isotope patterns 26 | writeMsInspect false set to output MsInspect feature csv file 27 | writeQuantML false set to output mzQuantML file 28 | zipQcFolder false set to zip the entire qc folder on algorithm completion 29 | 30 | Not enough arguments! 
31 | -------------------------------------------------------------------------------- /test/test_data/gth.txt: -------------------------------------------------------------------------------- 1 | Usage: gth [option ...] -genomic file [...] -cdna file [...] -protein file [...] 2 | Compute similarity-based gene structure predictions (spliced alignments) 3 | using cDNA/EST and/or protein sequences and assemble the resulting spliced 4 | alignments to consensus spliced alignments. 5 | 6 | -genomic specify input files containing genomic sequences 7 | mandatory option 8 | -cdna specify input files containing cDNA/EST sequences 9 | -protein specify input files containing protein sequences 10 | -species specify species to select splice site model which is most 11 | appropriate; possible species: 12 | "human" 13 | "mouse" 14 | "rat" 15 | "chicken" 16 | "drosophila" 17 | "nematode" 18 | "fission_yeast" 19 | "aspergillus" 20 | "arabidopsis" 21 | "maize" 22 | "rice" 23 | "medicago" 24 | default: undefined 25 | -bssm read bssm parameter from file in the path given by the 26 | environment variable BSSMDIR 27 | default: undefined 28 | -scorematrix read amino acid substitution scoring matrix from file in the 29 | path given by the environment variable GTHDATADIR 30 | default: BLOSUM62 31 | -translationtable set the codon translation table used for codon translation in 32 | matching, DP, and output 33 | default: 1 34 | -f analyze only forward strand of genomic sequences 35 | default: no 36 | -r analyze only reverse strand of genomic sequences 37 | default: no 38 | -cdnaforward align only forward strand of cDNAs 39 | default: no 40 | -frompos analyze genomic sequence from this position 41 | requires -topos or -width; counting from 1 on 42 | default: 0 43 | -topos analyze genomic sequence to this position 44 | requires -frompos; counting from 1 on 45 | default: 0 46 | -width analyze only this width of genomic sequence 47 | requires -frompos 48 | default: 0 49 | -v be verbose 50 | 
default: no 51 | -xmlout show output in XML format 52 | default: no 53 | -gff3out show output in GFF3 format 54 | default: no 55 | -md5ids show MD5 fingerprints as sequence IDs 56 | default: no 57 | -o redirect output to specified file 58 | default: undefined 59 | -gzip write gzip compressed output file 60 | default: no 61 | -bzip2 write bzip2 compressed output file 62 | default: no 63 | -force force writing to output file 64 | default: no 65 | -gs2out output in old GeneSeqer2 format 66 | default: no 67 | -minmatchlen specify minimum match length (cDNA matching) 68 | default: 20 69 | -seedlength specify the seed length (cDNA matching) 70 | default: 18 71 | -exdrop specify the Xdrop value for edit distance extension (cDNA 72 | matching) 73 | default: 2 74 | -prminmatchlen specify minimum match length (protein matches) 75 | default: 24 76 | -prseedlength specify seed length (protein matching) 77 | default: 10 78 | -prhdist specify Hamming distance (protein matching) 79 | default: 4 80 | -gcmaxgapwidth set the maximum gap width for global chains 81 | defines approximately the maximum intron length 82 | set to 0 to allow for unlimited length 83 | in order to avoid false-positive exons (lonely exons) at the 84 | sequence ends, it is very important to set this parameter 85 | appropriately! 86 | default: 1000000 87 | -gcmincoverage set the minimum coverage of global chains regarding to the 88 | reference sequence 89 | default: 50 90 | -paralogs compute paralogous genes (different chaining procedure) 91 | default: no 92 | -introncutout enable the intron cutout technique 93 | default: no 94 | -fastdp use jump table to increase speed of DP calculation 95 | default: no 96 | -autointroncutout set the automatic intron cutout matrix size in megabytes and 97 | enable the automatic intron cutout technique 98 | default: 0 99 | -intermediate stop after calculation of spliced alignments and output 100 | results in reusable XML format. 
Do not process this output 101 | yourself, use the ``normal'' XML output instead! 102 | default: no 103 | -first set the maximum number of spliced alignments per genomic DNA 104 | input. Set to 0 for unlimited number. 105 | default: 0 106 | -help display help for basic options and exit 107 | -help+ display help for all options and exit 108 | -version display version information and exit 109 | 110 | For detailed information, please refer to the manual of GenomeThreader. 111 | Report bugs to . 112 | -------------------------------------------------------------------------------- /test/test_data/htseq_count.txt: -------------------------------------------------------------------------------- 1 | usage: htseq-count [options] alignment_file gff_file 2 | 3 | This script takes one or more alignment files in SAM/BAM format and a feature 4 | file in GFF format and calculates for each feature the number of reads mapping 5 | to it. See http://htseq.readthedocs.io/en/master/count.html for details. 6 | 7 | positional arguments: 8 | samfilenames Path to the SAM/BAM files containing the mapped reads. 9 | If '-' is selected, read from standard input 10 | featuresfilename Path to the GTF file containing the features 11 | 12 | optional arguments: 13 | -h, --help show this help message and exit 14 | -f {sam,bam,auto}, --format {sam,bam,auto} 15 | Type of data. DEPRECATED: file format 16 | is detected automatically. This option is ignored. 17 | -r {pos,name}, --order {pos,name} 18 | 'pos' or 'name'. Sorting order of 19 | (default: name). Paired-end sequencing data must be 20 | sorted either by position or by read name, and the 21 | sorting order must be specified. Ignored for single- 22 | end data. 23 | --max-reads-in-buffer MAX_BUFFER_SIZE 24 | When is paired end sorted by 25 | position, allow only so many reads to stay in memory 26 | until the mates are found (raising this number will 27 | use more memory). 
Has no effect for single end or 28 | paired end sorted by name 29 | -s {yes,no,reverse}, --stranded {yes,no,reverse} 30 | Whether the data is from a strand-specific assay. 31 | Specify 'yes', 'no', or 'reverse' (default: yes). 32 | 'reverse' means 'yes' with reversed strand 33 | interpretation 34 | -a MINAQUAL, --minaqual MINAQUAL 35 | Skip all reads with MAPQ alignment quality lower than 36 | the given minimum value (default: 10). MAPQ is the 5th 37 | column of a SAM/BAM file and its usage depends on the 38 | software used to map the reads. 39 | -t FEATURETYPE, --type FEATURETYPE 40 | Feature type (3rd column in GTF file) to be used, all 41 | features of other type are ignored (default, suitable 42 | for Ensembl GTF files: exon) 43 | -i IDATTR, --idattr IDATTR 44 | GTF attribute to be used as feature ID (default, 45 | suitable for Ensembl GTF files: gene_id). All feature 46 | of the right type (see -t option) within the same GTF 47 | attribute will be added together. The typical way of 48 | using this option is to count all exonic reads from 49 | each gene and add the exons but other uses are 50 | possible as well. 51 | --additional-attr ADDITIONAL_ATTR 52 | Additional feature attributes (default: none, suitable 53 | for Ensembl GTF files: gene_name). Use multiple times 54 | for more than one additional attribute. These 55 | attributes are only used as annotations in the output, 56 | while the determination of how the counts are added 57 | together is done based on option -i. 
58 | -m {union,intersection-strict,intersection-nonempty}, --mode {union,intersection-strict,intersection-nonempty} 59 | Mode to handle reads overlapping more than one feature 60 | (choices: union, intersection-strict, intersection- 61 | nonempty; default: union) 62 | --nonunique {none,all,fraction,random} 63 | Whether and how to score reads that are not uniquely 64 | aligned or ambiguously assigned to features (choices: 65 | none, all, fraction, random; default: none) 66 | --secondary-alignments {score,ignore} 67 | Whether to score secondary alignments (0x100 flag) 68 | --supplementary-alignments {score,ignore} 69 | Whether to score supplementary alignments (0x800 flag) 70 | -o SAMOUTS, --samout SAMOUTS 71 | Write out all SAM alignment records into SAM/BAM files 72 | (one per input file needed), annotating each line with 73 | its feature assignment (as an optional field with tag 74 | 'XF'). See the -p option to use BAM instead of SAM. 75 | -p {SAM,BAM,sam,bam}, --samout-format {SAM,BAM,sam,bam} 76 | Format to use with the --samout option. 77 | -d OUTPUT_DELIMITER, --delimiter OUTPUT_DELIMITER 78 | Column delimiter in output (default: TAB). 79 | -c OUTPUT_FILENAME, --counts_output OUTPUT_FILENAME 80 | Filename to output the counts to instead of stdout. 81 | --append-output Append counts output. This option is useful if you 82 | have already creates a TSV/CSV/similar file with a 83 | header for your samples (with additional columns for 84 | the feature name and any additionl attributes) and 85 | want to fill in the rest of the file. 86 | -n NPROCESSES, --nprocesses NPROCESSES 87 | Number of parallel CPU processes to use (default: 1). 88 | --feature-query FEATURE_QUERY 89 | Restrict to features descibed in this expression. 90 | Currently supports a single kind of expression: 91 | attribute == "one attr" to restrict the GFF to a 92 | single gene or transcript, e.g. 
--feature-query 93 | 'gene_name == "ACTB"' - notice the single quotes 94 | around the argument of this option and the double 95 | quotes around the gene name. Broader queries might 96 | become available in the future. 97 | -q, --quiet Suppress progress report 98 | --version Show software version and exit 99 | 100 | Written by Simon Anders (sanders@fs.tum.de), European Molecular Biology 101 | Laboratory (EMBL) and Fabio Zanini (fabio.zanini@unsw.edu.au), UNSW Sydney. 102 | (c) 2010-2020. Released under the terms of the GNU General Public License v3. 103 | Part of the 'HTSeq' framework, version 0.12.4. 104 | -------------------------------------------------------------------------------- /test/test_data/mauve.txt: -------------------------------------------------------------------------------- 1 | Usage: 2 | mauveAligner [options] ... 3 | Options: 4 | --output= Output file name. Prints to screen by default 5 | --mums Find MUMs only, do not attempt to determine locally collinear blocks (LCBs) 6 | --no-recursion Don't perform recursive anchor identification (implies --no-gapped-alignment) 7 | --no-lcb-extension If determining LCBs, don't attempt to extend the LCBs 8 | --seed-size= Initial seed match size, default is log_2( average seq. length ) 9 | --max-extension-iterations= Limit LCB extensions to this number of attempts, default is 4 10 | --eliminate-inclusions Eliminate linked inclusions in subset matches. 11 | --weight= Minimum LCB weight in base pairs per sequence 12 | --match-input= Use specified match file instead of searching for matches 13 | --lcb-match-input Indicates that the match input file contains matches that have been clustered into LCBs 14 | --lcb-input= Use specified lcb file instead of constructing LCBs (skips LCB generation) 15 | --scratch-path= For large genomes, use a directory for storage of temporary data. Should be given two or more times to with different paths. 
16 | --id-matrix= Generate LCB stats and write them to the specified file 17 | --island-size= Find islands larger than the given number 18 | --island-output= Output islands the given file (requires --island-size) 19 | --backbone-size= Find stretches of backbone longer than the given number of b.p. 20 | --max-backbone-gap= Allow backbone to be interrupted by gaps up to this length in b.p. 21 | --backbone-output= Output islands the given file (requires --island-size) 22 | --coverage-output= Output a coverage list to the specified file (- for stdout) 23 | --repeats Generates a repeat map. Only one sequence can be specified 24 | --output-guide-tree= Write out a guide tree to the designated file 25 | --collinear Assume that input sequences are collinear--they have no rearrangements 26 | 27 | Gapped alignment controls: 28 | --no-gapped-alignment Don't perform a gapped alignment 29 | --max-gapped-aligner-length= Maximum number of base pairs to attempt aligning with the gapped aligner 30 | --min-recursive-gap-length= Minimum size of gaps that Mauve will perform recursive MUM anchoring on (Default is 200) 31 | 32 | Signed permutation matrix options: 33 | --permutation-matrix-output= Write out the LCBs as a signed permutation matrix to the given file 34 | --permutation-matrix-min-weight= A permutation matrix will be written for every set of LCBs with weight between this value and the value of --weight 35 | 36 | Alignment output options: 37 | --alignment-output-dir= Outputs a set of alignment files (one per LCB) to a given directory 38 | --alignment-output-format= Selects the output format for --alignment-output-dir 39 | --output-alignment= Write out an XMFA format alignment to the designated file 40 | 41 | Supported alignment output formats are: phylip, clustal, msf, nexus, mega, codon 42 | 43 | -------------------------------------------------------------------------------- /test/test_data/podchecker.txt: 
-------------------------------------------------------------------------------- 1 | Usage: 2 | podchecker [-help] [-man] [-(no)warnings] [file ...] 3 | 4 | Options and Arguments: 5 | -help Print a brief help message and exit. 6 | 7 | -man Print the manual page and exit. 8 | 9 | -warnings -nowarnings 10 | Turn on/off printing of warnings. Repeating -warnings increases 11 | the warning level, i.e. more warnings are printed. Currently 12 | increasing to level two causes flagging of unescaped "<,>" 13 | characters. 14 | 15 | file The pathname of a POD file to syntax-check (defaults to standard 16 | input). 17 | -------------------------------------------------------------------------------- /test/test_data/samtools.txt: -------------------------------------------------------------------------------- 1 | 2 | Program: samtools (Tools for alignments in the SAM format) 3 | Version: 1.9 (using htslib 1.9) 4 | 5 | Usage: samtools [options] 6 | 7 | Commands: 8 | -- Indexing 9 | dict create a sequence dictionary file 10 | faidx index/extract FASTA 11 | fqidx index/extract FASTQ 12 | index index alignment 13 | 14 | -- Editing 15 | calmd recalculate MD/NM tags and '=' bases 16 | fixmate fix mate information 17 | reheader replace BAM header 18 | targetcut cut fosmid regions (for fosmid pool only) 19 | addreplacerg adds or replaces RG tags 20 | markdup mark duplicates 21 | 22 | -- File operations 23 | collate shuffle and group alignments by name 24 | cat concatenate BAMs 25 | merge merge sorted alignments 26 | mpileup multi-way pileup 27 | sort sort alignment file 28 | split splits a file by read group 29 | quickcheck quickly check if SAM/BAM/CRAM file appears intact 30 | fastq converts a BAM to a FASTQ 31 | fasta converts a BAM to a FASTA 32 | 33 | -- Statistics 34 | bedcov read depth per BED region 35 | depth compute the depth 36 | flagstat simple stats 37 | idxstats BAM index stats 38 | phase phase heterozygotes 39 | stats generate stats (former bamcheck) 40 | 41 | -- Viewing 
42 | flags explain BAM flags 43 | tview text alignment viewer 44 | view SAM<->BAM<->CRAM conversion 45 | depad convert padded BAM to unpadded BAM 46 | 47 | -------------------------------------------------------------------------------- /test/test_data/samtools_bedcov.txt: -------------------------------------------------------------------------------- 1 | Usage: samtools bedcov [options] [...] 2 | 3 | Options: 4 | -Q mapping quality threshold [0] 5 | -j do not include deletions (D) and ref skips (N) in bedcov computation 6 | --input-fmt-option OPT[=VAL] 7 | Specify a single input file format option in the form 8 | of OPTION or OPTION=VALUE 9 | --reference FILE 10 | Reference sequence FASTA FILE [null] 11 | -------------------------------------------------------------------------------- /test/test_data/samtools_dict.txt: -------------------------------------------------------------------------------- 1 | About: Create a sequence dictionary file from a fasta file 2 | Usage: samtools dict [options] 3 | 4 | Options: -a, --assembly STR assembly 5 | -H, --no-header do not print @HD line 6 | -o, --output STR file to write out dict file [stdout] 7 | -s, --species STR species 8 | -u, --uri STR URI [file:///abs/path/to/file.fa] 9 | 10 | -------------------------------------------------------------------------------- /test/test_data/samtools_pl.txt: -------------------------------------------------------------------------------- 1 | Program: samtools.pl (helper script for SAMtools) 2 | Version: 0.3.3 3 | Contact: Heng Li 4 | 5 | Usage: samtools.pl [] 6 | 7 | Command: varFilter filtering SNPs and short indels 8 | pileup2fq generate fastq from `pileup -c' 9 | showALEN print alignment length (ALEN) following CIGAR 10 | 11 | -------------------------------------------------------------------------------- /test/test_data/samtools_quickcheck.txt: -------------------------------------------------------------------------------- 1 | Usage: samtools quickcheck [options] [...] 
2 | Options: 3 | -v verbose output (repeat for more verbosity) 4 | -q suppress warning messages 5 | 6 | Notes: 7 | 8 | 1. By default quickcheck will emit a warning message if and only if a file 9 | fails the checks, in which case the exit status is non-zero. Under normal 10 | behaviour with valid data it will be silent and has a zero exit status. 11 | The warning messages are purely for manual inspection and should not be 12 | parsed by scripts. 13 | 14 | 2. In order to use this command programmatically, you should check its exit 15 | status. One way to use quickcheck might be as a check that all BAM files in 16 | a directory are okay: 17 | 18 | samtools quickcheck *.bam && echo 'all ok' \ 19 | || echo 'fail!' 20 | 21 | The first level of verbosity lists only files that fail to stdout. 22 | To obtain a parsable list of files that have failed, use this option: 23 | 24 | samtools quickcheck -qv *.bam > bad_bams.fofn \ 25 | && echo 'all ok' \ 26 | || echo 'some files failed check, see bad_bams.fofn' 27 | -------------------------------------------------------------------------------- /test/test_explore_e2e.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | from unittest.mock import Mock, patch 5 | 6 | import pytest 7 | 8 | from aclimatise import explore_command 9 | from aclimatise.model import Command, Positional 10 | 11 | from .util import ( 12 | HelpText, 13 | all_ids, 14 | all_tests, 15 | convert_validate, 16 | ensure_conda, 17 | skip_not_installed, 18 | ) 19 | 20 | 21 | @pytest.mark.parametrize("test", all_tests, ids=all_ids) 22 | def test_explore(test: HelpText): 23 | """ 24 | A comprehensive end-to-end test that tests the parser and converters, after exploring a given command 25 | """ 26 | if not shutil.which(test.cmd[0]): 27 | pytest.skip("{} is not installed".format(test.cmd[0])) 28 | 29 | try: 30 | ensure_conda() 31 | except: 32 | pytest.skip("Not in a conda 
environment") 33 | 34 | # For speed's sake, only explore to depth 2 35 | command = explore_command(test.cmd, max_depth=1) 36 | 37 | # Check we parsed correctly 38 | test.run_assertions(command, explore=True) 39 | 40 | 41 | @skip_not_installed("dinosaur") 42 | @pytest.mark.timeout(360) 43 | def test_explore_dinosaur(): 44 | """ 45 | Python has an issue with killing process trees, whereby the subprocess runs another subprocess. 46 | This tests that dinosaur 47 | :return: 48 | """ 49 | command = explore_command(["dinosaur"], max_depth=1) 50 | 51 | 52 | @pytest.mark.skipif(not shutil.which("bwa"), reason="bwa is not installed") 53 | def test_explore_bwa(): 54 | """ 55 | This tests specifically that exploring bwa yields a proper bwa mem 56 | """ 57 | command = explore_command(["bwa"], max_depth=1) 58 | 59 | # Check that we parsed bwa mem correctly 60 | mem = [cmd for cmd in command.subcommands if cmd.command[1] == "mem"][0] 61 | assert len(mem.positional) == 3 62 | assert len(mem.subcommands) == 0 63 | assert len(mem.named) >= 30 64 | 65 | 66 | def test_repeat_positionals(): 67 | """ 68 | Test that, if we have multiple duplicate positionals, only the first is tested 69 | """ 70 | parent = Command( 71 | command=[], 72 | positional=[ 73 | Positional(name="a", description="", position=i) for i in range(10) 74 | ], 75 | ) 76 | child = Command(command=[]) 77 | 78 | count = 0 79 | 80 | def mock_convert(*args, **kwargs): 81 | nonlocal count 82 | if count == 0: 83 | count += 1 84 | return parent 85 | return child 86 | 87 | # with patch("aclimatise.execution.help.CliHelpExecutor.explore", new=lambda *args, **kwargs: child): 88 | with patch( 89 | "aclimatise.execution.help.CliHelpExecutor.convert", 90 | new=Mock(side_effect=mock_convert), 91 | ) as mocked: 92 | explore_command([]) 93 | 94 | # We should only call convert twice, once for the parent and once for the child, since there's only one unique positional 95 | assert mocked.call_count == 2 96 | 
-------------------------------------------------------------------------------- /test/test_model.py: -------------------------------------------------------------------------------- 1 | from aclimatise.model import Command 2 | 3 | 4 | def test_reanalyse(samtools_cmd: Command): 5 | """ 6 | Test the command.reanalyse() method 7 | """ 8 | reanalysed = samtools_cmd.reanalyse() 9 | assert reanalysed.help_text == samtools_cmd.help_text 10 | assert len(reanalysed.subcommands) == len(samtools_cmd.subcommands) 11 | 12 | re_sort = reanalysed["sort"] 13 | assert len(re_sort.positional) > 0 14 | assert len(re_sort.named) > 0 15 | -------------------------------------------------------------------------------- /test/test_parse_e2e.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | 4 | import pytest 5 | from pkg_resources import resource_filename 6 | 7 | from aclimatise.integration import parse_help 8 | 9 | from .util import ( 10 | HelpText, 11 | all_ids, 12 | all_tests, 13 | all_tests_lookup, 14 | convert_validate, 15 | validate_cwl, 16 | validate_wdl, 17 | ) 18 | 19 | 20 | @pytest.mark.parametrize("test", all_tests, ids=all_ids) 21 | def test_all(test: HelpText): 22 | """ 23 | A comprehensive end-to-end test that tests the parser and converters, using the test data files 24 | """ 25 | with open(resource_filename("test", test.path)) as fp: 26 | help_text = fp.read() 27 | 28 | cmd = parse_help(test.cmd, help_text) 29 | 30 | # Check that the help text is included in the command 31 | assert cmd.help_text == help_text 32 | 33 | test.run_assertions(cmd, explore=False) 34 | 35 | 36 | @pytest.mark.timeout(20) 37 | def test_long_text(): 38 | """ 39 | This tests the case where the parse function is handed an inordinate amount of text. 
In this case, we shouldn't 40 | bother parsing, and just return an empty command 41 | """ 42 | text = "\n".join( 43 | [ 44 | "".join( 45 | random.choices( 46 | string.ascii_letters + " ", 47 | weights=[1] * len(string.ascii_letters) + [5], 48 | k=100, 49 | ) 50 | ) 51 | for i in range(2000) 52 | ] 53 | ) 54 | command = parse_help(["some", "command"], text=text) 55 | assert len(command.positional) == 0 56 | assert len(command.named) == 0 57 | -------------------------------------------------------------------------------- /test/test_type_inference.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from aclimatise.cli_types import ( 4 | CliBoolean, 5 | CliDir, 6 | CliFile, 7 | CliFloat, 8 | CliInteger, 9 | CliString, 10 | CliType, 11 | ) 12 | from aclimatise.model import CliArgument, EmptyFlagArg, Flag, SimpleFlagArg, infer_type 13 | 14 | 15 | @pytest.mark.parametrize( 16 | "string,typ", 17 | [ 18 | ("", None), 19 | ("int", CliInteger()), 20 | ("size", CliInteger()), 21 | ("length", CliInteger()), 22 | ("max", CliInteger()), 23 | ("min", CliInteger()), 24 | ("str", CliString()), 25 | ("float", CliFloat()), 26 | ("decimal", CliFloat()), 27 | ("bool", CliBoolean()), 28 | ("file", CliFile()), 29 | ("path", CliFile()), 30 | ("input file", CliFile(output=False)), 31 | ("output file", CliFile(output=True)), 32 | ("folder", CliDir()), 33 | ("directory", CliDir()), 34 | ("output directory", CliDir(output=True)), 35 | ("blah 23 blub", CliInteger()), 36 | ("nonsense 23.42", CliFloat()), 37 | (".42 gibberish", CliFloat()), 38 | ("1E-5", CliFloat()), 39 | ("BOOL Output strand bias files, 'true' or 'false'", CliBoolean()), 40 | ("file to write out dict file [stdout]", CliFile(output=True)), 41 | ("Filename to output the counts to instead of stdout.", CliFile(output=True)), 42 | pytest.param( 43 | "Write out all SAM alignment records into SAM/BAM files (one per input file needed), annotating each line with its feature 
assignment (as an optional field with tag 'XF'). See the -p option to use BAM instead of SAM.", 44 | CliFile(output=True), 45 | marks=pytest.mark.xfail( 46 | reason="This description doesn't make it clear that it wants an output file. I'm not sure how this could ever be parsed" 47 | ), 48 | ), 49 | ], 50 | ) 51 | def test_type_inference(string, typ): 52 | inferred_type = infer_type(string) 53 | assert inferred_type == typ 54 | 55 | 56 | @pytest.mark.parametrize( 57 | "flag,typ", 58 | [ 59 | [ 60 | Flag( 61 | description="Filename to output the counts to instead of stdout.", 62 | synonyms=["-c", "--counts_output"], 63 | args=SimpleFlagArg("OUTPUT_FILENAME"), 64 | ), 65 | CliFile(output=True), 66 | ], 67 | pytest.param( 68 | Flag( 69 | description="redirect output to specified file\ndefault: undefined", 70 | synonyms=["-o"], 71 | args=EmptyFlagArg(), 72 | ), 73 | CliFile(output=True), 74 | marks=pytest.mark.xfail( 75 | reason="Because the help doesn't indicate an argument, we can't know that this is an output file" 76 | ), 77 | ), 78 | ], 79 | ) 80 | def test_flag_type_inference(flag: CliArgument, typ: CliType): 81 | inferred_type = flag.get_type() 82 | assert inferred_type == typ 83 | -------------------------------------------------------------------------------- /test/test_yaml_dump.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | 3 | from aclimatise.integration import parse_help 4 | from aclimatise.yaml import yaml 5 | 6 | 7 | def test_round_trip(bwamem_help): 8 | command = parse_help(["bwa", "mem"], bwamem_help) 9 | 10 | # Dump 11 | buffer = StringIO() 12 | yaml.dump(command, buffer) 13 | 14 | # Load 15 | buffer.seek(0) 16 | output = yaml.load(buffer) 17 | 18 | # Assert the round trip worked 19 | assert command == output 20 | -------------------------------------------------------------------------------- /test/usage/test_usage.py: 
-------------------------------------------------------------------------------- 1 | from itertools import chain 2 | 3 | import pytest 4 | 5 | from aclimatise.flag_parser.elements import arg_expression, flag_with_arg, list_type_arg 6 | from aclimatise.model import Flag, RepeatFlagArg, SimpleFlagArg 7 | from aclimatise.usage_parser.elements import ( # short_flag_list, 8 | stack, 9 | usage, 10 | usage_element, 11 | ) 12 | from aclimatise.usage_parser.model import UsageElement 13 | 14 | 15 | def test_bwa(): 16 | txt = "Usage: bwa mem [options] [in2.fq]" 17 | els = usage.parseString(txt) 18 | print(els) 19 | 20 | 21 | @pytest.mark.skip( 22 | "It's impossible to distinguish between a grouped list of short flags and one long flag with a single dash" 23 | ) 24 | def test_samtools_merge_short_flags(): 25 | text = "-nurlf" 26 | els = short_flag_list.parseString(text) 27 | assert len(els) == 5 28 | assert isinstance(els[0], Flag) 29 | 30 | 31 | @pytest.mark.skip( 32 | "It's impossible to distinguish between a grouped list of short flags and one long flag with a single dash" 33 | ) 34 | def test_samtools_merge_optional_short_flags(): 35 | text = "[-nurlf]" 36 | els = usage_element.parseString(text) 37 | assert len(els) == 5 38 | assert isinstance(els[0], Flag) 39 | assert els[0].optional 40 | 41 | 42 | def test_samtools_merge_variable(): 43 | text = "" 44 | els = usage_element.parseString(text) 45 | assert len(els) == 1 46 | assert isinstance(els[0], UsageElement) 47 | assert els[0].variable 48 | 49 | 50 | def test_samtools_merge_flag_arg(): 51 | text = "-h inh.sam" 52 | els = usage_element.parseString(text) 53 | assert len(els) == 1 54 | assert isinstance(els[0], Flag) 55 | assert isinstance(els[0].args, SimpleFlagArg) 56 | 57 | 58 | def test_samtools_merge_optional_flag_arg(): 59 | text = "[-h inh.sam]" 60 | els = usage_element.setDebug().parseString(text) 61 | assert len(els) == 1 62 | assert isinstance(els[0], Flag) 63 | assert els[0].optional 64 | assert 
isinstance(els[0].args, SimpleFlagArg) 65 | 66 | 67 | def test_samtools_merge_list_args(): 68 | text = "[ ... ]" 69 | el = usage_element.parseString(text) 70 | assert len(el) == 1 71 | assert isinstance(el[0], UsageElement) 72 | assert el[0].repeatable 73 | 74 | 75 | def test_samtools_merge_full(process, usage_parser): 76 | text = process( 77 | """ 78 | Usage: samtools merge [-nurlf] [-h inh.sam] [-b ] [ ... ] 79 | """ 80 | ) 81 | command = usage_parser.parse_usage(cmd=["samtools", "merge"], usage=text) 82 | 83 | assert len(command.positional) == 3 84 | assert command.positional[0].name == "out.bam" 85 | assert command.positional[1].name == "in1.bam" 86 | 87 | assert len(command.named) == 3 88 | assert command.all_synonyms == {"-nurlf", "-h", "-b"} 89 | 90 | 91 | def test_pisces_usage(usage_parser): 92 | text = "USAGE: dotnet Pisces.dll -bam -g " 93 | command = usage_parser.parse_usage(["pisces"], text) 94 | assert len(command.named) == 2 95 | assert len(command.positional) == 0 96 | assert command.all_synonyms == {"-bam", "-g"} 97 | 98 | 99 | def test_trailing_text(process, usage_parser): 100 | """ 101 | Tests that the usage parser will not parse text after the usage section has ended 102 | """ 103 | text = process( 104 | """ 105 | usage: htseq-count [options] alignment_file gff_file 106 | 107 | This script takes one or more alignment files in SAM/BAM format and a feature 108 | file in GFF format and calculates for each feature the number of reads mapping 109 | to it. See http://htseq.readthedocs.io/en/master/count.html for details. 
110 | """ 111 | ) 112 | command = usage_parser.parse_usage(["htseq-count"], text) 113 | # We don't count either the command "htseq-count", or "[options]" as an argument, so there are only 2 positionals 114 | assert len(command.positional) == 2 115 | 116 | 117 | def test_bwt2sa(usage_parser): 118 | text = """ 119 | Usage: bwa bwt2sa [-i 32] 120 | """ 121 | 122 | command = usage_parser.parse_usage(["bwa", "bwt2sa"], text) 123 | 124 | # in and out 125 | assert len(command.positional) == 2 126 | 127 | # -i 128 | assert len(command.named) == 1 129 | 130 | 131 | def test_bedtools_multiinter_flag_arg(): 132 | text = " FILE1 FILE2 .. FILEn" 133 | arg = arg_expression.parseString(text)[0] 134 | assert isinstance(arg, RepeatFlagArg) 135 | assert arg.name == "FILEn" 136 | 137 | 138 | def test_bedtools_multiinter_flag(): 139 | text = "-i FILE1 FILE2 .. FILEn" 140 | arg = flag_with_arg.parseString(text)[0] 141 | assert isinstance(arg.argtype, RepeatFlagArg) 142 | assert arg.name == "-i" 143 | 144 | 145 | def test_bedtools_multiinter(usage_parser): 146 | text = """ 147 | Summary: Identifies common intervals among multiple 148 | BED/GFF/VCF files. 149 | 150 | Usage: bedtools multiinter [OPTIONS] -i FILE1 FILE2 .. FILEn 151 | Requires that each interval file is sorted by chrom/start. 152 | 153 | Options: 154 | -cluster Invoke Ryan Layers's clustering algorithm. 
155 | """ 156 | 157 | command = usage_parser.parse_usage(["bedtools", "multiinter"], text) 158 | 159 | assert len(command.positional) == 0 160 | assert len(command.named) == 1 161 | assert command.named[0].longest_synonym == "-i" 162 | assert isinstance(command.named[0].args, RepeatFlagArg) 163 | 164 | 165 | def test_samtools_dict(usage_parser): 166 | text = """ 167 | Usage: samtools dict [options] 168 | """ 169 | command = usage_parser.parse_usage(["samtools", "dict"], text, debug=True) 170 | assert len(command.positional) == 1 171 | 172 | 173 | def test_mid_line_usage(usage_parser): 174 | text = """ 175 | Can't open --usage: No such file or directory at /usr/bin/samtools.pl line 50. 176 | """ 177 | command = usage_parser.parse_usage(["samtools.pl", "showALEN"], text, debug=True) 178 | assert command.empty 179 | 180 | 181 | def test_usage_description_block(usage_parser): 182 | text = """ 183 | Usage: 184 | shell [options] -e string 185 | execute string in V8 186 | shell [options] file1 file2 ... filek 187 | run JavaScript scripts in file1, file2, ..., filek 188 | shell [options] 189 | shell [options] --shell [file1 file2 ... filek] 190 | run an interactive JavaScript shell 191 | d8 [options] file1 file2 ... filek 192 | d8 [options] 193 | d8 [options] --shell [file1 file2 ... filek] 194 | run the new debugging shell 195 | """ 196 | command = usage_parser.parse_usage(["typeHLA.js"], text, debug=True) 197 | 198 | positional_names = {pos.name for pos in command.positional} 199 | flag_synonyms = set(chain.from_iterable([flag.synonyms for flag in command.named])) 200 | 201 | assert "shell" in positional_names 202 | assert "filek" in positional_names 203 | assert "d8" in positional_names 204 | 205 | assert "--shell" in flag_synonyms 206 | assert "-e" in flag_synonyms 207 | --------------------------------------------------------------------------------